#!/usr/bin/python3
"""Minimal WaveRNN-style model for LPCNet experiments.

Defines a sample-level network: raw PCM samples in, a softmax distribution
over u-law sample values out.
"""

import math
import sys

import numpy as np
import h5py
from keras import backend as K
from keras.models import Model
from keras.layers import Input, LSTM, CuDNNGRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Bidirectional, MaxPooling1D, Activation

from mdense import MDense

rnn_units = 256
pcm_bits = 8
# 257 levels: u-law codes span [-128, 128], shifted to class ids [0, 256].
pcm_levels = 1 + 2**pcm_bits


def new_wavernn_model():
    """Build and return the sample-level Keras model.

    Input: float PCM of shape (batch, time, 1).
    Output: per-timestep softmax over the pcm_levels u-law classes.
    """
    pcm_input = Input(shape=(None, 1))
    gru_out = CuDNNGRU(rnn_units, return_sequences=True)(pcm_input)
    ulaw_prob = MDense(pcm_levels, activation='softmax')(gru_out)
    return Model(pcm_input, ulaw_prob)
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import activations, initializers, regularizers, constraints, InputSpec
import numpy as np
import math


class MDense(Layer):
    """Multi-channel ("mixture of") dense layer.

    Computes `channels` parallel dense projections of the input, applies a
    tanh nonlinearity scaled by a learned per-channel `factor`, and sums
    across channels, i.e.

        out = activation( sum_c factor[:, c] * tanh(x . W[:, :, c] + b[:, c]) )

    with kernel shape (units, input_dim, channels), so the output has shape
    (..., units).
    """

    def __init__(self, outputs,
                 channels=2,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        # Keep compatibility with the Dense-style `input_dim` keyword.
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(MDense, self).__init__(**kwargs)
        self.units = outputs
        self.channels = channels
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        """Create kernel, optional bias, and per-channel scaling factors."""
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units, self.channels),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # NOTE(review): `factor` reuses the bias regularizer/constraint —
        # presumably intentional since it is bias-like; confirm if separate
        # control is ever needed.
        self.factor = self.add_weight(shape=(self.units, self.channels),
                                      initializer='ones',
                                      name='factor',
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint)
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True

    def call(self, inputs):
        # (..., input_dim) . (units, input_dim, channels) -> (..., units, channels)
        output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = output + self.bias
        # Per-channel saturating nonlinearity, then collapse the channel axis.
        output = K.tanh(output) * self.factor
        output = K.sum(output, axis=-1)
        if self.activation is not None:
            output = self.activation(output)
        return output

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def get_config(self):
        # Keys must match __init__ argument names so that
        # Model.from_config()/load_model() can reconstruct the layer.
        # (Previously this used 'units' — not an __init__ argument — and
        # omitted 'channels', breaking deserialization.)
        config = {
            'outputs': self.units,
            'channels': self.channels,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(MDense, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
#!/usr/bin/python3
# Train the sample-level WaveRNN-style model defined in lpcnet.py on a raw
# 16-bit PCM file.
#
# usage: train_lpcnet.py <raw 16-bit PCM file> <chunk size in samples>

import lpcnet
import sys
import numpy as np
from keras.optimizers import Adam
from ulaw import ulaw2lin, lin2ulaw

nb_epochs = 10
batch_size = 32

# Build and compile the model; sparse loss because targets are class ids,
# not one-hot vectors.
model = lpcnet.new_wavernn_model()
model.compile(optimizer=Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
model.summary()

pcmfile = sys.argv[1]
chunk_size = int(sys.argv[2])

# Load raw 16-bit samples and scale to floats in [-1, 1).
data = np.fromfile(pcmfile, dtype='int16')
#data = data[:100000000]
data = data/32768
# -1 because each target is the NEXT sample, shifted by one.
nb_frames = (len(data)-1)//chunk_size

in_data = data[:nb_frames*chunk_size]
#out_data = data[1:1+nb_frames*chunk_size]//256 + 128
# Target: next sample, u-law encoded ([-128, 128]) and offset to class
# ids in [0, 256] to match the model's pcm_levels softmax.
out_data = lin2ulaw(data[1:1+nb_frames*chunk_size]) + 128

# Shape into (frames, time, 1) sequences for the recurrent model.
in_data = np.reshape(in_data, (nb_frames, chunk_size, 1))
out_data = np.reshape(out_data, (nb_frames, chunk_size, 1))

model.fit(in_data, out_data, batch_size=batch_size, epochs=nb_epochs, validation_split=0.2)
np +import math + +def ulaw2lin(u): + return (math.exp(u/128*math.log(256))-1)/255 + + +def lin2ulaw(x): + s = np.sign(x) + x = np.abs(x) + u = (s*(128*np.log(1+255*x)/math.log(256))) + u = np.round(u) + return u.astype('int16')