diff --git a/dnn/causalconv.py b/dnn/causalconv.py
deleted file mode 100644
index 4c2e6488..00000000
--- a/dnn/causalconv.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from keras import backend as K
-from keras.engine.topology import Layer
-from keras.layers import activations, initializers, regularizers, constraints, InputSpec, Conv1D
-import numpy as np
-
-class CausalConv(Conv1D):
-
-    def __init__(self, filters,
-                 kernel_size,
-                 dilation_rate=1,
-                 activation=None,
-                 use_bias=True,
-                 kernel_initializer='glorot_uniform',
-                 bias_initializer='zeros',
-                 kernel_regularizer=None,
-                 bias_regularizer=None,
-                 activity_regularizer=None,
-                 kernel_constraint=None,
-                 bias_constraint=None,
-                 return_memory=False,
-                 **kwargs):
-
-        super(CausalConv, self).__init__(
-            filters=filters,
-            kernel_size=kernel_size,
-            strides=1,
-            padding='valid',
-            data_format='channels_last',
-            dilation_rate=dilation_rate,
-            activation=activation,
-            use_bias=use_bias,
-            kernel_initializer=kernel_initializer,
-            bias_initializer=bias_initializer,
-            kernel_regularizer=kernel_regularizer,
-            bias_regularizer=bias_regularizer,
-            activity_regularizer=activity_regularizer,
-            kernel_constraint=kernel_constraint,
-            bias_constraint=bias_constraint,
-            **kwargs)
-        self.mem_size = dilation_rate*(kernel_size-1)
-        self.return_memory = return_memory
-
-    def call(self, inputs, memory=None):
-        if memory is None:
-            mem = K.zeros((K.shape(inputs)[0], self.mem_size, K.shape(inputs)[-1]))
-        else:
-            mem = K.variable(K.cast_to_floatx(memory))
-        inputs = K.concatenate([mem, inputs], axis=1)
-        ret = super(CausalConv, self).call(inputs)
-        if self.return_memory:
-            ret = ret, inputs[:, :self.mem_size, :]
-        return ret
diff --git a/dnn/dump_lpcnet.py b/dnn/dump_lpcnet.py
deleted file mode 100755
index 52e70f42..00000000
--- a/dnn/dump_lpcnet.py
+++ /dev/null
@@ -1,270 +0,0 @@
-#!/usr/bin/python3
-'''Copyright (c) 2017-2018 Mozilla
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-'''
-
-import lpcnet
-import sys
-import numpy as np
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint
-from keras.layers import Layer, GRU, CuDNNGRU, Dense, Conv1D, Embedding
-from ulaw import ulaw2lin, lin2ulaw
-from mdense import MDense
-import keras.backend as K
-import h5py
-import re
-
-max_rnn_neurons = 1
-max_conv_inputs = 1
-max_mdense_tmp = 1
-
-def printVector(f, vector, name, dtype='float'):
-    v = np.reshape(vector, (-1));
-    #print('static const float ', name, '[', len(v), '] = \n', file=f)
-    f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
-    for i in range(0, len(v)):
-        f.write('{}'.format(v[i]))
-        if (i!=len(v)-1):
-            f.write(',')
-        else:
-            break;
-        if (i%8==7):
-            f.write("\n   ")
-        else:
-            f.write(" ")
-    #print(v, file=f)
-    f.write('\n};\n\n')
-    return;
-
-def printSparseVector(f, A, name):
-    N = A.shape[0]
-    W = np.zeros((0,))
-    diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
-    A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
-    A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
-    A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
-    printVector(f, diag, name + '_diag')
-    idx = np.zeros((0,), dtype='int')
-    for i in range(3*N//16):
-        pos = idx.shape[0]
-        idx = np.append(idx, -1)
-        nb_nonzero = 0
-        for j in range(N):
-            if np.sum(np.abs(A[j, i*16:(i+1)*16])) > 1e-10:
-                nb_nonzero = nb_nonzero + 1
-                idx = np.append(idx, j)
-                W = np.concatenate([W, A[j, i*16:(i+1)*16]])
-        idx[pos] = nb_nonzero
-    printVector(f, W, name)
-    #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
-    printVector(f, idx, name + '_idx', dtype='int')
-    return;
-
-def dump_layer_ignore(self, f, hf):
-    print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
-    return False
-Layer.dump_layer = dump_layer_ignore
-
-def dump_sparse_gru(self, f, hf):
-    global max_rnn_neurons
-    name = 'sparse_' + self.name
-    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
-    weights = self.get_weights()
-    printSparseVector(f, weights[1], name + '_recurrent_weights')
-    printVector(f, weights[-1], name + '_bias')
-    if hasattr(self, 'activation'):
-        activation = self.activation.__name__.upper()
-    else:
-        activation = 'TANH'
-    if hasattr(self, 'reset_after') and not self.reset_after:
-        reset_after = 0
-    else:
-        reset_after = 1
-    neurons = weights[0].shape[1]//3
-    max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
-    return True
-
-def dump_gru_layer(self, f, hf):
-    global max_rnn_neurons
-    name = self.name
-    print("printing layer " + name + " of type " + self.__class__.__name__)
-    weights = self.get_weights()
-    printVector(f, weights[0], name + '_weights')
-    printVector(f, weights[1], name + '_recurrent_weights')
-    printVector(f, weights[-1], name + '_bias')
-    if hasattr(self, 'activation'):
-        activation = self.activation.__name__.upper()
-    else:
-        activation = 'TANH'
-    if hasattr(self, 'reset_after') and not self.reset_after:
-        reset_after = 0
-    else:
-        reset_after = 1
-    neurons = weights[0].shape[1]//3
-    max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('extern const GRULayer {};\n\n'.format(name));
-    return True
-CuDNNGRU.dump_layer = dump_gru_layer
-GRU.dump_layer = dump_gru_layer
-
-def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
-    printVector(f, weights, name + '_weights')
-    printVector(f, bias, name + '_bias')
-    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
-    hf.write('extern const DenseLayer {};\n\n'.format(name));
-
-def dump_dense_layer(self, f, hf):
-    name = self.name
-    print("printing layer " + name + " of type " + self.__class__.__name__)
-    weights = self.get_weights()
-    activation = self.activation.__name__.upper()
-    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
-    return False
-
-Dense.dump_layer = dump_dense_layer
-
-def dump_mdense_layer(self, f, hf):
-    global max_mdense_tmp
-    name = self.name
-    print("printing layer " + name + " of type " + self.__class__.__name__)
-    weights = self.get_weights()
-    printVector(f, np.transpose(weights[0], (1, 2, 0)), name + '_weights')
-    printVector(f, np.transpose(weights[1], (1, 0)), name + '_bias')
-    printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
-    activation = self.activation.__name__.upper()
-    max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
-    f.write('const MDenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_factor,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
-    hf.write('extern const MDenseLayer {};\n\n'.format(name));
-    return False
-MDense.dump_layer = dump_mdense_layer
-
-def dump_conv1d_layer(self, f, hf):
-    global max_conv_inputs
-    name = self.name
-    print("printing layer " + name + " of type " + self.__class__.__name__)
-    weights = self.get_weights()
-    printVector(f, weights[0], name + '_weights')
-    printVector(f, weights[-1], name + '_bias')
-    activation = self.activation.__name__.upper()
-    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
-    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
-    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
-    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
-    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
-    return True
-Conv1D.dump_layer = dump_conv1d_layer
-
-
-def dump_embedding_layer_impl(name, weights, f, hf):
-    printVector(f, weights, name + '_weights')
-    f.write('const EmbeddingLayer {} = {{\n   {}_weights,\n   {}, {}\n}};\n\n'
-            .format(name, name, weights.shape[0], weights.shape[1]))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
-    hf.write('extern const EmbeddingLayer {};\n\n'.format(name));
-
-def dump_embedding_layer(self, f, hf):
-    name = self.name
-    print("printing layer " + name + " of type " + self.__class__.__name__)
-    weights = self.get_weights()[0]
-    dump_embedding_layer_impl(name, weights, f, hf)
-    return False
-Embedding.dump_layer = dump_embedding_layer
-
-
-model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=384, use_gpu=False)
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-#model.summary()
-
-model.load_weights(sys.argv[1])
-
-if len(sys.argv) > 2:
-    cfile = sys.argv[2];
-    hfile = sys.argv[3];
-else:
-    cfile = 'nnet_data.c'
-    hfile = 'nnet_data.h'
-
-
-f = open(cfile, 'w')
-hf = open(hfile, 'w')
-
-
-f.write('/*This file is automatically generated from a Keras model*/\n\n')
-f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))
-
-hf.write('/*This file is automatically generated from a Keras model*/\n\n')
-hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')
-
-embed_size = lpcnet.embed_size
-
-E = model.get_layer('embed_sig').get_weights()[0]
-W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
-dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
-W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
-dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
-W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
-dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
-W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
-#FIXME: dump only half the biases
-b = model.get_layer('gru_a').get_weights()[2]
-dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
-
-layer_list = []
-for i, layer in enumerate(model.layers):
-    if layer.dump_layer(f, hf):
-        layer_list.append(layer.name)
-
-dump_sparse_gru(model.get_layer('gru_a'), f, hf)
-
-hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
-hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
-hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
-
-
-hf.write('typedef struct {\n')
-for i, name in enumerate(layer_list):
-    hf.write('  float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
-hf.write('} NNetState;\n')
-
-hf.write('\n\n#endif\n')
-
-f.close()
-hf.close()
diff --git a/dnn/gatedconv.py b/dnn/gatedconv.py
deleted file mode 100644
index 5d15c806..00000000
--- a/dnn/gatedconv.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from keras import backend as K
-from keras.engine.topology import Layer
-from keras.layers import activations, initializers, regularizers, constraints, InputSpec, Conv1D, Dense
-import numpy as np
-
-class GatedConv(Conv1D):
-
-    def __init__(self, filters,
-                 kernel_size,
-                 dilation_rate=1,
-                 activation='tanh',
-                 use_bias=True,
-                 kernel_initializer='glorot_uniform',
-                 bias_initializer='zeros',
-                 kernel_regularizer=None,
-                 bias_regularizer=None,
-                 activity_regularizer=None,
-                 kernel_constraint=None,
-                 bias_constraint=None,
-                 return_memory=False,
-                 **kwargs):
-
-        super(GatedConv, self).__init__(
-            filters=2*filters,
-            kernel_size=kernel_size,
-            strides=1,
-            padding='valid',
-            data_format='channels_last',
-            dilation_rate=dilation_rate,
-            activation='linear',
-            use_bias=use_bias,
-            kernel_initializer=kernel_initializer,
-            bias_initializer=bias_initializer,
-            kernel_regularizer=kernel_regularizer,
-            bias_regularizer=bias_regularizer,
-            activity_regularizer=activity_regularizer,
-            kernel_constraint=kernel_constraint,
-            bias_constraint=bias_constraint,
-            **kwargs)
-        self.mem_size = dilation_rate*(kernel_size-1)
-        self.return_memory = return_memory
-        self.out_dims = filters
-        self.nongate_activation = activations.get(activation)
-
-    def call(self, inputs, cond=None, memory=None):
-        if memory is None:
-            mem = K.zeros((K.shape(inputs)[0], self.mem_size, K.shape(inputs)[-1]))
-        else:
-            mem = K.variable(K.cast_to_floatx(memory))
-        inputs = K.concatenate([mem, inputs], axis=1)
-        ret = super(GatedConv, self).call(inputs)
-        if cond is not None:
-            d = Dense(2*self.out_dims, use_bias=False, activation='linear')
-            ret = ret + d(cond)
-        ret = self.nongate_activation(ret[:, :, :self.out_dims]) * activations.sigmoid(ret[:, :, self.out_dims:])
-        if self.return_memory:
-            ret = ret, inputs[:, :self.mem_size, :]
-        return ret
-
-    def compute_output_shape(self, input_shape):
-        assert input_shape and len(input_shape) >= 2
-        assert input_shape[-1]
-        output_shape = list(input_shape)
-        output_shape[-1] = self.out_dims
-        return tuple(output_shape)
diff --git a/dnn/lpcnet.py b/dnn/lpcnet.py
deleted file mode 100644
index effd6398..00000000
--- a/dnn/lpcnet.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/python3
-'''Copyright (c) 2018 Mozilla
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-'''
-
-import math
-from keras.models import Model
-from keras.layers import Input, GRU, CuDNNGRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
-from keras import backend as K
-from keras.initializers import Initializer
-from keras.callbacks import Callback
-from mdense import MDense
-import numpy as np
-import h5py
-import sys
-
-frame_size = 160
-pcm_bits = 8
-embed_size = 128
-pcm_levels = 2**pcm_bits
-
-class Sparsify(Callback):
-    def __init__(self, t_start, t_end, interval, density):
-        super(Sparsify, self).__init__()
-        self.batch = 0
-        self.t_start = t_start
-        self.t_end = t_end
-        self.interval = interval
-        self.final_density = density
-
-    def on_batch_end(self, batch, logs=None):
-        #print("batch number", self.batch)
-        self.batch += 1
-        if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
-            #print("don't constrain");
-            pass
-        else:
-            #print("constrain");
-            layer = self.model.get_layer('gru_a')
-            w = layer.get_weights()
-            p = w[1]
-            nb = p.shape[1]//p.shape[0]
-            N = p.shape[0]
-            #print("nb = ", nb, ", N = ", N);
-            #print(p.shape)
-            #print ("density = ", density)
-            for k in range(nb):
-                density = self.final_density[k]
-                if self.batch < self.t_end:
-                    r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
-                    density = 1 - (1-self.final_density[k])*(1 - r*r*r)
-                A = p[:, k*N:(k+1)*N]
-                A = A - np.diag(np.diag(A))
-                A = np.transpose(A, (1, 0))
-                L=np.reshape(A, (N, N//16, 16))
-                S=np.sum(L*L, axis=-1)
-                SS=np.sort(np.reshape(S, (-1,)))
-                thresh = SS[round(N*N//16*(1-density))]
-                mask = (S>=thresh).astype('float32');
-                mask = np.repeat(mask, 16, axis=1)
-                mask = np.minimum(1, mask + np.diag(np.ones((N,))))
-                mask = np.transpose(mask, (1, 0))
-                p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
-                #print(thresh, np.mean(mask))
-            w[1] = p
-            layer.set_weights(w)
-
-
-class PCMInit(Initializer):
-    def __init__(self, gain=.1, seed=None):
-        self.gain = gain
-        self.seed = seed
-
-    def __call__(self, shape, dtype=None):
-        num_rows = 1
-        for dim in shape[:-1]:
-            num_rows *= dim
-        num_cols = shape[-1]
-        flat_shape = (num_rows, num_cols)
-        if self.seed is not None:
-            np.random.seed(self.seed)
-        a = np.random.uniform(-1.7321, 1.7321, flat_shape)
-        #a[:,0] = math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows
-        #a[:,1] = .5*a[:,0]*a[:,0]*a[:,0]
-        a = a + np.reshape(math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows, (num_rows, 1))
-        return self.gain * a
-
-    def get_config(self):
-        return {
-            'gain': self.gain,
-            'seed': self.seed
-        }
-
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True, adaptation=False):
-    pcm = Input(shape=(None, 3))
-    feat = Input(shape=(None, nb_used_features))
-    pitch = Input(shape=(None, 1))
-    dec_feat = Input(shape=(None, 128))
-    dec_state1 = Input(shape=(rnn_units1,))
-    dec_state2 = Input(shape=(rnn_units2,))
-
-    padding = 'valid' if training else 'same'
-    fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
-    fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
-
-    embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
-    cpcm = Reshape((-1, embed_size*3))(embed(pcm))
-
-    pembed = Embedding(256, 64, name='embed_pitch')
-    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
-
-    cfeat = fconv2(fconv1(cat_feat))
-
-    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
-    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
-
-    cfeat = fdense2(fdense1(cfeat))
-
-    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
-
-    if use_gpu:
-        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a')
-        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b')
-    else:
-        rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a')
-        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b')
-
-    rnn_in = Concatenate()([cpcm, rep(cfeat)])
-    md = MDense(pcm_levels, activation='softmax', name='dual_fc')
-    gru_out1, _ = rnn(rnn_in)
-    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
-    ulaw_prob = md(gru_out2)
-
-    if adaptation:
-        rnn.trainable=False
-        rnn2.trainable=False
-        md.trainable=False
-        embed.Trainable=False
-
-    model = Model([pcm, feat, pitch], ulaw_prob)
-    model.rnn_units1 = rnn_units1
-    model.rnn_units2 = rnn_units2
-    model.nb_used_features = nb_used_features
-    model.frame_size = frame_size
-
-    encoder = Model([feat, pitch], cfeat)
-
-    dec_rnn_in = Concatenate()([cpcm, dec_feat])
-    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
-    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
-    dec_ulaw_prob = md(dec_gru_out2)
-
-    decoder = Model([pcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
-    return model, encoder, decoder
diff --git a/dnn/mdense.py b/dnn/mdense.py
deleted file mode 100644
index c262f8bf..00000000
--- a/dnn/mdense.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from keras import backend as K
-from keras.engine.topology import Layer
-from keras.layers import activations, initializers, regularizers, constraints, InputSpec
-import numpy as np
-import math
-
-class MDense(Layer):
-
-    def __init__(self, outputs,
-                 channels=2,
-                 activation=None,
-                 use_bias=True,
-                 kernel_initializer='glorot_uniform',
-                 bias_initializer='zeros',
-                 kernel_regularizer=None,
-                 bias_regularizer=None,
-                 activity_regularizer=None,
-                 kernel_constraint=None,
-                 bias_constraint=None,
-                 **kwargs):
-        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
-            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
-        super(MDense, self).__init__(**kwargs)
-        self.units = outputs
-        self.channels = channels
-        self.activation = activations.get(activation)
-        self.use_bias = use_bias
-        self.kernel_initializer = initializers.get(kernel_initializer)
-        self.bias_initializer = initializers.get(bias_initializer)
-        self.kernel_regularizer = regularizers.get(kernel_regularizer)
-        self.bias_regularizer = regularizers.get(bias_regularizer)
-        self.activity_regularizer = regularizers.get(activity_regularizer)
-        self.kernel_constraint = constraints.get(kernel_constraint)
-        self.bias_constraint = constraints.get(bias_constraint)
-        self.input_spec = InputSpec(min_ndim=2)
-        self.supports_masking = True
-
-    def build(self, input_shape):
-        assert len(input_shape) >= 2
-        input_dim = input_shape[-1]
-
-        self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
-                                      initializer=self.kernel_initializer,
-                                      name='kernel',
-                                      regularizer=self.kernel_regularizer,
-                                      constraint=self.kernel_constraint)
-        if self.use_bias:
-            self.bias = self.add_weight(shape=(self.units, self.channels),
-                                        initializer=self.bias_initializer,
-                                        name='bias',
-                                        regularizer=self.bias_regularizer,
-                                        constraint=self.bias_constraint)
-        else:
-            self.bias = None
-        self.factor = self.add_weight(shape=(self.units, self.channels),
-                                      initializer='ones',
-                                      name='factor',
-                                      regularizer=self.bias_regularizer,
-                                      constraint=self.bias_constraint)
-        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
-        self.built = True
-
-    def call(self, inputs):
-        output = K.dot(inputs, self.kernel)
-        if self.use_bias:
-            output = output + self.bias
-        output = K.tanh(output) * self.factor
-        output = K.sum(output, axis=-1)
-        if self.activation is not None:
-            output = self.activation(output)
-        return output
-
-    def compute_output_shape(self, input_shape):
-        assert input_shape and len(input_shape) >= 2
-        assert input_shape[-1]
-        output_shape = list(input_shape)
-        output_shape[-1] = self.units
-        return tuple(output_shape)
-
-    def get_config(self):
-        config = {
-            'units': self.units,
-            'activation': activations.serialize(self.activation),
-            'use_bias': self.use_bias,
-            'kernel_initializer': initializers.serialize(self.kernel_initializer),
-            'bias_initializer': initializers.serialize(self.bias_initializer),
-            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
-            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
-            'kernel_constraint': constraints.serialize(self.kernel_constraint),
-            'bias_constraint': constraints.serialize(self.bias_constraint)
-        }
-        base_config = super(MDense, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
diff --git a/dnn/train_lpcnet.py b/dnn/train_lpcnet.py
deleted file mode 100755
index 4c0cdfa3..00000000
--- a/dnn/train_lpcnet.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/usr/bin/python3
-'''Copyright (c) 2018 Mozilla
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-'''
-
-# Train a LPCNet model (note not a Wavenet model)
-
-import lpcnet
-import sys
-import numpy as np
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint
-from ulaw import ulaw2lin, lin2ulaw
-import keras.backend as K
-import h5py
-
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-config = tf.ConfigProto()
-
-# use this option to reserve GPU memory, e.g. for running more than
-# one thing at a time. Best to disable for GPUs with small memory
-config.gpu_options.per_process_gpu_memory_fraction = 0.44
-
-set_session(tf.Session(config=config))
-
-nb_epochs = 120
-
-# Try reducing batch_size if you run out of memory on your GPU
-batch_size = 64
-
-model, _, _ = lpcnet.new_lpcnet_model(training=True)
-
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
-
-feature_file = sys.argv[1]
-pcm_file = sys.argv[2]     # 16 bit unsigned short PCM samples
-frame_size = model.frame_size
-nb_features = 55
-nb_used_features = model.nb_used_features
-feature_chunk_size = 15
-pcm_chunk_size = frame_size*feature_chunk_size
-
-# u for unquantised, load 16 bit PCM samples and convert to mu-law
-
-data = np.fromfile(pcm_file, dtype='uint8')
-nb_frames = len(data)//(4*pcm_chunk_size)
-
-features = np.fromfile(feature_file, dtype='float32')
-
-# limit to discrete number of frames
-data = data[:nb_frames*4*pcm_chunk_size]
-features = features[:nb_frames*feature_chunk_size*nb_features]
-
-features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
-
-sig = np.reshape(data[0::4], (nb_frames, pcm_chunk_size, 1))
-pred = np.reshape(data[1::4], (nb_frames, pcm_chunk_size, 1))
-in_exc = np.reshape(data[2::4], (nb_frames, pcm_chunk_size, 1))
-out_exc = np.reshape(data[3::4], (nb_frames, pcm_chunk_size, 1))
-del data
-
-print("ulaw std = ", np.std(out_exc))
-
-features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
-features = features[:, :, :nb_used_features]
-features[:,:,18:36] = 0
-
-fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
-fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
-features = np.concatenate([fpad1, features, fpad2], axis=1)
-
-
-periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
-
-in_data = np.concatenate([sig, pred, in_exc], axis=-1)
-
-del sig
-del pred
-del in_exc
-
-# dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet30_384_10_G16_{epoch:02d}.h5')
-
-#Set this to True to adapt an existing model (e.g. on new data)
-adaptation = False
-
-if adaptation:
-    #Adapting from an existing model
-    model.load_weights('lpcnet24c_384_10_G16_120.h5')
-    sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
-    lr = 0.0001
-    decay = 0
-else:
-    #Training from scratch
-    sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
-    lr = 0.001
-    decay = 5e-5
-
-model.compile(optimizer=Adam(lr, amsgrad=True, decay=decay), loss='sparse_categorical_crossentropy')
-model.save_weights('lpcnet30_384_10_G16_00.h5');
-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])
diff --git a/dnn/test_lpcnet.py b/dnn/training_tf2/test_lpcnet.py
similarity index 89%
rename from dnn/test_lpcnet.py
rename to dnn/training_tf2/test_lpcnet.py
index e19d2064..90216275 100755
--- a/dnn/test_lpcnet.py
+++ b/dnn/training_tf2/test_lpcnet.py
@@ -28,19 +28,11 @@
 import lpcnet
 import sys
 import numpy as np
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint
 from ulaw import ulaw2lin, lin2ulaw
-import keras.backend as K
 import h5py
 
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-config = tf.ConfigProto()
-config.gpu_options.per_process_gpu_memory_fraction = 0.2
-set_session(tf.Session(config=config))
 
-model, enc, dec = lpcnet.new_lpcnet_model(use_gpu=False)
+model, enc, dec = lpcnet.new_lpcnet_model()
 
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 #model.summary()
@@ -63,7 +55,7 @@
 
 periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
 
 
-model.load_weights('lpcnet20h_384_10_G16_80.h5')
+model.load_weights('lpcnet34bq17_384_01.h5')
 
 order = 16
diff --git a/dnn/ulaw.py b/dnn/ulaw.py
deleted file mode 100644
index b79d4315..00000000
--- a/dnn/ulaw.py
+++ /dev/null
@@ -1,19 +0,0 @@
-
-import numpy as np
-import math
-
-scale = 255.0/32768.0
-scale_1 = 32768.0/255.0
-def ulaw2lin(u):
-    u = u - 128
-    s = np.sign(u)
-    u = np.abs(u)
-    return s*scale_1*(np.exp(u/128.*math.log(256))-1)
-
-
-def lin2ulaw(x):
-    s = np.sign(x)
-    x = np.abs(x)
-    u = (s*(128*np.log(1+scale*x)/math.log(256)))
-    u = np.clip(128 + np.round(u), 0, 255)
-    return u.astype('int16')