From 824dbecaecaf0796696ee79081ebd70f1cffb455 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 9 Jul 2018 18:20:52 -0400 Subject: [PATCH] decoder wip --- dnn/lpcnet.py | 17 +++++++++--- dnn/test_lpcnet.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ dnn/train_lpcnet.py | 9 ++++--- 3 files changed, 82 insertions(+), 8 deletions(-) create mode 100755 dnn/test_lpcnet.py diff --git a/dnn/lpcnet.py b/dnn/lpcnet.py index 75944ce9..21858815 100644 --- a/dnn/lpcnet.py +++ b/dnn/lpcnet.py @@ -19,6 +19,7 @@ def new_wavernn_model(): pcm = Input(shape=(None, 1)) pitch = Input(shape=(None, 1)) feat = Input(shape=(None, nb_used_features)) + dec_feat = Input(shape=(None, 32)) conv1 = Conv1D(16, 7, padding='causal') pconv1 = Conv1D(16, 5, padding='same') @@ -26,7 +27,7 @@ def new_wavernn_model(): fconv1 = Conv1D(128, 3, padding='same') fconv2 = Conv1D(32, 3, padding='same') - if True: + if False: cpcm = conv1(pcm) cpitch = pconv2(pconv1(pitch)) else: @@ -37,10 +38,18 @@ def new_wavernn_model(): rep = Lambda(lambda x: K.repeat_elements(x, 160, 1)) - rnn = CuDNNGRU(rnn_units, return_sequences=True) + rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True) rnn_in = Concatenate()([cpcm, cpitch, rep(cfeat)]) md = MDense(pcm_levels, activation='softmax') - ulaw_prob = md(rnn(rnn_in)) + gru_out, state = rnn(rnn_in) + ulaw_prob = md(gru_out) model = Model([pcm, pitch, feat], ulaw_prob) - return model + encoder = Model(feat, cfeat) + + dec_rnn_in = Concatenate()([cpcm, cpitch, dec_feat]) + dec_gru_out, state = rnn(dec_rnn_in) + dec_ulaw_prob = md(dec_gru_out) + + decoder = Model([pcm, pitch, dec_feat], [dec_ulaw_prob, state]) + return model, encoder, decoder diff --git a/dnn/test_lpcnet.py b/dnn/test_lpcnet.py new file mode 100755 index 00000000..9559fd8c --- /dev/null +++ b/dnn/test_lpcnet.py @@ -0,0 +1,64 @@ +#!/usr/bin/python3 + +import lpcnet +import sys +import numpy as np +from keras.optimizers import Adam +from keras.callbacks import ModelCheckpoint +from ulaw import ulaw2lin, lin2ulaw +import keras.backend as K +import h5py +from adadiff import Adadiff + +#import tensorflow as tf +#from keras.backend.tensorflow_backend import set_session +#config = tf.ConfigProto() +#config.gpu_options.per_process_gpu_memory_fraction = 0.28 +#set_session(tf.Session(config=config)) + +nb_epochs = 40 +batch_size = 64 + +model, enc, dec = lpcnet.new_wavernn_model() +model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) +model.summary() + +pcmfile = sys.argv[1] +feature_file = sys.argv[2] +frame_size = 160 +nb_features = 54 +nb_used_features = lpcnet.nb_used_features +feature_chunk_size = 15 +pcm_chunk_size = frame_size*feature_chunk_size + +data = np.fromfile(pcmfile, dtype='int8') +nb_frames = len(data)//pcm_chunk_size + +features = np.fromfile(feature_file, dtype='float32') + +data = data[:nb_frames*pcm_chunk_size] +features = features[:nb_frames*feature_chunk_size*nb_features] + +in_data = np.concatenate([data[0:1], data[:-1]])/16.; + +features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features)) + +in_data = np.reshape(in_data, (nb_frames*pcm_chunk_size, 1)) +out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1)) + + +model.load_weights('lpcnet1h_30.h5') + +order = 16 + +pcm = 0.*out_data +for c in range(1, nb_frames): + for fr in range(1, feature_chunk_size): + f = c*feature_chunk_size + fr + a = features[c, fr, nb_used_features+1:] + #print(a) + gain = 1; + for i in range(frame_size): + pcm[f*frame_size + i, 0] = gain*out_data[f*frame_size + i, 0] - sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0]) + print(pcm[f*frame_size + i, 0]) + diff --git a/dnn/train_lpcnet.py b/dnn/train_lpcnet.py index 437b4330..833dc34c 100755 --- a/dnn/train_lpcnet.py +++ b/dnn/train_lpcnet.py @@ -8,18 +8,19 @@ from keras.callbacks import ModelCheckpoint from ulaw import ulaw2lin, lin2ulaw import keras.backend as K import h5py +from adadiff import Adadiff import tensorflow as tf from keras.backend.tensorflow_backend import set_session config = tf.ConfigProto() -config.gpu_options.per_process_gpu_memory_fraction = 0.44 +config.gpu_options.per_process_gpu_memory_fraction = 0.28 set_session(tf.Session(config=config)) nb_epochs = 40 batch_size = 64 model = lpcnet.new_wavernn_model() -model.compile(optimizer=Adam(0.0008), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) +model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) model.summary() pcmfile = sys.argv[1] @@ -62,8 +63,8 @@ features = features[:, :, :nb_used_features] # f.create_dataset('data', data=in_data[:50000, :, :]) # f.create_dataset('feat', data=features[:50000, :, :]) -checkpoint = ModelCheckpoint('lpcnet1g_{epoch:02d}.h5') +checkpoint = ModelCheckpoint('lpcnet1k_{epoch:02d}.h5') #model.load_weights('wavernn1c_01.h5') -model.compile(optimizer=Adam(0.002, amsgrad=True, decay=2e-4), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) +model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) model.fit([in_data, in_pitch, features], out_data, batch_size=batch_size, epochs=30, validation_split=0.2, callbacks=[checkpoint])