From 5d8a1313d621491a2bd6743740ef5385d9a6d89d Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Wed, 11 Jul 2018 01:30:30 -0400 Subject: [PATCH] decodes something... --- dnn/lpcnet.py | 7 ++++--- dnn/test_lpcnet.py | 22 +++++++++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/dnn/lpcnet.py b/dnn/lpcnet.py index 21858815..dcb7f3dd 100644 --- a/dnn/lpcnet.py +++ b/dnn/lpcnet.py @@ -9,7 +9,7 @@ import numpy as np import h5py import sys -rnn_units=64 +rnn_units=512 pcm_bits = 8 pcm_levels = 2**pcm_bits nb_used_features = 37 @@ -20,6 +20,7 @@ def new_wavernn_model(): pitch = Input(shape=(None, 1)) feat = Input(shape=(None, nb_used_features)) dec_feat = Input(shape=(None, 32)) + dec_state = Input(shape=(rnn_units,)) conv1 = Conv1D(16, 7, padding='causal') pconv1 = Conv1D(16, 5, padding='same') @@ -48,8 +49,8 @@ def new_wavernn_model(): encoder = Model(feat, cfeat) dec_rnn_in = Concatenate()([cpcm, cpitch, dec_feat]) - dec_gru_out, state = rnn(dec_rnn_in) + dec_gru_out, state = rnn(dec_rnn_in, initial_state=dec_state) dec_ulaw_prob = md(dec_gru_out) - decoder = Model([pcm, pitch, dec_feat], [dec_ulaw_prob, state]) + decoder = Model([pcm, pitch, dec_feat, dec_state], [dec_ulaw_prob, state]) return model, encoder, decoder diff --git a/dnn/test_lpcnet.py b/dnn/test_lpcnet.py index 185893be..4c47274e 100755 --- a/dnn/test_lpcnet.py +++ b/dnn/test_lpcnet.py @@ -21,7 +21,7 @@ batch_size = 64 model, enc, dec = lpcnet.new_wavernn_model() model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) -model.summary() +#model.summary() pcmfile = sys.argv[1] feature_file = sys.argv[2] @@ -47,14 +47,15 @@ in_data = np.reshape(in_data, (nb_frames*pcm_chunk_size, 1)) out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1)) -model.load_weights('lpcnet1h_30.h5') +model.load_weights('lpcnet1i_30.h5') order = 16 pcm = 0.*out_data -exc = 0.*out_data +exc = out_data-0 pitch = np.zeros((1, 1, 1), dtype='float32') -iexc = np.zeros((1, 1, 1), dtype='float32') +fexc = np.zeros((1, 1, 1), dtype='float32') +iexc = np.zeros((1, 1, 1), dtype='int16') state = np.zeros((1, lpcnet.rnn_units), dtype='float32') for c in range(1, nb_frames): cfeat = enc.predict(features[c:c+1, :, :nb_used_features]) @@ -68,7 +69,14 @@ for c in range(1, nb_frames): period = period - 4 for i in range(frame_size): pitch[0, 0, 0] = exc[f*frame_size + i - period, 0] - #p, state = dec.predict([ - pcm[f*frame_size + i, 0] = gain*out_data[f*frame_size + i, 0] - sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0]) - print(pcm[f*frame_size + i, 0]) + fexc[0, 0, 0] = exc[f*frame_size + i - 1] + #print(cfeat.shape) + p, state = dec.predict([fexc, pitch, cfeat[:, fr:fr+1, :], state]) + p = p/(1e-5 + np.sum(p)) + #print(np.sum(p)) + iexc[0, 0, 0] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))-128 + exc[f*frame_size + i] = iexc[0, 0, 0]/16. + #out_data[f*frame_size + i, 0] = iexc[0, 0, 0] + pcm[f*frame_size + i, 0] = gain*iexc[0, 0, 0] - sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0]) + print(iexc[0, 0, 0], out_data[f*frame_size + i, 0], pcm[f*frame_size + i, 0])