diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c
index c740a077..3c289fd8 100644
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -54,7 +54,7 @@ static void print_vector(float *x, int N)
 }
 #endif
 
-void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
+void run_frame_network(LPCNetState *lpcnet, float *rc, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
 {
     NNetState *net;
     float condition[FEATURE_DENSE2_OUT_SIZE];
@@ -74,6 +74,7 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b
     memcpy(lpcnet->old_input[0], in, FRAME_INPUT_SIZE*sizeof(in[0]));
     compute_dense(&feature_dense1, dense1_out, conv2_out);
     compute_dense(&feature_dense2, condition, dense1_out);
+    RNN_COPY(rc, condition, LPC_ORDER);
     compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
     compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
     if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
@@ -154,32 +155,13 @@ void rc2lpc(float *lpc, const float *rc)
     }
 }
 
-void lpc_from_features(LPCNetState *lpcnet,const float *features)
-{
-    NNetState *net;
-    float in[NB_FEATURES];
-    float conv1_out[F2RC_CONV1_OUT_SIZE];
-    float conv2_out[F2RC_CONV2_OUT_SIZE];
-    float dense1_out[F2RC_DENSE3_OUT_SIZE];
-    float rc[LPC_ORDER];
-    net = &lpcnet->nnet;
-    RNN_COPY(in, features, NB_FEATURES);
-    compute_conv1d(&f2rc_conv1, conv1_out, net->f2rc_conv1_state, in);
-    if (lpcnet->frame_count < F2RC_CONV1_DELAY + 1) RNN_CLEAR(conv1_out, F2RC_CONV1_OUT_SIZE);
-    compute_conv1d(&f2rc_conv2, conv2_out, net->f2rc_conv2_state, conv1_out);
-    if (lpcnet->frame_count < (FEATURES_DELAY_F2RC + 1)) RNN_CLEAR(conv2_out, F2RC_CONV2_OUT_SIZE);
-    memmove(lpcnet->old_input_f2rc[1], lpcnet->old_input_f2rc[0], (FEATURES_DELAY_F2RC-1)*NB_FEATURES*sizeof(in[0]));
-    memcpy(lpcnet->old_input_f2rc[0], in, NB_FEATURES*sizeof(in[0]));
-    compute_dense(&f2rc_dense3, dense1_out, conv2_out);
-    compute_dense(&f2rc_dense4_outp_rc, rc, dense1_out);
-    rc2lpc(lpcnet->old_lpc[0], rc);
-}
 #endif
 
 LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N)
 {
     int i;
     float lpc[LPC_ORDER];
+    float rc[LPC_ORDER];
     float gru_a_condition[3*GRU_A_STATE_SIZE];
     float gru_b_condition[3*GRU_B_STATE_SIZE];
     int pitch;
@@ -188,10 +170,9 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features,
     pitch = IMIN(255, IMAX(33, pitch));
     memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
     lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
-    run_frame_network(lpcnet, gru_a_condition, gru_b_condition, features, pitch);
+    run_frame_network(lpcnet, rc, gru_a_condition, gru_b_condition, features, pitch);
 #ifdef END2END
-    lpc_from_features(lpcnet,features);
-    memcpy(lpc, lpcnet->old_lpc[0], LPC_ORDER*sizeof(lpc[0]));
+    rc2lpc(lpc, rc);
 #else
     memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
     memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h
index 9ec8e50f..fedcd58e 100644
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -22,18 +22,11 @@
 
 #define FEATURES_DELAY (FEATURE_CONV1_DELAY + FEATURE_CONV2_DELAY)
 
-#ifdef END2END
-    #define FEATURES_DELAY_F2RC (F2RC_CONV1_DELAY + F2RC_CONV2_DELAY)
-#endif
-
 struct LPCNetState {
     NNetState nnet;
     int last_exc;
     float last_sig[LPC_ORDER];
     float old_input[FEATURES_DELAY][FEATURE_CONV2_OUT_SIZE];
-#ifdef END2END
-    float old_input_f2rc[FEATURES_DELAY_F2RC][F2RC_CONV2_OUT_SIZE];
-#endif
     float old_lpc[FEATURES_DELAY][LPC_ORDER];
     float old_gain[FEATURES_DELAY];
     float sampling_logit_table[256];
diff --git a/dnn/training_tf2/difflpc.py b/dnn/training_tf2/difflpc.py
deleted file mode 100644
index efa5e21c..00000000
--- a/dnn/training_tf2/difflpc.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""
-Tensorflow model (differentiable lpc) to learn the lpcs from the features
-"""
-
-from tensorflow.keras.models import Model
-from tensorflow.keras.layers import Input, Dense, Concatenate, Lambda, Conv1D, Multiply, Layer, LeakyReLU
-from tensorflow.keras import backend as K
-from tf_funcs import diff_rc2lpc
-
-frame_size = 160
-lpcoeffs_N = 16
-
-def difflpc(nb_used_features = 20, training=False):
-    feat = Input(shape=(None, nb_used_features)) # BFCC
-    padding = 'valid' if training else 'same'
-    L1 = Conv1D(100, 3, padding=padding, activation='tanh', name='f2rc_conv1')
-    L2 = Conv1D(75, 3, padding=padding, activation='tanh', name='f2rc_conv2')
-    L3 = Dense(50, activation='tanh',name = 'f2rc_dense3')
-    L4 = Dense(lpcoeffs_N, activation='tanh',name = "f2rc_dense4_outp_rc")
-    rc = L4(L3(L2(L1(feat))))
-    # Differentiable RC 2 LPC
-    lpcoeffs = diff_rc2lpc(name = "rc2lpc")(rc)
-
-    model = Model(feat,lpcoeffs,name = 'f2lpc')
-    model.nb_used_features = nb_used_features
-    model.frame_size = frame_size
-    return model
diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py
index 102b021d..01f1c987 100755
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -291,12 +291,6 @@ for i, layer in enumerate(model.layers):
     if layer.dump_layer(f, hf):
         layer_list.append(layer.name)
 
-if flag_e2e:
-    print("-- Weight Dumping for the Differentiable LPC Block --")
-    for i, layer in enumerate(model.get_layer("f2lpc").layers):
-        if layer.dump_layer(f, hf):
-            layer_list.append(layer.name)
-
 dump_sparse_gru(model.get_layer('gru_a'), f, hf)
 
 hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py
index e08b809a..2f14ecd3 100644
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -40,7 +40,6 @@ import h5py
 import sys
 from tf_funcs import *
 from diffembed import diff_Embed
-import difflpc
 
 frame_size = 160
 pcm_bits = 8
@@ -226,6 +225,15 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
     padding = 'valid' if training else 'same'
     fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
     fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
+    pembed = Embedding(256, 64, name='embed_pitch')
+    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
+
+    cfeat = fconv2(fconv1(cat_feat))
+
+    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
+    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
+
+    cfeat = fdense2(fdense1(cfeat))
 
     if not flag_e2e:
         embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
@@ -233,8 +241,7 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
     else:
         Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
         error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
-        feat2lpc = difflpc.difflpc(training = training)
-        lpcoeffs = feat2lpc(feat)
+        lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
         tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
         past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
         embed = diff_Embed(name='embed_sig',initializer = PCMInit())
@@ -243,15 +250,6 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
     cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
     cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
 
-    pembed = Embedding(256, 64, name='embed_pitch')
-    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
-
-    cfeat = fconv2(fconv1(cat_feat))
-
-    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
-    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
-
-    cfeat = fdense2(fdense1(cfeat))
 
     rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
 
diff --git a/dnn/training_tf2/tf_funcs.py b/dnn/training_tf2/tf_funcs.py
index cf593184..467c4138 100644
--- a/dnn/training_tf2/tf_funcs.py
+++ b/dnn/training_tf2/tf_funcs.py
@@ -49,6 +49,7 @@ class diff_rc2lpc(Layer):
             temp = Concatenate(axis = 2)([temp,input[1]])
             return temp
         Llpc = Lambda(pred_lpc_recursive)
+        inputs = inputs[:,:,:lpcoeffs_N]
         lpc_init = inputs
        lpc_init = inputs
         for i in range(1,lpcoeffs_N):
             lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
@@ -66,4 +67,4 @@ class diff_lpc2rc(Layer):
         for i in range(1,lpcoeffs_N):
             j = (lpcoeffs_N - i + 1)
             rc_init = Lrc([rc_init[:,:,:(j - 1)],rc_init[:,:,(j - 1):]])
-        return rc_init
\ No newline at end of file
+        return rc_init
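
With lpc_from_features() removed, the END2END synthesis path reduces to RNN_COPY(rc, condition, LPC_ORDER) followed by rc2lpc(lpc, rc): the first LPC_ORDER outputs of feature_dense2 are treated as reflection coefficients and converted to LPCs with the step-up (Levinson) recursion, the same update that pred_lpc_recursive() in tf_funcs.py expresses differentiably as lpc + k*reverse(lpc). For reference, a minimal standalone C sketch of that recursion follows; the names rc_to_lpc and the test driver are illustrative, not repository code, and LPCNet's own rc2lpc() may differ in sign convention.

/* Illustrative sketch: reflection coefficients (RCs) -> direct-form LPCs
 * via the step-up recursion. Mirrors pred_lpc_recursive() in tf_funcs.py. */
#include <stdio.h>

#define LPC_ORDER 16

static void rc_to_lpc(float *lpc, const float *rc)
{
    float tmp[LPC_ORDER];
    int i, j;
    for (i = 0; i < LPC_ORDER; i++) {
        /* Extend the order-i predictor to order i+1:
         * a'[j] = a[j] + rc[i]*a[i-1-j], new last coefficient is rc[i]. */
        for (j = 0; j < i; j++)
            tmp[j] = lpc[j] + rc[i]*lpc[i-1-j];
        for (j = 0; j < i; j++)
            lpc[j] = tmp[j];
        lpc[i] = rc[i];
    }
}

int main(void)
{
    /* All RCs strictly inside (-1, 1), so the resulting filter is stable. */
    float rc[LPC_ORDER], lpc[LPC_ORDER];
    int i;
    for (i = 0; i < LPC_ORDER; i++) rc[i] = 0.5f/(i+1);
    rc_to_lpc(lpc, rc);
    for (i = 0; i < LPC_ORDER; i++) printf("lpc[%d] = %f\n", i, lpc[i]);
    return 0;
}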