Sharing conditioning network with LPC

Jean-Marc Valin 2021-07-24 18:09:20 -04:00
parent c1532559a2
commit ab9a09266f
6 changed files with 17 additions and 77 deletions

lpcnet.c

@@ -54,7 +54,7 @@ static void print_vector(float *x, int N)
 }
 #endif
 
-void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
+void run_frame_network(LPCNetState *lpcnet, float *rc, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
 {
    NNetState *net;
    float condition[FEATURE_DENSE2_OUT_SIZE];
@@ -74,6 +74,7 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b
    memcpy(lpcnet->old_input[0], in, FRAME_INPUT_SIZE*sizeof(in[0]));
    compute_dense(&feature_dense1, dense1_out, conv2_out);
    compute_dense(&feature_dense2, condition, dense1_out);
+   RNN_COPY(rc, condition, LPC_ORDER);
    compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
    compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
    if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
@@ -154,32 +155,13 @@ void rc2lpc(float *lpc, const float *rc)
    }
 }
 
-void lpc_from_features(LPCNetState *lpcnet,const float *features)
-{
-   NNetState *net;
-   float in[NB_FEATURES];
-   float conv1_out[F2RC_CONV1_OUT_SIZE];
-   float conv2_out[F2RC_CONV2_OUT_SIZE];
-   float dense1_out[F2RC_DENSE3_OUT_SIZE];
-   float rc[LPC_ORDER];
-   net = &lpcnet->nnet;
-   RNN_COPY(in, features, NB_FEATURES);
-   compute_conv1d(&f2rc_conv1, conv1_out, net->f2rc_conv1_state, in);
-   if (lpcnet->frame_count < F2RC_CONV1_DELAY + 1) RNN_CLEAR(conv1_out, F2RC_CONV1_OUT_SIZE);
-   compute_conv1d(&f2rc_conv2, conv2_out, net->f2rc_conv2_state, conv1_out);
-   if (lpcnet->frame_count < (FEATURES_DELAY_F2RC + 1)) RNN_CLEAR(conv2_out, F2RC_CONV2_OUT_SIZE);
-   memmove(lpcnet->old_input_f2rc[1], lpcnet->old_input_f2rc[0], (FEATURES_DELAY_F2RC-1)*NB_FEATURES*sizeof(in[0]));
-   memcpy(lpcnet->old_input_f2rc[0], in, NB_FEATURES*sizeof(in[0]));
-   compute_dense(&f2rc_dense3, dense1_out, conv2_out);
-   compute_dense(&f2rc_dense4_outp_rc, rc, dense1_out);
-   rc2lpc(lpcnet->old_lpc[0], rc);
-}
 #endif
 
 LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N)
 {
    int i;
    float lpc[LPC_ORDER];
+   float rc[LPC_ORDER];
    float gru_a_condition[3*GRU_A_STATE_SIZE];
    float gru_b_condition[3*GRU_B_STATE_SIZE];
    int pitch;
@@ -188,10 +170,9 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features,
    pitch = IMIN(255, IMAX(33, pitch));
    memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
    lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
-   run_frame_network(lpcnet, gru_a_condition, gru_b_condition, features, pitch);
+   run_frame_network(lpcnet, rc, gru_a_condition, gru_b_condition, features, pitch);
 #ifdef END2END
-   lpc_from_features(lpcnet,features);
-   memcpy(lpc, lpcnet->old_lpc[0], LPC_ORDER*sizeof(lpc[0]));
+   rc2lpc(lpc, rc);
 #else
    memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
    memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
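
For reference, the rc2lpc() call that replaces lpc_from_features() is the standard step-up (Levinson) recursion from reflection coefficients to direct-form LPC coefficients, with the RCs now taken from the first LPC_ORDER outputs of the shared conditioning layer. A minimal NumPy sketch of that recursion (rc2lpc_ref is a hypothetical name; LPC_ORDER = 16 is assumed to match the C code):

import numpy as np

LPC_ORDER = 16

def rc2lpc_ref(rc):
    # Step-up recursion: fold each reflection coefficient k_i into the
    # lower-order predictor coefficients; lpc[i] itself ends up as k_i.
    lpc = np.array(rc, dtype=np.float32)
    for i in range(1, LPC_ORDER):
        k = lpc[i]  # untouched so far, still equal to rc[i]
        lpc[:i] = lpc[:i] + k * lpc[:i][::-1]  # RHS is a fresh array, so the update is simultaneous
    return lpc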

lpcnet_private.h

@@ -22,18 +22,11 @@
 #define FEATURES_DELAY (FEATURE_CONV1_DELAY + FEATURE_CONV2_DELAY)
-#ifdef END2END
-#define FEATURES_DELAY_F2RC (F2RC_CONV1_DELAY + F2RC_CONV2_DELAY)
-#endif
 
 struct LPCNetState {
    NNetState nnet;
    int last_exc;
    float last_sig[LPC_ORDER];
    float old_input[FEATURES_DELAY][FEATURE_CONV2_OUT_SIZE];
-#ifdef END2END
-   float old_input_f2rc[FEATURES_DELAY_F2RC][F2RC_CONV2_OUT_SIZE];
-#endif
    float old_lpc[FEATURES_DELAY][LPC_ORDER];
    float old_gain[FEATURES_DELAY];
    float sampling_logit_table[256];

difflpc.py

@@ -1,27 +0,0 @@
-"""
-Tensorflow model (differentiable lpc) to learn the lpcs from the features
-"""
-from tensorflow.keras.models import Model
-from tensorflow.keras.layers import Input, Dense, Concatenate, Lambda, Conv1D, Multiply, Layer, LeakyReLU
-from tensorflow.keras import backend as K
-from tf_funcs import diff_rc2lpc
-
-frame_size = 160
-lpcoeffs_N = 16
-
-def difflpc(nb_used_features = 20, training=False):
-    feat = Input(shape=(None, nb_used_features)) # BFCC
-    padding = 'valid' if training else 'same'
-    L1 = Conv1D(100, 3, padding=padding, activation='tanh', name='f2rc_conv1')
-    L2 = Conv1D(75, 3, padding=padding, activation='tanh', name='f2rc_conv2')
-    L3 = Dense(50, activation='tanh',name = 'f2rc_dense3')
-    L4 = Dense(lpcoeffs_N, activation='tanh',name = "f2rc_dense4_outp_rc")
-    rc = L4(L3(L2(L1(feat))))
-    # Differentiable RC 2 LPC
-    lpcoeffs = diff_rc2lpc(name = "rc2lpc")(rc)
-    model = Model(feat,lpcoeffs,name = 'f2lpc')
-    model.nb_used_features = nb_used_features
-    model.frame_size = frame_size
-    return model
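
Worth noting why this deleted network predicted reflection coefficients rather than LPCs directly: the tanh activation on f2rc_dense4_outp_rc bounds every RC to (-1, 1), and |k_i| < 1 for all i is exactly the Schur-Cohn condition for a stable, minimum-phase synthesis filter. The shared conditioning path keeps this property, assuming the conditioning dense layer the RCs are now copied from is also tanh-activated. A quick NumPy check, reusing the hypothetical rc2lpc_ref() sketch above:

import numpy as np

rng = np.random.default_rng(0)
rc = np.tanh(rng.standard_normal(16)).astype(np.float32)  # |rc| < 1 by construction
lpc = rc2lpc_ref(rc)
# With A(z) = 1 + sum_j lpc[j] * z^-(j+1), stability means all roots lie inside the unit circle.
roots = np.roots(np.concatenate(([1.0], lpc)))
assert np.all(np.abs(roots) < 1.0)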

dump_lpcnet.py

@@ -291,12 +291,6 @@ for i, layer in enumerate(model.layers):
     if layer.dump_layer(f, hf):
         layer_list.append(layer.name)
 
-if flag_e2e:
-    print("-- Weight Dumping for the Differentiable LPC Block --")
-    for i, layer in enumerate(model.get_layer("f2lpc").layers):
-        if layer.dump_layer(f, hf):
-            layer_list.append(layer.name)
-
 dump_sparse_gru(model.get_layer('gru_a'), f, hf)
 
 hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))

lpcnet.py

@@ -40,7 +40,6 @@ import h5py
 import sys
 from tf_funcs import *
 from diffembed import diff_Embed
-import difflpc
 
 frame_size = 160
 pcm_bits = 8
@@ -226,23 +225,6 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
     padding = 'valid' if training else 'same'
     fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
     fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
 
-    if not flag_e2e:
-        embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
-        cpcm = Reshape((-1, embed_size*3))(embed(pcm))
-    else:
-        Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
-        error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
-        feat2lpc = difflpc.difflpc(training = training)
-        lpcoeffs = feat2lpc(feat)
-        tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
-        past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
-        embed = diff_Embed(name='embed_sig',initializer = PCMInit())
-        cpcm = Concatenate()([Input_extractor([pcm,0]),tensor_preds,past_errors])
-        cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
-        cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
-        cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
-
     pembed = Embedding(256, 64, name='embed_pitch')
     cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
@@ -253,6 +235,22 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
     cfeat = fdense2(fdense1(cfeat))
 
+    if not flag_e2e:
+        embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
+        cpcm = Reshape((-1, embed_size*3))(embed(pcm))
+    else:
+        Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
+        error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
+        lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
+        tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
+        past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
+        embed = diff_Embed(name='embed_sig',initializer = PCMInit())
+        cpcm = Concatenate()([Input_extractor([pcm,0]),tensor_preds,past_errors])
+        cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
+        cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
+        cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
+
     rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
     quant = quant_regularizer if quantize else None
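
The net effect of these two hunks: the end-to-end branch moves below the conditioning stack, so the reflection coefficients are read straight out of cfeat instead of being produced by the separate difflpc network. A hypothetical toy model illustrating just that wiring (layer sizes are illustrative, and a slicing Lambda stands in for diff_rc2lpc):

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Dense, Lambda

lpcoeffs_N = 16
nb_used_features = 20

feat = Input(shape=(None, nb_used_features))
x = Conv1D(128, 3, padding='same', activation='tanh')(feat)  # stands in for feature_conv1
x = Conv1D(128, 3, padding='same', activation='tanh')(x)     # stands in for feature_conv2
x = Dense(128, activation='tanh')(x)                         # stands in for feature_dense1
cfeat = Dense(128, activation='tanh')(x)                     # stands in for feature_dense2
# The first lpcoeffs_N channels of the conditioning output double as RCs.
rc = Lambda(lambda c: c[:, :, :lpcoeffs_N])(cfeat)
toy = Model(feat, [cfeat, rc])

This sharing is also what lets the C decoder fetch the RCs with a single RNN_COPY(rc, condition, LPC_ORDER) instead of running the deleted f2rc convolution stack every frame.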

tf_funcs.py

@@ -49,6 +49,7 @@ class diff_rc2lpc(Layer):
             temp = Concatenate(axis = 2)([temp,input[1]])
             return temp
         Llpc = Lambda(pred_lpc_recursive)
+        inputs = inputs[:,:,:lpcoeffs_N]
         lpc_init = inputs
         for i in range(1,lpcoeffs_N):
             lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
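
The added slice makes diff_rc2lpc consume only the first lpcoeffs_N channels of its input, which after this commit is the full-width conditioning vector cfeat rather than a dedicated 16-channel RC output; the recursion itself is unchanged. Dropping the batch and time dimensions, the Lambda recursion reduces to the plain-NumPy reference below (diff_rc2lpc_ref is a hypothetical checking helper, not repo code); it reproduces the same step-up as rc2lpc() on the C side:

import numpy as np

lpcoeffs_N = 16

def diff_rc2lpc_ref(cfeat_frame):
    rc = np.asarray(cfeat_frame[:lpcoeffs_N], dtype=np.float32)  # the slice added above
    lpc = rc[:1]
    for i in range(1, lpcoeffs_N):
        k = rc[i]
        # pred_lpc_recursive: step-up on the current prefix, then append k
        lpc = np.concatenate([lpc + k * lpc[::-1], [k]])
    return lpc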