Sharing conditioning network with LPC
This commit is contained in:
parent
c1532559a2
commit
ab9a09266f
6 changed files with 17 additions and 77 deletions
29
dnn/lpcnet.c
29
dnn/lpcnet.c
|
@ -54,7 +54,7 @@ static void print_vector(float *x, int N)
|
|||
}
|
||||
#endif
|
||||
|
||||
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
|
||||
void run_frame_network(LPCNetState *lpcnet, float *rc, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
|
||||
{
|
||||
NNetState *net;
|
||||
float condition[FEATURE_DENSE2_OUT_SIZE];
|
||||
|
@ -74,6 +74,7 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b
|
|||
memcpy(lpcnet->old_input[0], in, FRAME_INPUT_SIZE*sizeof(in[0]));
|
||||
compute_dense(&feature_dense1, dense1_out, conv2_out);
|
||||
compute_dense(&feature_dense2, condition, dense1_out);
|
||||
RNN_COPY(rc, condition, LPC_ORDER);
|
||||
compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
|
||||
compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
|
||||
if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
|
||||
|
@ -154,32 +155,13 @@ void rc2lpc(float *lpc, const float *rc)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Derives LPC coefficients from one `features` vector with the f2rc_* layers
 * and writes them to lpcnet->old_lpc[0].
 *
 * Data flow: features -> f2rc_conv1 -> f2rc_conv2 -> f2rc_dense3 ->
 * f2rc_dense4_outp_rc -> rc2lpc().
 *
 * lpcnet:   state object. Its nnet.f2rc_conv1_state / nnet.f2rc_conv2_state
 *           buffers are handed to compute_conv1d(), old_input_f2rc and
 *           old_lpc[0] are written, and frame_count is only read.
 * features: input vector; NB_FEATURES floats are copied from it.
 */
void lpc_from_features(LPCNetState *lpcnet,const float *features)
|
||||
{
|
||||
NNetState *net;
|
||||
float in[NB_FEATURES];
|
||||
float conv1_out[F2RC_CONV1_OUT_SIZE];
|
||||
float conv2_out[F2RC_CONV2_OUT_SIZE];
|
||||
float dense1_out[F2RC_DENSE3_OUT_SIZE];
|
||||
/* Output of the last dense layer; presumably reflection coefficients, given rc2lpc() below. */
float rc[LPC_ORDER];
|
||||
net = &lpcnet->nnet;
|
||||
/* Work on a local copy of the first NB_FEATURES input values. */
RNN_COPY(in, features, NB_FEATURES);
|
||||
compute_conv1d(&f2rc_conv1, conv1_out, net->f2rc_conv1_state, in);
|
||||
/* While frame_count <= F2RC_CONV1_DELAY the conv1 output is discarded
   (RNN_CLEAR presumably zero-fills the buffer -- confirm). */
if (lpcnet->frame_count < F2RC_CONV1_DELAY + 1) RNN_CLEAR(conv1_out, F2RC_CONV1_OUT_SIZE);
|
||||
compute_conv1d(&f2rc_conv2, conv2_out, net->f2rc_conv2_state, conv1_out);
|
||||
/* Same treatment for conv2 while frame_count <= FEATURES_DELAY_F2RC. */
if (lpcnet->frame_count < (FEATURES_DELAY_F2RC + 1)) RNN_CLEAR(conv2_out, F2RC_CONV2_OUT_SIZE);
|
||||
/* Shift the input history towards higher indices (memmove because the
   source and destination ranges can overlap), then store the newest input
   at index 0.
   NOTE(review): old_input_f2rc is declared with F2RC_CONV2_OUT_SIZE columns
   in LPCNetState, but both copies here are sized in NB_FEATURES units --
   confirm the row stride mismatch is intended. */
memmove(lpcnet->old_input_f2rc[1], lpcnet->old_input_f2rc[0], (FEATURES_DELAY_F2RC-1)*NB_FEATURES*sizeof(in[0]));
|
||||
memcpy(lpcnet->old_input_f2rc[0], in, NB_FEATURES*sizeof(in[0]));
|
||||
compute_dense(&f2rc_dense3, dense1_out, conv2_out);
|
||||
compute_dense(&f2rc_dense4_outp_rc, rc, dense1_out);
|
||||
/* Convert rc to LPC coefficients directly into the newest old_lpc slot. */
rc2lpc(lpcnet->old_lpc[0], rc);
|
||||
}
|
||||
#endif
|
||||
|
||||
LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N)
|
||||
{
|
||||
int i;
|
||||
float lpc[LPC_ORDER];
|
||||
float rc[LPC_ORDER];
|
||||
float gru_a_condition[3*GRU_A_STATE_SIZE];
|
||||
float gru_b_condition[3*GRU_B_STATE_SIZE];
|
||||
int pitch;
|
||||
|
@ -188,10 +170,9 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features,
|
|||
pitch = IMIN(255, IMAX(33, pitch));
|
||||
memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
|
||||
lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
|
||||
run_frame_network(lpcnet, gru_a_condition, gru_b_condition, features, pitch);
|
||||
run_frame_network(lpcnet, rc, gru_a_condition, gru_b_condition, features, pitch);
|
||||
#ifdef END2END
|
||||
lpc_from_features(lpcnet,features);
|
||||
memcpy(lpc, lpcnet->old_lpc[0], LPC_ORDER*sizeof(lpc[0]));
|
||||
rc2lpc(lpc, rc);
|
||||
#else
|
||||
memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
|
||||
memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
|
||||
|
|
|
@ -22,18 +22,11 @@
|
|||
|
||||
#define FEATURES_DELAY (FEATURE_CONV1_DELAY + FEATURE_CONV2_DELAY)
|
||||
|
||||
#ifdef END2END
|
||||
#define FEATURES_DELAY_F2RC (F2RC_CONV1_DELAY + F2RC_CONV2_DELAY)
|
||||
#endif
|
||||
|
||||
struct LPCNetState {
|
||||
NNetState nnet;
|
||||
int last_exc;
|
||||
float last_sig[LPC_ORDER];
|
||||
float old_input[FEATURES_DELAY][FEATURE_CONV2_OUT_SIZE];
|
||||
#ifdef END2END
|
||||
float old_input_f2rc[FEATURES_DELAY_F2RC][F2RC_CONV2_OUT_SIZE];
|
||||
#endif
|
||||
float old_lpc[FEATURES_DELAY][LPC_ORDER];
|
||||
float old_gain[FEATURES_DELAY];
|
||||
float sampling_logit_table[256];
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
"""
|
||||
Tensorflow model (differentiable lpc) to learn the lpcs from the features
|
||||
"""
|
||||
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import Input, Dense, Concatenate, Lambda, Conv1D, Multiply, Layer, LeakyReLU
|
||||
from tensorflow.keras import backend as K
|
||||
from tf_funcs import diff_rc2lpc
|
||||
|
||||
frame_size = 160
|
||||
lpcoeffs_N = 16
|
||||
|
||||
# Builds and returns the Keras model named 'f2lpc', which maps a feature
# sequence to LPC coefficients.
#
# Parameters:
#   nb_used_features: size of the last axis of the input tensor (default 20).
#   training: when True the Conv1D layers use 'valid' padding, otherwise 'same'.
#
# Returns: a Model whose input is `feat` and whose output is the result of
# diff_rc2lpc; `nb_used_features` and `frame_size` are attached to the model
# as attributes.
def difflpc(nb_used_features = 20, training=False):
|
||||
feat = Input(shape=(None, nb_used_features)) # BFCC
|
||||
# 'valid' padding shortens the sequence at each kernel-size-3 convolution,
# while 'same' keeps the sequence length unchanged.
padding = 'valid' if training else 'same'
|
||||
# Layer names match the f2rc_* identifiers used by the C inference code
# in lpc_from_features().
L1 = Conv1D(100, 3, padding=padding, activation='tanh', name='f2rc_conv1')
|
||||
L2 = Conv1D(75, 3, padding=padding, activation='tanh', name='f2rc_conv2')
|
||||
L3 = Dense(50, activation='tanh',name = 'f2rc_dense3')
|
||||
# tanh keeps every output strictly inside (-1, 1).
L4 = Dense(lpcoeffs_N, activation='tanh',name = "f2rc_dense4_outp_rc")
|
||||
# Chain the four layers: conv1 -> conv2 -> dense3 -> dense4.
rc = L4(L3(L2(L1(feat))))
|
||||
# Differentiable RC 2 LPC
|
||||
lpcoeffs = diff_rc2lpc(name = "rc2lpc")(rc)
|
||||
|
||||
model = Model(feat,lpcoeffs,name = 'f2lpc')
|
||||
# Record the configuration on the model object itself.
model.nb_used_features = nb_used_features
|
||||
model.frame_size = frame_size
|
||||
return model
|
|
@ -291,12 +291,6 @@ for i, layer in enumerate(model.layers):
|
|||
if layer.dump_layer(f, hf):
|
||||
layer_list.append(layer.name)
|
||||
|
||||
if flag_e2e:
|
||||
print("-- Weight Dumping for the Differentiable LPC Block --")
|
||||
for i, layer in enumerate(model.get_layer("f2lpc").layers):
|
||||
if layer.dump_layer(f, hf):
|
||||
layer_list.append(layer.name)
|
||||
|
||||
dump_sparse_gru(model.get_layer('gru_a'), f, hf)
|
||||
|
||||
hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
|
||||
|
|
|
@ -40,7 +40,6 @@ import h5py
|
|||
import sys
|
||||
from tf_funcs import *
|
||||
from diffembed import diff_Embed
|
||||
import difflpc
|
||||
|
||||
frame_size = 160
|
||||
pcm_bits = 8
|
||||
|
@ -226,23 +225,6 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
|
|||
padding = 'valid' if training else 'same'
|
||||
fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
|
||||
fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
|
||||
|
||||
if not flag_e2e:
|
||||
embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
|
||||
cpcm = Reshape((-1, embed_size*3))(embed(pcm))
|
||||
else:
|
||||
Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
|
||||
error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
|
||||
feat2lpc = difflpc.difflpc(training = training)
|
||||
lpcoeffs = feat2lpc(feat)
|
||||
tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
|
||||
past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
|
||||
embed = diff_Embed(name='embed_sig',initializer = PCMInit())
|
||||
cpcm = Concatenate()([Input_extractor([pcm,0]),tensor_preds,past_errors])
|
||||
cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
|
||||
cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
|
||||
cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
|
||||
|
||||
pembed = Embedding(256, 64, name='embed_pitch')
|
||||
cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
|
||||
|
||||
|
@ -253,6 +235,22 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
|
|||
|
||||
cfeat = fdense2(fdense1(cfeat))
|
||||
|
||||
if not flag_e2e:
|
||||
embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
|
||||
cpcm = Reshape((-1, embed_size*3))(embed(pcm))
|
||||
else:
|
||||
Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
|
||||
error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
|
||||
lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
|
||||
tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
|
||||
past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
|
||||
embed = diff_Embed(name='embed_sig',initializer = PCMInit())
|
||||
cpcm = Concatenate()([Input_extractor([pcm,0]),tensor_preds,past_errors])
|
||||
cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
|
||||
cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
|
||||
cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
|
||||
|
||||
|
||||
rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
|
||||
|
||||
quant = quant_regularizer if quantize else None
|
||||
|
|
|
@ -49,6 +49,7 @@ class diff_rc2lpc(Layer):
|
|||
temp = Concatenate(axis = 2)([temp,input[1]])
|
||||
return temp
|
||||
Llpc = Lambda(pred_lpc_recursive)
|
||||
inputs = inputs[:,:,:lpcoeffs_N]
|
||||
lpc_init = inputs
|
||||
for i in range(1,lpcoeffs_N):
|
||||
lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue