Mirror of https://github.com/xiph/opus.git (synced 2025-06-01 16:17:42 +00:00)
Pre-compute GRU B conditioning
Adapted from https://github.com/mozilla/LPCNet/pull/134 by zhuxiaoxu <zhuxiaoxu@ainirobot.com>, but reworked due to previous weight quantization changes.
parent 0d53fad50d
commit c74330e850

4 changed files with 76 additions and 14 deletions
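
What the change does: GRU B's per-sample input used to be the concatenation of the GRU A state and the frame's condition vector, so the condition part of its input weight matrix was re-multiplied on every sample even though the condition only changes once per frame. After this commit, those condition rows are exported as a separate dense layer (gru_b_dense_feature) that run_frame_network() evaluates once per frame into gru_b_condition; compute_gruB() then adds that vector to the GRU biases before accumulating the state-dependent term. Roughly, for a reset-after GRU whose per-sample input is [a_t ; c] (GRU A state a_t, frame condition c):

    before:  zrh_t = b + W_a*a_t + W_c*c            (W_c*c recomputed every sample)
    after:   gru_b_condition = W_c*c                (once per frame)
             zrh_t = b + gru_b_condition + W_a*a_t  (per sample)

The notation above is a sketch of the idea, not code from the repository; the exact bias handling (regular vs. SU biases) is shown in compute_gruB() below.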
dnn/lpcnet.c (17)

@@ -54,9 +54,10 @@ static void print_vector(float *x, int N)
 }
 #endif
 
-void run_frame_network(LPCNetState *lpcnet, float *condition, float *gru_a_condition, const float *features, int pitch)
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
 {
     NNetState *net;
+    float condition[FEATURE_DENSE2_OUT_SIZE];
     float in[FRAME_INPUT_SIZE];
     float conv1_out[FEATURE_CONV1_OUT_SIZE];
     float conv2_out[FEATURE_CONV2_OUT_SIZE];
@@ -74,13 +75,15 @@ void run_frame_network(LPCNetState *lpcnet, float *condition, float *gru_a_condi
     compute_dense(&feature_dense1, dense1_out, conv2_out);
     compute_dense(&feature_dense2, condition, dense1_out);
     compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
+    compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
     if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
 }
 
-int run_sample_network(NNetState *net, const float *condition, const float *gru_a_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table)
+int run_sample_network(NNetState *net, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table)
 {
     float gru_a_input[3*GRU_A_STATE_SIZE];
     float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
+    float gru_b_input[3*GRU_B_STATE_SIZE];
 #if 1
     compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &gru_a_embed_sig, last_sig, &gru_a_embed_pred, pred, &gru_a_embed_exc, last_exc);
 #else
@@ -92,8 +95,8 @@ int run_sample_network(NNetState *net, const float *condition, const float *gru_
     /*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
     compute_sparse_gru(&sparse_gru_a, net->gru_a_state, gru_a_input);
     RNN_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
-    RNN_COPY(&in_b[GRU_A_STATE_SIZE], condition, FEATURE_DENSE2_OUT_SIZE);
-    compute_gru2(&gru_b, net->gru_b_state, in_b);
+    RNN_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
+    compute_gruB(&gru_b, gru_b_input, net->gru_b_state, in_b);
     return sample_mdense(&dual_fc, net->gru_b_state, sampling_logit_table);
 }
 
@@ -131,16 +134,16 @@ LPCNET_EXPORT void lpcnet_destroy(LPCNetState *lpcnet)
 LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N)
 {
     int i;
-    float condition[FEATURE_DENSE2_OUT_SIZE];
     float lpc[LPC_ORDER];
     float gru_a_condition[3*GRU_A_STATE_SIZE];
+    float gru_b_condition[3*GRU_B_STATE_SIZE];
     int pitch;
     /* Matches the Python code -- the 0.1 avoids rounding issues. */
     pitch = (int)floor(.1 + 50*features[36]+100);
     pitch = IMIN(255, IMAX(33, pitch));
     memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
     lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
-    run_frame_network(lpcnet, condition, gru_a_condition, features, pitch);
+    run_frame_network(lpcnet, gru_a_condition, gru_b_condition, features, pitch);
     memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
     memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
     lpc_from_cepstrum(lpcnet->old_lpc[0], features);
@@ -160,7 +163,7 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features,
         for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpc[j];
         last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
         pred_ulaw = lin2ulaw(pred);
-        exc = run_sample_network(&lpcnet->nnet, condition, gru_a_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table);
+        exc = run_sample_network(&lpcnet->nnet, gru_a_condition, gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table);
         pcm = pred + ulaw2lin(exc);
         RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
         lpcnet->last_sig[0] = pcm;
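
Only the conditioning vectors cross the frame/sample boundary now: condition has become a local of run_frame_network(), and lpcnet_synthesize() passes gru_a_condition and gru_b_condition into the per-sample loop. in_b keeps its declared size, but once the condition contribution arrives through gru_b_condition only its first GRU_A_STATE_SIZE entries are read, since the exported GRU B layer's nb_inputs is reduced accordingly by the dump-script change below.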
dnn/nnet.c (44)

@@ -296,6 +296,50 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input)
       state[i] = h[i];
 }
 
+void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
+{
+   int i;
+   int N, M;
+   int stride;
+   float zrh[3*MAX_RNN_NEURONS];
+   float recur[3*MAX_RNN_NEURONS];
+   float *z;
+   float *r;
+   float *h;
+   M = gru->nb_inputs;
+   N = gru->nb_neurons;
+   z = zrh;
+   r = &zrh[N];
+   h = &zrh[2*N];
+   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
+   celt_assert(input != state);
+   celt_assert(gru->reset_after);
+   stride = 3*N;
+   /* Compute update gate. */
+#ifdef USE_SU_BIAS
+   for (i=0;i<3*N;i++)
+      zrh[i] = gru->subias[i] + gru_b_condition[i];
+#else
+   for (i=0;i<3*N;i++)
+      zrh[i] = gru->bias[i] + gru_b_condition[i];
+#endif
+   sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, stride, input);
+   for (i=0;i<3*N;i++)
+      recur[i] = gru->bias[3*N + i];
+   sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+   for (i=0;i<2*N;i++)
+      zrh[i] += recur[i];
+   compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
+   for (i=0;i<N;i++)
+      h[i] += recur[2*N+i]*r[i];
+   compute_activation(h, h, N, gru->activation);
+   for (i=0;i<N;i++)
+      h[i] = z[i]*state[i] + (1-z[i])*h[i];
+   for (i=0;i<N;i++)
+      state[i] = h[i];
+}
+
 void compute_gru3(const GRULayer *gru, float *state, const float *input)
 {
    int i;
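
compute_gruB() is essentially compute_gru2() with the input-side pre-activation seeded from the precomputed gru_b_condition and with nb_inputs now covering only the GRU A state. The following stand-alone toy (illustrative sizes and weights, plain row-major matrices rather than the library's strided sgemv layout, not code from the repository) checks the underlying identity: folding the constant W_c*c term into the bias of a reset-after GRU gives the same state update as feeding the concatenated [a ; c] input.

#include <math.h>
#include <stdio.h>

#define NA 4   /* per-sample part of the input (stand-in for the GRU A state) */
#define NC 3   /* per-frame conditioning part */
#define N  5   /* GRU units */

static float sigmoid_(float x) { return 1.f/(1.f + expf(-x)); }

/* out[0..3N) += W (3N x cols, row-major) times in[0..cols) */
static void matvec_accum(float *out, const float *W, int cols, const float *in)
{
   int i, j;
   for (i = 0; i < 3*N; i++)
      for (j = 0; j < cols; j++)
         out[i] += W[i*cols + j]*in[j];
}

/* One reset-after GRU step given the input-side pre-activations zrh_in. */
static void gru_step(float *state, const float *zrh_in, const float *Wr, const float *br)
{
   int i;
   float zrh[3*N], recur[3*N];
   for (i = 0; i < 3*N; i++) zrh[i] = zrh_in[i];
   for (i = 0; i < 3*N; i++) recur[i] = br[i];
   matvec_accum(recur, Wr, N, state);
   for (i = 0; i < 2*N; i++) zrh[i] += recur[i];        /* z and r pre-activations */
   for (i = 0; i < 2*N; i++) zrh[i] = sigmoid_(zrh[i]);
   for (i = 0; i < N; i++)                              /* reset-after candidate h */
      zrh[2*N+i] = tanhf(zrh[2*N+i] + zrh[N+i]*recur[2*N+i]);
   for (i = 0; i < N; i++)
      state[i] = zrh[i]*state[i] + (1 - zrh[i])*zrh[2*N+i];
}

int main(void)
{
   float Wa[3*N*NA], Wc[3*N*NC], Wr[3*N*N], b[3*N], br[3*N];
   float a[NA], c[NC], s1[N] = {0}, s2[N] = {0};
   float zrh1[3*N], zrh2[3*N], cond[3*N] = {0};
   int i;
   /* Arbitrary deterministic test values. */
   for (i = 0; i < 3*N*NA; i++) Wa[i] = .01f*(i%7) - .03f;
   for (i = 0; i < 3*N*NC; i++) Wc[i] = .02f*(i%5) - .04f;
   for (i = 0; i < 3*N*N;  i++) Wr[i] = .01f*(i%3) - .01f;
   for (i = 0; i < 3*N; i++) { b[i] = .1f; br[i] = -.05f; }
   for (i = 0; i < NA; i++) a[i] = .3f*i;
   for (i = 0; i < NC; i++) c[i] = .2f - .1f*i;

   /* (1) per-sample work on the concatenated input [a ; c] */
   for (i = 0; i < 3*N; i++) zrh1[i] = b[i];
   matvec_accum(zrh1, Wa, NA, a);
   matvec_accum(zrh1, Wc, NC, c);
   gru_step(s1, zrh1, Wr, br);

   /* (2) W_c*c precomputed once per frame and added to the bias */
   matvec_accum(cond, Wc, NC, c);
   for (i = 0; i < 3*N; i++) zrh2[i] = b[i] + cond[i];
   matvec_accum(zrh2, Wa, NA, a);
   gru_step(s2, zrh2, Wr, br);

   for (i = 0; i < N; i++)
      if (fabsf(s1[i] - s2[i]) > 1e-6f) { printf("mismatch at %d\n", i); return 1; }
   printf("state updates match\n");
   return 0;
}

In the actual code, the once-per-frame product is the gru_b_dense_feature layer evaluated in run_frame_network(), and the per-sample part is the sgemv_accum8x4() call in compute_gruB().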
dnn/nnet.h

@@ -103,6 +103,8 @@ void compute_gru(const GRULayer *gru, float *state, const float *input);
 
 void compute_gru2(const GRULayer *gru, float *state, const float *input);
 
+void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input);
+
 void compute_gru3(const GRULayer *gru, float *state, const float *input);
 
 void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *input);
dnn/training_tf2/dump_lpcnet.py

@@ -126,16 +126,16 @@ def dump_sparse_gru(self, f, hf):
     hf.write('extern const SparseGRULayer {};\n\n'.format(name));
     return True
 
-def dump_gru_layer(self, f, hf):
+def dump_grub(self, f, hf, gru_a_size):
     global max_rnn_neurons
     name = self.name
     print("printing layer " + name + " of type " + self.__class__.__name__)
     weights = self.get_weights()
     f.write('#ifdef DOT_PROD\n')
-    qweight = np.clip(np.round(128.*weights[0]).astype('int'), -128, 127)
+    qweight = np.clip(np.round(128.*weights[0][:gru_a_size, :]).astype('int'), -128, 127)
     printVector(f, qweight, name + '_weights', dotp=True, dtype='qweight')
     f.write('#else /*DOT_PROD*/\n')
-    printVector(f, weights[0], name + '_weights')
+    printVector(f, weights[0][:gru_a_size, :], name + '_weights')
     f.write('#endif /*DOT_PROD*/\n')
     printVector(f, weights[1], name + '_recurrent_weights')
     printVector(f, weights[-1], name + '_bias')
@@ -153,12 +153,18 @@ def dump_gru_layer(self, f, hf):
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
     f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
-    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+            .format(name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
     hf.write('extern const GRULayer {};\n\n'.format(name));
     return True
-GRU.dump_layer = dump_gru_layer
+
+def dump_gru_layer_dummy(self, f, hf):
+    name = self.name
+    weights = self.get_weights()
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    return True;
+
+GRU.dump_layer = dump_gru_layer_dummy
 
 def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
     printVector(f, weights, name + '_weights')
@@ -272,6 +278,13 @@ W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
 b = model.get_layer('gru_a').get_weights()[2]
 dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
 
+W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
+b = model.get_layer('gru_b').get_weights()[2]
+# Set biases to zero because they'll be included in the GRU input part
+# (we need regular and SU biases)
+dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf)
+dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)
+
 layer_list = []
 for i, layer in enumerate(model.layers):
     if layer.dump_layer(f, hf):
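
On the export side, the gru_b kernel is split by row: the first rnn_units1 rows (the GRU A state part) are written out by dump_grub() as the GRULayer's input weights, while the remaining rows (the condition part) become the gru_b_dense_feature dense layer. That dense layer's bias is zeroed because the GRU's own regular and SU biases are added inside compute_gruB(); the generic per-layer dump for gru_b is replaced by dump_gru_layer_dummy(), which only emits the _OUT_SIZE and _STATE_SIZE defines.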