diff --git a/dnn/nnet.c b/dnn/nnet.c
index 3513a606..ea64e3cf 100644
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -283,7 +283,7 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input)
    sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, stride, input);
    for (i=0;i<3*N;i++)
       recur[i] = gru->bias[3*N + i];
-   sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+   sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
    for (i=0;i<2*N;i++)
       zrh[i] += recur[i];
    compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
@@ -326,7 +326,7 @@ void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *stat
    sparse_sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, gru->input_weights_idx, input);
    for (i=0;i<3*N;i++)
       recur[i] = gru->bias[3*N + i];
-   sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+   sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
    for (i=0;i<2*N;i++)
       zrh[i] += recur[i];
    compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
@@ -361,7 +361,7 @@ void compute_gru3(const GRULayer *gru, float *state, const float *input)
    RNN_COPY(zrh, input, 3*N);
    for (i=0;i<3*N;i++)
       recur[i] = gru->bias[3*N + i];
-   sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+   sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
    for (i=0;i<2*N;i++)
       zrh[i] += recur[i];
    compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
diff --git a/dnn/nnet.h b/dnn/nnet.h
index 0c06280d..e0504e53 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -59,7 +59,7 @@ typedef struct {
   const float *subias;
   const qweight *input_weights;
   const int *input_weights_idx;
-  const float *recurrent_weights;
+  const qweight *recurrent_weights;
   int nb_inputs;
   int nb_neurons;
   int activation;
diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py
index 083dc3ed..8eac0db7 100755
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -138,7 +138,14 @@ def dump_grub(self, f, hf, gru_a_size):
     print("printing layer " + name + " of type " + self.__class__.__name__)
     weights = self.get_weights()
     qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)
+
+    f.write('#ifdef DOT_PROD\n')
+    qweight = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+    printVector(f, qweight, name + '_recurrent_weights', dotp=True, dtype='qweight')
+    f.write('#else /*DOT_PROD*/\n')
     printVector(f, weights[1], name + '_recurrent_weights')
+    f.write('#endif /*DOT_PROD*/\n')
+
     printVector(f, weights[-1], name + '_bias')
     subias = weights[-1].copy()
     subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py
index 2f14ecd3..7b7bbf1d 100644
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -259,12 +259,12 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, train
         rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
               recurrent_constraint = constraint, recurrent_regularizer=quant)
         rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
-              kernel_constraint=constraint, kernel_regularizer=quant)
+              kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
     else:
         rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
               recurrent_constraint = constraint, recurrent_regularizer=quant)
         rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
-              kernel_constraint=constraint, kernel_regularizer=quant)
+              kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
 
     rnn_in = Concatenate()([cpcm, rep(cfeat)])
     md = MDense(pcm_levels, activation='sigmoid', name='dual_fc')
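
Note: the DOT_PROD branch added to dump_grub above writes the GRU B recurrent weights in Q7 format (scale by 128, round, clip to [-128, 127]), so the dumped table matches the qweight type that the GRULayer struct and sgemv_accum8x4 now use for the recurrent matrix. Below is a minimal standalone sketch of that quantization step, assuming only NumPy; the quantize_q7 name and the example matrix shape are illustrative, not part of the patch.

    import numpy as np

    def quantize_q7(w):
        # Scale by 128, round, and clip to the signed 8-bit range,
        # mirroring the DOT_PROD path added to dump_grub above.
        return np.clip(np.round(128. * w).astype('int'), -128, 127)

    # Illustrative usage with a random recurrent weight matrix for a
    # 16-unit GRU (Keras recurrent kernel shape: units x 3*units).
    w = np.random.uniform(-0.5, 0.5, (16, 48)).astype('float32')
    q = quantize_q7(w)
    assert q.min() >= -128 and q.max() <= 127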