diff --git a/dnn/nnet.h b/dnn/nnet.h
index fb765519..a5257700 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -56,6 +56,7 @@ typedef struct {
 
 typedef struct {
   const float *bias;
+  const float *subias;
   const qweight *input_weights;
   const float *recurrent_weights;
   int nb_inputs;
diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py
index cfdde727..82109483 100755
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -138,6 +138,9 @@ def dump_gru_layer(self, f, hf):
     f.write('#endif /*DOT_PROD*/\n')
     printVector(f, weights[1], name + '_recurrent_weights')
     printVector(f, weights[-1], name + '_bias')
+    subias = weights[-1].copy()
+    subias[0,:] = subias[0,:] - np.sum(np.clip(weights[0], -1, 1),axis=0)
+    printVector(f, subias, name + '_subias')
     if hasattr(self, 'activation'):
         activation = self.activation.__name__.upper()
     else:
@@ -148,8 +151,8 @@ def dump_gru_layer(self, f, hf):
     reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('extern const GRULayer {};\n\n'.format(name));
diff --git a/dnn/vec.h b/dnn/vec.h
index dd55d998..93504b62 100644
--- a/dnn/vec.h
+++ b/dnn/vec.h
@@ -194,6 +194,7 @@ static inline void sparse_sgemv_accum16(float *out, const float *w, int rows, co
 }
 
 #ifdef DOT_PROD
+#define SCALE (128.f*127.f)
 #define SCALE_1 (1.f/128.f/127.f)
@@ -228,11 +229,6 @@ static inline void sgemv_accum8x4(float *out, const qweight *w, int rows, int co
    }
   for (i=0;i
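
Note on the subias term: with DOT_PROD, the signed 8-bit weight codes are multiplied against activation codes that carry an unsigned offset, which leaves a constant excess of sum(clip(w, -1, 1)) in every output. The patch bakes that correction into a second bias vector at dump time, and only in row 0 of the bias (the input-side bias of the two rows a reset_after GRU carries, consistent with the struct, where only input_weights are qweight-quantized). That is presumably what lets the runtime fixup loop be removed from sgemv_accum8x4() in the truncated vec.h hunk. Below is a minimal numpy sketch of the identity being exploited; the quantization conventions (activations stored as round(127*x) + 127 unsigned, weights as the dump script's clipped round(128*w) signed codes) are inferred from SCALE = 128.f*127.f and should be read as assumptions, not as part of the patch.

    import numpy as np

    # Toy sizes; the activation offset convention here is an assumption
    # made for illustration, not taken verbatim from the patch.
    rng = np.random.default_rng(0)
    nb_inputs, nb_neurons = 16, 8
    w = rng.uniform(-1.5, 1.5, (nb_inputs, nb_neurons))  # float weights (some clip)
    x = rng.uniform(-1.0, 1.0, nb_inputs)                # activations in [-1, 1]
    bias = rng.uniform(-1.0, 1.0, nb_neurons)

    SCALE_1 = 1.0 / (128.0 * 127.0)
    # Signed weight codes, same formula as the qweight dump above
    q = np.clip(np.round(128.0 * w), -128, 127)
    # Assumed unsigned activation codes: signed code plus offset 127
    u = np.round(127.0 * x) + 127.0

    # Dump-time correction, same formula as in dump_gru_layer() above
    subias = bias - np.sum(np.clip(w, -1, 1), axis=0)

    # Runtime: unsigned integer dot product, rescaled, plus subias ...
    y_dotprod = subias + SCALE_1 * (q.T @ u)
    # ... agrees with the float GEMV using the ordinary bias
    y_float = bias + np.clip(w, -1, 1).T @ x
    print(np.max(np.abs(y_dotprod - y_float)))  # small, quantization error only

The offset term expands as SCALE_1 * 127 * sum(q) = sum(q)/128, which is approximately sum(clip(w, -1, 1)), so subtracting that sum from the bias once at dump time cancels it. The cost is one extra copy of the bias array in the generated model data; the saving is a per-output correction on every GRU call.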