diff --git a/dnn/nfec_enc.c b/dnn/nfec_enc.c
new file mode 100644
index 00000000..391c4bf9
--- /dev/null
+++ b/dnn/nfec_enc.c
@@ -0,0 +1,57 @@
+#include <string.h>
+#include "nfec_enc.h"
+#include "nnet.h"
+#include "nfec_enc_data.h"
+
+
+
+void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input)
+{
+   float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
+   int output_index = 0;
+   int input_index = 0;
+
+   /* run encoder stack and concatenate output in buffer */
+   compute_dense(&enc_dense1, &buffer[output_index], input);
+   input_index = output_index;
+   output_index += ENC_DENSE1_OUT_SIZE;
+
+   compute_gru3(&enc_dense2, enc_state->dense2_state, &buffer[input_index]);
+   memcpy(&buffer[output_index], enc_state->dense2_state, ENC_DENSE2_OUT_SIZE * sizeof(float));
+   input_index = output_index;
+   output_index += ENC_DENSE2_OUT_SIZE;
+
+   compute_dense(&enc_dense3, &buffer[output_index], &buffer[input_index]);
+   input_index = output_index;
+   output_index += ENC_DENSE3_OUT_SIZE;
+
+   compute_gru3(&enc_dense4, enc_state->dense4_state, &buffer[input_index]);
+   memcpy(&buffer[output_index], enc_state->dense4_state, ENC_DENSE4_OUT_SIZE * sizeof(float));
+   input_index = output_index;
+   output_index += ENC_DENSE4_OUT_SIZE;
+
+   compute_dense(&enc_dense5, &buffer[output_index], &buffer[input_index]);
+   input_index = output_index;
+   output_index += ENC_DENSE5_OUT_SIZE;
+
+   compute_gru3(&enc_dense6, enc_state->dense6_state, &buffer[input_index]);
+   memcpy(&buffer[output_index], enc_state->dense6_state, ENC_DENSE6_OUT_SIZE * sizeof(float));
+   input_index = output_index;
+   output_index += ENC_DENSE6_OUT_SIZE;
+
+   compute_dense(&enc_dense7, &buffer[output_index], &buffer[input_index]);
+   input_index = output_index;
+   output_index += ENC_DENSE7_OUT_SIZE;
+
+   compute_dense(&enc_dense8, &buffer[output_index], &buffer[input_index]);
+   output_index += ENC_DENSE8_OUT_SIZE;
+
+   /* compute latents from concatenated input buffer */
+   compute_conv1d(&bits_dense, latents, enc_state->bits_dense_state, buffer);
+
+   /* next, calculate initial state */
+   compute_dense(&gdense1, &buffer[output_index], buffer);
+   input_index = output_index;
+   compute_dense(&gdense2, initial_state, &buffer[input_index]);
+
+}
\ No newline at end of file
diff --git a/dnn/nfec_enc.h b/dnn/nfec_enc.h
new file mode 100644
index 00000000..27face1d
--- /dev/null
+++ b/dnn/nfec_enc.h
@@ -0,0 +1,15 @@
+#ifndef _NFEC_ENC_H
+#define _NFEC_ENC_H
+
+#include "nfec_enc_data.h"
+
+struct NFECEncState{
+   float dense2_state[3 * ENC_DENSE2_STATE_SIZE];
+   float dense4_state[3 * ENC_DENSE4_STATE_SIZE];
+   float dense6_state[3 * ENC_DENSE6_STATE_SIZE];
+   float bits_dense_state[BITS_DENSE_STATE_SIZE];
+};
+
+void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input);
+
+#endif
\ No newline at end of file
diff --git a/dnn/nfec_enc_demo.c b/dnn/nfec_enc_demo.c
new file mode 100644
index 00000000..520a34d4
--- /dev/null
+++ b/dnn/nfec_enc_demo.c
@@ -0,0 +1,50 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nfec_enc.h"
+
+void usage()
+{
+   printf("nfec_enc_demo <feature file>\n");
+   exit(1);
+}
+
+int main(int argc, char **argv)
+{
+   struct NFECEncState enc_state;
+   float feature_buffer[32];
+   float dframe[2 * 20];
+   float latents[80];
+   float initial_state[24];
+   int index = 0;
+   FILE *fid;
+
+   /* make sure GRU and conv states start from zero */
+   memset(&enc_state, 0, sizeof(enc_state));
+
+   if (argc < 2)
+   {
+      usage();
+   }
+
+   fid = fopen(argv[1], "rb");
+   if (fid == NULL)
+   {
+      fprintf(stderr, "could not open feature file %s\n", argv[1]);
+      usage();
+   }
+
+   while (fread(feature_buffer, sizeof(float), 32, fid) == 32)
+   {
+      memcpy(&dframe[16 * index++], feature_buffer, 16*sizeof(float));
+
+      if (index == 2)
+      {
+         nfec_encode_dframe(&enc_state, latents, initial_state, dframe);
+         index = 0;
+      }
+   }
+}
+
+/* gcc -DDISABLE_DOT_PROD nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c -o nfec_enc_demo */
\ No newline at end of file
diff --git a/dnn/nnet.c b/dnn/nnet.c
index 2f78ac0c..b669ecdd 100644
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -38,6 +38,7 @@
 #include "tansig_table.h"
 #include "nnet.h"
 #include "nnet_data.h"
+#include "nfec_enc_data.h"
 #include "plc_data.h"
 
 #ifdef NO_OPTIMIZATIONS
@@ -129,6 +130,11 @@ void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *
    compute_activation(output, output, N, layer->activation);
 }
 
+void compute_dense(const DenseLayer *layer, float *output, const float *input)
+{
+   _lpcnet_compute_dense(layer, output, input);
+}
+
 void compute_mdense(const MDenseLayer *layer, float *output, const float *input)
 {
    int i, c;
@@ -316,7 +322,7 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input)
       state[i] = h[i];
 }
 
-#define MAX_RNN_NEURONS_ALL IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS)
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), NFEC_ENC_MAX_RNN_NEURONS)
 
 void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
 {
@@ -442,12 +448,14 @@ void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *in
       state[i] = z[i]*state[i] + (1-z[i])*h[i];
 }
 
+#define MAX_CONV_INPUTS_ALL IMAX(MAX_CONV_INPUTS, NFEC_ENC_MAX_CONV_INPUTS)
+
 void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const float *input)
 {
    int i;
    int N, M;
    int stride;
-   float tmp[MAX_CONV_INPUTS];
+   float tmp[MAX_CONV_INPUTS_ALL];
    celt_assert(input != output);
    celt_assert(layer->nb_inputs*layer->kernel_size <= MAX_CONV_INPUTS);
    RNN_COPY(tmp, mem, layer->nb_inputs*(layer->kernel_size-1));
diff --git a/dnn/nnet.h b/dnn/nnet.h
index 7e2549b2..c38c1393 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -98,6 +98,8 @@ void compute_activation(float *output, const float *input, int N, int activation
 
 void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *input);
 
+void compute_dense(const DenseLayer *layer, float *output, const float *input);
+
 void compute_mdense(const MDenseLayer *layer, float *output, const float *input);
 
 int sample_mdense(const MDenseLayer *layer, const float *input, const float *sampling_logit_table, kiss99_ctx *rng);
diff --git a/dnn/training_tf2/dump_nfec_model.py b/dnn/training_tf2/dump_nfec_model.py
new file mode 100644
index 00000000..14c2d37e
--- /dev/null
+++ b/dnn/training_tf2/dump_nfec_model.py
@@ -0,0 +1,123 @@
+import argparse
+import os
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer
+from rdovae import new_rdovae_model
+
+def start_header(header_fid, header_name):
+    header_guard = "_" + os.path.basename(header_name)[:-2].upper() + "_H"
+    header_fid.write(
+f"""
+#ifndef {header_guard}
+#define {header_guard}
+
+#include "nnet.h"
+
+"""
+    )
+
+def finish_header(header_fid):
+    header_fid.write(
+"""
+#endif
+
+"""
+    )
+
+def start_source(source_fid, header_name, weight_file):
+    source_fid.write(
+f"""
+/* this source file was automatically generated from weight file {weight_file} */
+
+#include "{header_name}"
+
+"""
+    )
+
+def finish_source(source_fid):
+    pass
+
+
+if __name__ == "__main__":
+
+    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size)
+    model.load_weights(args.weights)
+
+
+    # for the time being only dump encoder
+    encoder_dense_names = [
+        'enc_dense1',
+        'enc_dense3',
+        'enc_dense5',
+        'enc_dense7',
+        'enc_dense8',
+        'gdense1',
+        'gdense2'
+    ]
+
+    encoder_gru_names = [
+        'enc_dense2',
+        'enc_dense4',
+        'enc_dense6'
+    ]
+
+    encoder_conv1d_names = [
+        'bits_dense'
+    ]
+
+    source_fid = open("nfec_enc_data.c", 'w')
+    header_fid = open("nfec_enc_data.h", 'w')
+
+    start_header(header_fid, "nfec_enc_data.h")
+    start_source(source_fid, "nfec_enc_data.h", os.path.basename(args.weights))
+
+    # dump GRUs
+    max_rnn_neurons = max(
+        [
+            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid)
+            for name in encoder_gru_names
+        ]
+    )
+
+    # dump conv layers
+    max_conv_inputs = max(
+        [
+            dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
+            for name in encoder_conv1d_names
+        ]
+    )
+
+    # dump Dense layers
+    for name in encoder_dense_names:
+        layer = encoder.get_layer(name)
+        dump_dense_layer(layer, source_fid, header_fid)
+
+    # some global constants
+    header_fid.write(
+f"""
+#define NFEC_NUM_FEATURES 20
+
+#define NFEC_LATENT_DIM {args.latent_dim}
+
+#define NFEC_ENC_MAX_RNN_NEURONS {max_rnn_neurons}
+
+#define NFEC_ENC_MAX_CONV_INPUTS {max_conv_inputs}
+
+"""
+    )
+
+    finish_header(header_fid)
+    finish_source(source_fid)
+
+    header_fid.close()
+    source_fid.close()
+
diff --git a/dnn/training_tf2/keraslayerdump.py b/dnn/training_tf2/keraslayerdump.py
new file mode 100644
index 00000000..c3039519
--- /dev/null
+++ b/dnn/training_tf2/keraslayerdump.py
@@ -0,0 +1,160 @@
+""" helper functions for dumping some Keras layers to C files """
+
+import numpy as np
+
+
+def printVector(f, vector, name, dtype='float', dotp=False):
+    """ prints vector as one-dimensional C array """
+    if dotp:
+        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
+        vector = vector.transpose((2, 0, 3, 1))
+    v = np.reshape(vector, (-1))
+    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+    for i in range(0, len(v)):
+        f.write('{}'.format(v[i]))
+        if (i!=len(v)-1):
+            f.write(',')
+        else:
+            break;
+        if (i%8==7):
+            f.write("\n ")
+        else:
+            f.write(" ")
+    f.write('\n};\n\n')
+    return vector
+
+def printSparseVector(f, A, name, have_diag=True):
+    N = A.shape[0]
+    M = A.shape[1]
+    W = np.zeros((0,), dtype='int')
+    W0 = np.zeros((0,))
+    if have_diag:
+        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+        printVector(f, diag, name + '_diag')
+    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
+    idx = np.zeros((0,), dtype='int')
+    for i in range(M//8):
+        pos = idx.shape[0]
+        idx = np.append(idx, -1)
+        nb_nonzero = 0
+        for j in range(N//4):
+            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
+            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
+            if np.sum(np.abs(block)) > 1e-10:
+                nb_nonzero = nb_nonzero + 1
+                idx = np.append(idx, j*4)
+                vblock = qblock.transpose((1,0)).reshape((-1,))
+                W0 = np.concatenate([W0, block.reshape((-1,))])
+                W = np.concatenate([W, vblock])
+        idx[pos] = nb_nonzero
+    f.write('#ifdef DOT_PROD\n')
+    printVector(f, W, name, dtype='qweight')
+    f.write('#else /*DOT_PROD*/\n')
+    printVector(f, W0, name, dtype='qweight')
+    f.write('#endif /*DOT_PROD*/\n')
+    printVector(f, idx, name + '_idx', dtype='int')
+    return AQ
+
+def dump_sparse_gru(self, f, hf):
+    name = 'sparse_' + self.name
+    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+    weights = self.get_weights()
+    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
+    printVector(f, weights[-1], name + '_bias')
+    subias = weights[-1].copy()
+    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
+    printVector(f, subias, name + '_subias')
+    if hasattr(self, 'activation'):
+        activation = self.activation.__name__.upper()
+    else:
+        activation = 'TANH'
+    if hasattr(self, 'reset_after') and not self.reset_after:
+        reset_after = 0
+    else:
+        reset_after = 1
+    neurons = weights[0].shape[1]//3
+    max_rnn_neurons = neurons
+    f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+    return max_rnn_neurons
+
+def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    if sparse:
+        qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
+    else:
+        qweight = printVector(f, weights[0], name + '_weights')
+
+    if dotp:
+        f.write('#ifdef DOT_PROD\n')
+        qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+        printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
+        f.write('#else /*DOT_PROD*/\n')
+    else:
+        qweight2 = weights[1]
+
+    printVector(f, weights[1], name + '_recurrent_weights')
+    if dotp:
+        f.write('#endif /*DOT_PROD*/\n')
+
+    printVector(f, weights[-1], name + '_bias')
+    subias = weights[-1].copy()
+    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
+    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
+    printVector(f, subias, name + '_subias')
+    if hasattr(self, 'activation'):
+        activation = self.activation.__name__.upper()
+    else:
+        activation = 'TANH'
+    if hasattr(self, 'reset_after') and not self.reset_after:
+        reset_after = 0
+    else:
+        reset_after = 1
+    neurons = weights[0].shape[1]//3
+    max_rnn_neurons = neurons
+    f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n NULL,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('extern const GRULayer {};\n\n'.format(name));
+    return max_rnn_neurons
+
+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
+    printVector(f, weights, name + '_weights')
+    printVector(f, bias, name + '_bias')
+    f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+    hf.write('extern const DenseLayer {};\n\n'.format(name));
+
+def dump_dense_layer(self, f, hf):
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    activation = self.activation.__name__.upper()
+    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+    return False
+
+def dump_conv1d_layer(self, f, hf):
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    printVector(f, weights[0], name + '_weights')
+    printVector(f, weights[-1], name + '_bias')
+    activation = self.activation.__name__.upper()
+    max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]
+    f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
+    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
+    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
+    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+    return max_conv_inputs
\ No newline at end of file
diff --git a/dnn/vec_neon.h b/dnn/vec_neon.h
index 8a80b291..d0a87678 100644
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -33,7 +33,12 @@
 #ifndef DISABLE_DOT_PROD
 #define DOT_PROD
 #endif
+
+#ifdef DOT_PROD
 typedef signed char qweight;
+#else
+typedef float qweight;
+#endif
 
 #ifndef LPCNET_TEST
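
Possible build/usage sequence for the new encoder demo, assembled from the pieces above (not part of the patch): dump_nfec_model.py writes nfec_enc_data.c and nfec_enc_data.h into the current directory, and the compile line is the one given in the comment at the end of nfec_enc_demo.c. The file names rdovae_weights.h5 and features.f32 are placeholders, and -lm may or may not be needed depending on the toolchain; the feature file is expected to contain raw float32 features, 32 values per frame, with two consecutive frames fed to each nfec_encode_dframe() call.

    # run from dnn/: generate the encoder data files, then build and run the demo
    python3 training_tf2/dump_nfec_model.py rdovae_weights.h5 --latent-dim 80
    gcc -DDISABLE_DOT_PROD nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c -o nfec_enc_demo -lm
    ./nfec_enc_demo features.f32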