diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 0ed47abc..a97cec36 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -75,7 +75,7 @@ void compute_noise(int *noise, float noise_std) { } -void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) { +void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes, int e2e) { int i, k; for (k=0;kexc_mem; /* Excitation out. */ -#ifdef END2END - data[4*i+3] = lin2ulaw(pcm[k*FRAME_SIZE+i]); -#else - data[4*i+3] = e; -#endif + if (e2e) { + data[4*i+3] = lin2ulaw(pcm[k*FRAME_SIZE+i]); + } else { + data[4*i+3] = e; + } /* Simulate error on excitation. */ e += noise[k*FRAME_SIZE+i]; e = IMIN(255, IMAX(0, e)); @@ -118,6 +118,8 @@ static short float2short(float x) int main(int argc, char **argv) { int i; + char *argv0; + int e2e=0; int count=0; static const float a_hp[2] = {-1.99599, 0.99600}; static const float b_hp[2] = {-2, 1}; @@ -148,6 +150,12 @@ int main(int argc, char **argv) { int quantize = 0; srand(getpid()); st = lpcnet_encoder_create(); + argv0=argv[0]; + if (argc > 2 && strcmp(argv[1], "-end2end")==0) { + e2e = 1; + argv++; + argc--; + } if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1; if (argc == 5 && strcmp(argv[1], "-qtrain")==0) { training = 1; @@ -168,8 +176,8 @@ int main(int argc, char **argv) { decode = 1; } if (training == -1) { - fprintf(stderr, "usage: %s -train \n", argv[0]); - fprintf(stderr, " or %s -test \n", argv[0]); + fprintf(stderr, "usage: %s -train \n", argv0); + fprintf(stderr, " or %s -test \n", argv0); return 1; } f1 = fopen(argv[2], "r"); @@ -273,7 +281,7 @@ int main(int argc, char **argv) { if (!quantize) { process_single_frame(st, ffeat); - if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1); + if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1, e2e); } st->pcount++; /* Running on groups of 4 frames. */ @@ -281,7 +289,7 @@ int main(int argc, char **argv) { if (quantize) { unsigned char buf[8]; process_superframe(st, buf, ffeat, encode, quantize); - if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4); + if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4, e2e); } st->pcount = 0; } diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py index bfd09946..768e41b3 100755 --- a/dnn/training_tf2/dump_lpcnet.py +++ b/dnn/training_tf2/dump_lpcnet.py @@ -250,6 +250,7 @@ with h5py.File(filename, "r") as f: units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape) units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape) cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape) + e2e = 'rc2lpc' in f['model_weights'] model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = flag_e2e, cond_size=cond_size) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) @@ -276,6 +277,13 @@ f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\ hf.write('/*This file is automatically generated from a Keras model*/\n\n') hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n') +if e2e: + hf.write('/* This is an end-to-end model */\n') + hf.write('#define END2END\n\n') +else: + hf.write('/* This is *not* an end-to-end model */\n') + hf.write('/* #define END2END */\n\n') + embed_size = lpcnet.embed_size E = model.get_layer('embed_sig').get_weights()[0]