diff --git a/dnn/README.md b/dnn/README.md
index 9fc4d475..3b77c096 100644
--- a/dnn/README.md
+++ b/dnn/README.md
@@ -19,7 +19,7 @@ This software is also a useful resource as an open source starting point for Wav
 1. Then, run the resulting executable:
 
    ```
-   ./dump_data input.s16 exc.s8 features.f32 pred.s16 pcm.s16
+   ./dump_data input.s16 features.f32 pcm.s16
    ```
 
    where the first file contains 16 kHz 16-bit raw PCM audio (no header)
@@ -29,7 +29,7 @@ always use ±5% or 10% resampling to augment your data).
 1. Now that you have your files, you can do the training with:
 
    ```
-   ./train_lpcnet.py exc.s8 features.f32 pred.s16 pcm.s16
+   ./train_lpcnet.py features.f32 pcm.s16
    ```
 
    and it will generate a wavenet*.h5 file for each iteration. If it stops with a "Failed to allocate RNN reserve space" message, try reducing the *batch\_size* variable in train_wavenet_audio.py.
diff --git a/dnn/denoise.c b/dnn/denoise.c
index e78e0734..24f4e35d 100644
--- a/dnn/denoise.c
+++ b/dnn/denoise.c
@@ -579,24 +579,20 @@ int main(int argc, char **argv) {
   float mem_preemph=0;
   float x[FRAME_SIZE];
   FILE *f1;
-  FILE *fexc;
   FILE *ffeat;
-  FILE *fpred;
   FILE *fpcm;
   signed char iexc[FRAME_SIZE];
   short pred[FRAME_SIZE];
   short pcm[FRAME_SIZE];
   DenoiseState *st;
   st = rnnoise_create();
-  if (argc!=6) {
-    fprintf(stderr, "usage: %s <speech> <exc out> <features out> <pred out> <pcm out>\n", argv[0]);
+  if (argc!=4) {
+    fprintf(stderr, "usage: %s <speech> <features out> <pcm out>\n", argv[0]);
     return 1;
   }
   f1 = fopen(argv[1], "r");
-  fexc = fopen(argv[2], "w");
-  ffeat = fopen(argv[3], "w");
-  fpred = fopen(argv[4], "w");
-  fpcm = fopen(argv[5], "w");
+  ffeat = fopen(argv[2], "w");
+  fpcm = fopen(argv[3], "w");
   while (1) {
     kiss_fft_cpx X[FREQ_SIZE], P[WINDOW_SIZE];
     float Ex[NB_BANDS], Ep[NB_BANDS];
@@ -617,17 +613,14 @@ int main(int argc, char **argv) {
     preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
 
     compute_frame_features(st, iexc, pred, pcm, X, P, Ex, Ep, Exp, features, x);
-#if 1
-    fwrite(iexc, sizeof(signed char), FRAME_SIZE, fexc);
     fwrite(features, sizeof(float), NB_FEATURES, ffeat);
-    fwrite(pred, sizeof(short), FRAME_SIZE, fpred);
     fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
-#endif
     count++;
   }
 
   //fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
   fclose(f1);
-  fclose(fexc);
+  fclose(ffeat);
+  fclose(fpcm);
   return 0;
 }
diff --git a/dnn/train_lpcnet.py b/dnn/train_lpcnet.py
index c40a1bd9..4e795d8b 100755
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -56,10 +56,8 @@ model, _, _ = lpcnet.new_lpcnet_model()
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 model.summary()
 
-exc_file = sys.argv[1]       # not used at present
-feature_file = sys.argv[2]
-pred_file = sys.argv[3]      # LPC predictor samples. Not used at present, see below
-pcm_file = sys.argv[4]       # 16 bit unsigned short PCM samples
+feature_file = sys.argv[1]
+pcm_file = sys.argv[2]       # 16 bit unsigned short PCM samples
 frame_size = 160
 nb_features = 55
 nb_used_features = model.nb_used_features
@@ -96,8 +94,7 @@ features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
 
 # Note: the LPC predictor output is now calculated by the loop below; this code was
 # for an earlier version that implemented the prediction filter in C
-upred = np.fromfile(pred_file, dtype='int16')
-upred = upred[:nb_frames*pcm_chunk_size]
+upred = np.zeros((nb_frames*pcm_chunk_size,), dtype='int16')
 
 # Use 16th order LPC to generate LPC prediction output upred[] and (in
 # mu-law form) pred[]
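
The "loop below" referenced in the train_lpcnet.py comment falls outside this hunk: it fills upred[] from the LPC coefficients in the feature file instead of reading precomputed samples from pred.s16. As a rough sketch of that kind of computation, assuming `s` holds a frame's 16-bit samples and `lpc` its 16 prediction coefficients from the feature vector (illustrative names, not the script's actual variables, and the sign convention depends on how the coefficients are stored):

```python
import numpy as np

def lpc_prediction(s, lpc):
    # Illustrative 16th-order LPC prediction: pred[n] = sum_k lpc[k] * s[n-1-k].
    # Hypothetical helper, not code from train_lpcnet.py.
    pred = np.zeros(len(s), dtype=np.float32)
    for k in range(len(lpc)):              # coefficient k applies at lag k+1
        pred[k + 1:] += lpc[k] * s[:len(s) - k - 1]
    return pred
```

Per the comment above, the result would then be converted to mu-law form to produce pred[].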