diff --git a/dnn/README.md b/dnn/README.md
index 7557f7d5..80dd7086 100644
--- a/dnn/README.md
+++ b/dnn/README.md
@@ -29,14 +29,14 @@ always use ±5% or 10% resampling to augment your data).
 
 1. Now that you have your files, you can do the training with:
    ```
-   ./train_wavenet_audio.py exc.s8 features.f32 pred.s16 pcm.s16
+   ./train_lpcnet.py exc.s8 features.f32 pred.s16 pcm.s16
    ```
    and it will generate a wavenet*.h5 file for each iteration. If it stops with a
    "Failed to allocate RNN reserve space" message try reducing the *batch\_size* variable
    in train_wavenet_audio.py.
 
 1. You can synthesise speech with:
    ```
-   ./test_wavenet_audio.py features.f32 > pcm.txt
+   ./test_lpcnet.py features.f32 > pcm.txt
    ```
    The output file pcm.txt contains ASCII PCM samples that need to be converted to WAV for playback
diff --git a/dnn/train_lpcnet.py b/dnn/train_lpcnet.py
index de8088c7..23b11c76 100755
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -23,7 +23,7 @@ config.gpu_options.per_process_gpu_memory_fraction = 0.44
 
 set_session(tf.Session(config=config))
 
-nb_epochs = 40
+nb_epochs = 120
 
 # Try reducing batch_size if you run out of memory on your GPU
 batch_size = 64
@@ -120,4 +120,4 @@ checkpoint = ModelCheckpoint('lpcnet9_384_10_G16_{epoch:02d}.h5')
 
 #model.load_weights('wavenet4f2_30.h5')
 model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.fit([in_data, in_exc, features, periods], out_data, batch_size=batch_size, epochs=120, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, 0.1)])
+model.fit([in_data, in_exc, features, periods], out_data, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, 0.1)])
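
Note (not part of the patch above): the README hunk says pcm.txt "contains ASCII PCM samples that need to be converted to WAV for playback" but shows no conversion step. Below is a minimal sketch of one way to do that, assuming whitespace-separated 16-bit sample values and 16 kHz mono output; the script name and the `ascii_pcm_to_wav` helper are hypothetical and do not exist in the repository.

```
#!/usr/bin/env python3
# Hypothetical helper (not part of this patch): turn the ASCII output of
# test_lpcnet.py into a playable WAV file. Assumes whitespace-separated
# 16-bit sample values and 16 kHz mono audio; adjust if your copy of the
# script prints a different format or rate.

import sys
import wave

import numpy as np

def ascii_pcm_to_wav(txt_path, wav_path, rate=16000):
    # Read the sample values and store them as little-endian 16-bit integers.
    samples = np.loadtxt(txt_path).ravel().astype('<i2')
    with wave.open(wav_path, 'wb') as w:
        w.setnchannels(1)     # mono
        w.setsampwidth(2)     # 16-bit samples
        w.setframerate(rate)  # assumed 16 kHz
        w.writeframes(samples.tobytes())

if __name__ == '__main__':
    ascii_pcm_to_wav(sys.argv[1], sys.argv[2])
```

If saved under an assumed name such as pcm_to_wav.py, usage would look like `./pcm_to_wav.py pcm.txt out.wav`.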