From b24e53fdfaf6a81585e45488c9c8b7cf7e909db3 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin
Date: Wed, 13 Oct 2021 02:44:51 -0400
Subject: [PATCH] Adding option to change frame rate network size

---
 dnn/training_tf2/dump_lpcnet.py  |  3 ++-
 dnn/training_tf2/lpcnet.py       | 12 ++++++------
 dnn/training_tf2/train_lpcnet.py |  3 ++-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py
index 26108dbd..bfd09946 100755
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -249,8 +249,9 @@ filename = sys.argv[1]
 with h5py.File(filename, "r") as f:
     units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
     units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
+    cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
 
-model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = flag_e2e)
+model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = flag_e2e, cond_size=cond_size)
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 #model.summary()

diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py
index 39735c6e..3a62e4c1 100644
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -230,24 +230,24 @@ class WeightClip(Constraint):
 
 constraint = WeightClip(0.992)
 
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False):
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128):
     pcm = Input(shape=(None, 3), batch_size=batch_size)
     feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
     pitch = Input(shape=(None, 1), batch_size=batch_size)
-    dec_feat = Input(shape=(None, 128))
+    dec_feat = Input(shape=(None, cond_size))
     dec_state1 = Input(shape=(rnn_units1,))
     dec_state2 = Input(shape=(rnn_units2,))
 
     padding = 'valid' if training else 'same'
-    fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
-    fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
+    fconv1 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv1')
+    fconv2 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv2')
     pembed = Embedding(256, 64, name='embed_pitch')
     cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
     cfeat = fconv2(fconv1(cat_feat))
 
-    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
-    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
+    fdense1 = Dense(cond_size, activation='tanh', name='feature_dense1')
+    fdense2 = Dense(cond_size, activation='tanh', name='feature_dense2')
 
     if flag_e2e and quantize:
         fconv1.trainable = False

diff --git a/dnn/training_tf2/train_lpcnet.py b/dnn/training_tf2/train_lpcnet.py
index 1469563f..fa137c81 100755
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -45,6 +45,7 @@ parser.add_argument('--grub-density', metavar='<density>', type=flo
 parser.add_argument('--grub-density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each GRU B input gate (default 1.0, 1.0, 1.0)')
 parser.add_argument('--grua-size', metavar='<units>', default=384, type=int, help='number of units in GRU A (default 384)')
 parser.add_argument('--grub-size', metavar='<units>', default=16, type=int, help='number of units in GRU B (default 16)')
+parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network, aka frame rate network (default 128)')
 parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
 parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
 parser.add_argument('--end2end', dest='flag_e2e', action='store_true', help='Enable end-to-end training (with differentiable LPC computation)')
@@ -121,7 +122,7 @@ opt = Adam(lr, decay=decay, beta_2=0.99)
 strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
 
 with strategy.scope():
-    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size, rnn_units2=args.grub_size, batch_size=batch_size, training=True, quantize=quantize, flag_e2e = flag_e2e)
+    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size, rnn_units2=args.grub_size, batch_size=batch_size, training=True, quantize=quantize, flag_e2e = flag_e2e, cond_size=args.cond_size)
 
 if not flag_e2e:
     model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics='sparse_categorical_crossentropy')
 else:
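
Note: the patch threads one cond_size value through all three scripts: train_lpcnet.py exposes it as --cond-size, new_lpcnet_model() sizes the two frame rate Conv1D layers and two Dense layers with it, and dump_lpcnet.py recovers it from the saved weights instead of assuming 128. Below is a minimal sketch of that recovery step, assuming only h5py and the repo's lpcnet module; 'my_model.h5' is a placeholder filename, not part of the patch.

    # Sketch: recover cond_size from a trained model, mirroring the
    # dump_lpcnet.py hunk above. 'my_model.h5' is a hypothetical file.
    import h5py
    import lpcnet

    with h5py.File('my_model.h5', 'r') as f:
        # feature_dense1 has cond_size output units, so the smaller
        # dimension of its kernel matrix is the frame rate network size
        # (same min() trick used to read the GRU sizes).
        cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)

    # Rebuild the model with the recovered size instead of the default 128.
    model, _, _ = lpcnet.new_lpcnet_model(cond_size=cond_size)

On the training side, the new flag is passed like the existing size options, e.g. ./train_lpcnet.py features.f32 data.s16 my_model --cond-size 256, where the positional arguments are placeholders following the script's existing usage.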