From c8cbfa7e9bc8751dd5f396f2616cb681dda10f1c Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin
Date: Sat, 29 Jan 2022 02:54:48 -0500
Subject: [PATCH] Adding feature prediction (totally untested)

---
 dnn/training_tf2/lpcnet_plc.py | 100 +++++++++++++++++++
 dnn/training_tf2/plc_loader.py |  54 ++++++++++
 dnn/training_tf2/train_plc.py  | 173 +++++++++++++++++++++++++++++++++
 3 files changed, 327 insertions(+)
 create mode 100644 dnn/training_tf2/lpcnet_plc.py
 create mode 100644 dnn/training_tf2/plc_loader.py
 create mode 100644 dnn/training_tf2/train_plc.py

diff --git a/dnn/training_tf2/lpcnet_plc.py b/dnn/training_tf2/lpcnet_plc.py
new file mode 100644
index 00000000..04be0e0a
--- /dev/null
+++ b/dnn/training_tf2/lpcnet_plc.py
@@ -0,0 +1,100 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+   Copyright (c) 2018-2019 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
+from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import Constraint
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+import numpy as np
+
+def quant_regularizer(x):
+    Q = 128
+    Q_1 = 1./Q
+    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
+
+class WeightClip(Constraint):
+    '''Clips the weights incident to each hidden unit to be inside a range
+    '''
+    def __init__(self, c=2):
+        self.c = c
+
+    def __call__(self, p):
+        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
+        # saturation when implementing dot products with SSSE3 or AVX2.
+        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
+        #return K.clip(p, -self.c, self.c)
+
+    def get_config(self):
+        return {'name': self.__class__.__name__,
+            'c': self.c}
+
+constraint = WeightClip(0.992)
+
+def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, cond_size=128):
+    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+    lost = Input(shape=(None, 1), batch_size=batch_size)
+
+    fdense1 = Dense(cond_size, activation='tanh', name='plc_dense1')
+
+    cfeat = Concatenate()([feat, lost])
+    cfeat = fdense1(cfeat)
+    #cfeat = Conv1D(cond_size, 3, padding='causal', activation='tanh', name='plc_conv1')(cfeat)
+
+    quant = quant_regularizer if quantize else None
+
+    if training:
+        rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru1', stateful=True,
+              kernel_constraint=constraint, recurrent_constraint=constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+        rnn2 = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru2', stateful=True,
+              kernel_constraint=constraint, recurrent_constraint=constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+    else:
+        rnn = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after=True, name='plc_gru1', stateful=True,
+              kernel_constraint=constraint, recurrent_constraint=constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+        rnn2 = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after=True, name='plc_gru2', stateful=True,
+              kernel_constraint=constraint, recurrent_constraint=constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+
+    gru_out1, _ = rnn(cfeat)
+    gru_out1 = GaussianNoise(.005)(gru_out1)
+    gru_out2, _ = rnn2(gru_out1)
+
+    out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
+    plc_out = out_dense(gru_out2)
+
+    model = Model([feat, lost], plc_out)
+    model.rnn_units = rnn_units
+    model.cond_size = cond_size
+    model.nb_used_features = nb_used_features
+
+    return model
diff --git a/dnn/training_tf2/plc_loader.py b/dnn/training_tf2/plc_loader.py
new file mode 100644
index 00000000..30d46685
--- /dev/null
+++ b/dnn/training_tf2/plc_loader.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import numpy as np
+from tensorflow.keras.utils import Sequence
+
+class PLCLoader(Sequence):
+    def __init__(self, features, batch_size):
+        self.batch_size = batch_size
+        self.nb_batches = features.shape[0]//self.batch_size
+        self.features = features[:self.nb_batches*self.batch_size, :]
+        self.on_epoch_end()
+
+    def on_epoch_end(self):
+        self.indices = np.arange(self.nb_batches*self.batch_size)
+        np.random.shuffle(self.indices)
+
+    def __getitem__(self, index):
+        features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+        lost = (np.random.rand(features.shape[0], features.shape[1]) > .2).astype('float')  # 1 = frame available, 0 = erased (~20% loss)
+        lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
+        lost_mask = np.tile(lost, (1,1,features.shape[2]))
+
+        out_features = np.concatenate([features, 1.-lost], axis=-1)  # last channel flags erased frames, used as the loss mask
+        inputs = [features*lost_mask, lost]
+        outputs = [out_features]
+        return (inputs, outputs)
+
+    def __len__(self):
+        return self.nb_batches
diff --git a/dnn/training_tf2/train_plc.py b/dnn/training_tf2/train_plc.py
new file mode 100644
index 00000000..ed075f53
--- /dev/null
+++ b/dnn/training_tf2/train_plc.py
@@ -0,0 +1,173 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+   Copyright (c) 2018-2019 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train a PLC model
+
+import argparse
+from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a PLC model')
+
+parser.add_argument('features', metavar='', help='binary features file (float32)')
+parser.add_argument('output', metavar='', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='', default='lpcnet_plc', help='PLC model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--quantize', metavar='', help='quantize model')
+group1.add_argument('--retrain', metavar='', help='continue training model')
+parser.add_argument('--gru-size', metavar='', default=256, type=int, help='number of units in GRU (default 256)')
+parser.add_argument('--cond-size', metavar='', default=128, type=int, help='number of units in conditioning network (default 128)')
+parser.add_argument('--epochs', metavar='', default=120, type=int, help='number of epochs to train for (default 120)')
+parser.add_argument('--batch-size', metavar='', default=128, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='', default=1000, type=int, help='sequence length to use (default 1000)')
+parser.add_argument('--lr', metavar='', type=float, help='learning rate')
+parser.add_argument('--decay', metavar='', type=float, help='learning rate decay')
+parser.add_argument('--logdir', metavar='', help='directory for tensorboard log files')
+
+
+args = parser.parse_args()
+
+import importlib
+lpcnet = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+#    try:
+#        tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+#    except RuntimeError as e:
+#        print(e)
+
+nb_epochs = args.epochs
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+quantize = args.quantize is not None
+retrain = args.retrain is not None
+
+if quantize:
+    lr = 0.00003
+    decay = 0
+    input_model = args.quantize
+else:
+    lr = 0.001
+    decay = 2.5e-5
+
+if args.lr is not None:
+    lr = args.lr
+
+if args.decay is not None:
+    decay = args.decay
+
+if retrain:
+    input_model = args.retrain
+
+def plc_loss(alpha=1.0):
+    def loss(y_true,y_pred):
+        mask = y_true[:,:,-1:]
+        y_true = y_true[:,:,:-1]
+        e = (y_true - y_pred)*mask
+        e_bands = tf.signal.idct(e, norm='ortho')
+        l1_loss = K.mean(K.abs(e) + alpha*K.abs(e_bands))
+        return l1_loss
+    return loss
+
+def plc_l1_loss():
+    def L1_loss(y_true,y_pred):
+        mask = y_true[:,:,-1:]
+        y_true = y_true[:,:,:-1]
+        e = (y_true - y_pred)*mask
+        l1_loss = K.mean(K.abs(e))
+        return l1_loss
+    return L1_loss
+
+def plc_band_loss():
+    def L1_band_loss(y_true,y_pred):
+        mask = y_true[:,:,-1:]
+        y_true = y_true[:,:,:-1]
+        e = (y_true - y_pred)*mask
+        e_bands = tf.signal.idct(e, norm='ortho')
+        l1_loss = K.mean(K.abs(e_bands))
+        return l1_loss
+    return L1_band_loss
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+    model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=batch_size, training=True, quantize=quantize, cond_size=args.cond_size)
+    model.compile(optimizer=opt, loss=plc_loss(alpha=1.), metrics=[plc_l1_loss(), plc_band_loss()])
+    model.summary()
+
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+sequence_size = args.seq_length
+
+# the feature file stores float32 vectors of nb_features values per frame; only the first nb_used_features are used here
+
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+
+features = features[:, :, :nb_used_features]
+
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.gru_size, '{epoch:02d}'))
+
+if args.retrain is not None:
+    model.load_weights(args.retrain)
+
+if quantize or retrain:
+    #Adapting from an existing model
+    model.load_weights(input_model)
+
+model.save_weights('{}_{}_initial.h5'.format(args.output, args.gru_size))
+
+loader = PLCLoader(features, batch_size)
+
+callbacks = [checkpoint]
+if args.logdir is not None:
+    logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.gru_size)
+    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
+    callbacks.append(tensorboard_callback)
+
+model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
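
For reference, a minimal sketch (not part of the patch) of how the trained feature predictor might be exercised once training has produced a checkpoint. The checkpoint name, the all-zero feature buffer, and the simulated 20% erasure pattern are illustrative assumptions; the constructor and the input convention (second input is 1 for available frames, 0 for erased ones, matching PLCLoader) come from the files added above.

#!/usr/bin/python3
# Illustrative only: load a trained PLC model and predict features for one sequence.
import numpy as np
from lpcnet_plc import new_lpcnet_plc_model

nb_used_features = 20
model = new_lpcnet_plc_model(rnn_units=256, nb_used_features=nb_used_features,
                             batch_size=1, training=False, cond_size=128)
model.load_weights('plc_model_256_10.h5')   # hypothetical checkpoint written by train_plc.py

feats = np.zeros((1, 100, nb_used_features), dtype='float32')   # stand-in for real features
lost = (np.random.rand(1, 100, 1) > .2).astype('float32')       # 1 = frame available, 0 = erased

pred = model.predict([feats*lost, lost])
print(pred.shape)   # (1, 100, 20): predicted features, of interest where frames were erased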