Support for multi-GPU training

Not sure why CuDNNGRU doesn't get used by default, but we need
to explicitly use it to get things to run fast.
Jean-Marc Valin, 2021-06-13 03:50:51 -04:00
parent ebc9483b4c · commit 237245f815
2 changed files with 42 additions and 21 deletions
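Background on the commit message: in TF 2.x, the stock tf.keras.layers.GRU only dispatches to the fused cuDNN kernel when the layer's arguments exactly match the functional form cuDNN implements (tanh activation, sigmoid recurrent activation, reset_after=True, no recurrent dropout, no masking); anything else silently falls back to a much slower generic kernel. tf.compat.v1.keras.layers.CuDNNGRU forces the fused kernel unconditionally, at the cost of being GPU-only. A minimal sketch of the distinction (layer arguments are illustrative, and the exact dispatch criteria vary by TF version):

    import tensorflow as tf
    from tensorflow.compat.v1.keras.layers import CuDNNGRU

    # Always runs the fused cuDNN kernel (requires a GPU):
    rnn_fast = CuDNNGRU(384, return_sequences=True, return_state=True)

    # Only takes the fused path if every argument matches cuDNN's functional
    # form; otherwise it silently falls back to the slow generic kernel:
    rnn_maybe = tf.keras.layers.GRU(384, return_sequences=True, return_state=True,
                                    recurrent_activation='sigmoid',
                                    reset_after=True)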

--- a/lpcnet.py
+++ b/lpcnet.py

@@ -26,8 +26,10 @@
 '''
 import math
+import tensorflow as tf
 from tensorflow.keras.models import Model
 from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
 from tensorflow.keras import backend as K
 from tensorflow.keras.constraints import Constraint
 from tensorflow.keras.initializers import Initializer
@@ -42,6 +44,12 @@ pcm_bits = 8
 embed_size = 128
 pcm_levels = 2**pcm_bits

+def quant_regularizer(x):
+    Q = 128
+    Q_1 = 1./Q
+    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
 class Sparsify(Callback):
     def __init__(self, t_start, t_end, interval, density):
         super(Sparsify, self).__init__()
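A note on the new quant_regularizer (this is what the quantize flag added below feeds into): it measures how far each weight is from the nearest multiple of 1/Q with Q = 128, so minimizing it nudges weights toward values that survive 8-bit quantization. The cosine term is zero whenever Q*x is an integer and maximal halfway between grid points; the double square root (a fourth root) steepens the penalty near the grid, and the 1.0001 offset keeps the argument strictly positive so the gradient stays finite. A NumPy sketch of its behaviour (values are approximate):

    import numpy as np

    Q = 128
    def penalty(x):
        # distance-to-grid penalty: ~0 on multiples of 1/Q, maximal in between
        return .01 * np.mean(np.sqrt(np.sqrt(1.0001 - np.cos(2*np.pi*(Q*x - np.round(Q*x))))))

    print(penalty(np.array([5/Q, -17/Q])))  # ~0.001: already on the 1/128 grid
    print(penalty(np.array([5.5/Q])))       # ~0.012: halfway between grid points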
@@ -129,9 +137,9 @@ class WeightClip(Constraint):
         return {'name': self.__class__.__name__,
                 'c': self.c}

-constraint = WeightClip(0.999)
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False):
+constraint = WeightClip(0.992)
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False):
     pcm = Input(shape=(None, 3))
     feat = Input(shape=(None, nb_used_features))
     pitch = Input(shape=(None, 1))
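The clipping constant drops from 0.999 to 0.992, presumably to match the quantization grid above: 127/128 = 0.9921875, so clipped weights stay representable as signed 8-bit integers (|Q*x| <= 127). This reading is an inference from the diff, not something stated in the commit.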
@@ -158,10 +166,18 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, train
     rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

-    rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
-              recurrent_constraint = constraint)
-    rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
-               kernel_constraint=constraint)
+    quant = quant_regularizer if quantize else None
+
+    if training:
+        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
+                       recurrent_constraint = constraint, recurrent_regularizer=quant)
+        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
+                        kernel_constraint=constraint, kernel_regularizer=quant)
+    else:
+        rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
+                  recurrent_constraint = constraint, recurrent_regularizer=quant)
+        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
+                   kernel_constraint=constraint, kernel_regularizer=quant)

     rnn_in = Concatenate()([cpcm, rep(cfeat)])
     md = MDense(pcm_levels, activation='softmax', name='dual_fc')
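The two branches build weight-compatible layers: CuDNNGRU implements exactly the reset_after GRU variant with sigmoid recurrent activation that the else branch specifies, so checkpoints written by the training (CuDNNGRU) model load into the inference (GRU) model and vice versa. A sketch of the round trip, with a hypothetical weights filename:

    import lpcnet

    # training graph: CuDNNGRU layers (GPU-only, fused kernel)
    model, _, _ = lpcnet.new_lpcnet_model(training=True)
    # ... train ...
    model.save_weights('weights.h5')            # hypothetical filename

    # inference graph: plain GRU layers, runs on CPU too
    infer, _, _ = lpcnet.new_lpcnet_model(training=False)
    infer.load_weights('weights.h5')            # same weight layout either way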

--- a/train_lpcnet.py
+++ b/train_lpcnet.py

@@ -49,10 +49,23 @@ nb_epochs = 120
 # Try reducing batch_size if you run out of memory on your GPU
 batch_size = 128

-model, _, _ = lpcnet.new_lpcnet_model(training=True)
-
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
+#Set this to True to adapt an existing model (e.g. on new data)
+adaptation = False
+
+if adaptation:
+    lr = 0.0001
+    decay = 0
+else:
+    lr = 0.001
+    decay = 2.5e-5
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+    model, _, _ = lpcnet.new_lpcnet_model(training=True)
+    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+    model.summary()

 feature_file = sys.argv[1]
 pcm_file = sys.argv[2]     # 16 bit unsigned short PCM samples
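A note on the strategy choice: MultiWorkerMirroredStrategy is the multi-machine variant (it discovers its peers via the TF_CONFIG environment variable); launched without a cluster config it behaves as a single worker mirroring across the local GPUs. For one machine, plain MirroredStrategy is the more usual choice; a sketch of that alternative, reusing the model constructor from the diff:

    import tensorflow as tf
    import lpcnet

    strategy = tf.distribute.MirroredStrategy()     # replicate over all local GPUs
    print('replicas in sync:', strategy.num_replicas_in_sync)

    with strategy.scope():
        # variables (weights, optimizer slots) must be created inside the scope
        model, _, _ = lpcnet.new_lpcnet_model(training=True)
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')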
@@ -65,7 +78,7 @@ pcm_chunk_size = frame_size*feature_chunk_size

 # u for unquantised, load 16 bit PCM samples and convert to mu-law
 data = np.fromfile(pcm_file, dtype='uint8')
-nb_frames = len(data)//(4*pcm_chunk_size)
+nb_frames = len(data)//(4*pcm_chunk_size)//batch_size*batch_size

 features = np.fromfile(feature_file, dtype='float32')
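Rounding nb_frames down to a multiple of batch_size drops the trailing partial batch, so every replica always receives a full, equal share of each global batch (ragged final batches are a common failure mode under tf.distribute). For example:

    batch_size = 128
    chunks = 1000                        # e.g. len(data)//(4*pcm_chunk_size)
    nb_frames = chunks // batch_size * batch_size
    print(nb_frames)                     # 896: the last 104 chunks are discarded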
@@ -102,23 +115,15 @@ del pred
 del in_exc

 # dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet33_384_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet33e_384_{epoch:02d}.h5')

-#Set this to True to adapt an existing model (e.g. on new data)
-adaptation = False
-
 if adaptation:
     #Adapting from an existing model
-    model.load_weights('lpcnet32v_384_100.h5')
+    model.load_weights('lpcnet33a_384_100.h5')
     sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
-    lr = 0.0001
-    decay = 0
 else:
     #Training from scratch
     sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
-    lr = 0.001
-    decay = 5e-5

-model.compile(optimizer=Adam(lr, decay=decay, beta_2=0.99), loss='sparse_categorical_crossentropy')
-model.save_weights('lpcnet33_384_00.h5');
+model.save_weights('lpcnet33e_384_00.h5');
 model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])
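Usage note: to actually span multiple machines, each worker needs the standard TF_CONFIG environment variable set before the strategy is created; the hosts and ports below are placeholders:

    import json, os

    os.environ['TF_CONFIG'] = json.dumps({
        'cluster': {'worker': ['host1:12345', 'host2:12345']},  # placeholder hosts
        'task': {'type': 'worker', 'index': 0},   # this machine's slot in the list
    })
    # then run the training script unchanged on every worker, each with its own index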