Mirror of https://github.com/xiph/opus.git (synced 2025-05-28 14:19:13 +00:00)
Support for multi-GPU training
Not sure why CuDNNGRU doesn't get used by default, but we need to explicitly use it to get things to run fast.
commit 237245f815
parent ebc9483b4c
2 changed files with 42 additions and 21 deletions
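
For context, here is a minimal sketch (not part of the commit) of the pattern these changes apply, assuming TensorFlow 2.x: create and compile the model inside a tf.distribute strategy scope so its weights become mirrored per-replica variables, and select CuDNNGRU explicitly at training time since the cuDNN kernel is not being picked automatically here. build_model() and its sizes are placeholders, not the real LPCNet graph.

    # Hypothetical sketch of the multi-GPU + CuDNNGRU pattern (TF 2.x assumed);
    # build_model() stands in for lpcnet.new_lpcnet_model().
    import tensorflow as tf
    from tensorflow.compat.v1.keras.layers import CuDNNGRU
    from tensorflow.keras.layers import Input, GRU
    from tensorflow.keras.models import Model

    def build_model(units=384, training=True):
        x = Input(shape=(None, 128))
        if training:
            # Explicit cuDNN-backed GRU: fast, but needs a GPU at run time.
            y = CuDNNGRU(units, return_sequences=True)(x)
        else:
            # Portable GRU for inference and weight dumping.
            y = GRU(units, return_sequences=True, recurrent_activation="sigmoid",
                    reset_after='true')(x)
        return Model(x, y)

    # Model construction and compile() must happen inside the strategy scope so
    # the variables are distributed across the available GPUs.
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    with strategy.scope():
        model = build_model(training=True)
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

On a single multi-GPU machine, tf.distribute.MirroredStrategy would serve the same purpose; the commit uses MultiWorkerMirroredStrategy, which should also cover the single-worker case.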
@@ -26,8 +26,10 @@
 '''
 
 import math
+import tensorflow as tf
 from tensorflow.keras.models import Model
 from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
 from tensorflow.keras import backend as K
 from tensorflow.keras.constraints import Constraint
 from tensorflow.keras.initializers import Initializer
@@ -42,6 +44,12 @@ pcm_bits = 8
 embed_size = 128
 pcm_levels = 2**pcm_bits
 
+def quant_regularizer(x):
+    Q = 128
+    Q_1 = 1./Q
+    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
 class Sparsify(Callback):
     def __init__(self, t_start, t_end, interval, density):
         super(Sparsify, self).__init__()
@@ -129,9 +137,9 @@ class WeightClip(Constraint):
         return {'name': self.__class__.__name__,
             'c': self.c}
 
-constraint = WeightClip(0.999)
+constraint = WeightClip(0.992)
 
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False):
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False):
     pcm = Input(shape=(None, 3))
     feat = Input(shape=(None, nb_used_features))
     pitch = Input(shape=(None, 1))
@ -158,10 +166,18 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, train
|
||||||
|
|
||||||
rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
|
rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
|
||||||
|
|
||||||
rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
|
quant = quant_regularizer if quantize else None
|
||||||
recurrent_constraint = constraint)
|
|
||||||
rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
|
if training:
|
||||||
kernel_constraint=constraint)
|
rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
|
||||||
|
recurrent_constraint = constraint, recurrent_regularizer=quant)
|
||||||
|
rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
|
||||||
|
kernel_constraint=constraint, kernel_regularizer=quant)
|
||||||
|
else:
|
||||||
|
rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
|
||||||
|
recurrent_constraint = constraint, recurrent_regularizer=quant)
|
||||||
|
rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
|
||||||
|
kernel_constraint=constraint, kernel_regularizer=quant)
|
||||||
|
|
||||||
rnn_in = Concatenate()([cpcm, rep(cfeat)])
|
rnn_in = Concatenate()([cpcm, rep(cfeat)])
|
||||||
md = MDense(pcm_levels, activation='softmax', name='dual_fc')
|
md = MDense(pcm_levels, activation='softmax', name='dual_fc')
|
||||||
|
|
|
@@ -49,10 +49,23 @@ nb_epochs = 120
 # Try reducing batch_size if you run out of memory on your GPU
 batch_size = 128
 
-model, _, _ = lpcnet.new_lpcnet_model(training=True)
+#Set this to True to adapt an existing model (e.g. on new data)
+adaptation = False
 
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
+if adaptation:
+    lr = 0.0001
+    decay = 0
+else:
+    lr = 0.001
+    decay = 2.5e-5
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+    model, _, _ = lpcnet.new_lpcnet_model(training=True)
+    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+    model.summary()
 
 feature_file = sys.argv[1]
 pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples
@@ -65,7 +78,7 @@ pcm_chunk_size = frame_size*feature_chunk_size
 # u for unquantised, load 16 bit PCM samples and convert to mu-law
 
 data = np.fromfile(pcm_file, dtype='uint8')
-nb_frames = len(data)//(4*pcm_chunk_size)
+nb_frames = len(data)//(4*pcm_chunk_size)//batch_size*batch_size
 
 features = np.fromfile(feature_file, dtype='float32')
 
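
The nb_frames change above rounds the number of usable chunks down to a multiple of batch_size, presumably so that every batch fed to the distributed training loop is full. A small worked example of the integer arithmetic (numbers made up):

    batch_size = 128
    raw_chunks = 1000                      # stand-in for len(data)//(4*pcm_chunk_size)
    nb_frames = raw_chunks//batch_size*batch_size
    print(nb_frames)                       # 896, the largest multiple of 128 that fits
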
@@ -102,23 +115,15 @@ del pred
 del in_exc
 
 # dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet33_384_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet33e_384_{epoch:02d}.h5')
 
-#Set this to True to adapt an existing model (e.g. on new data)
-adaptation = False
-
 if adaptation:
     #Adapting from an existing model
-    model.load_weights('lpcnet32v_384_100.h5')
+    model.load_weights('lpcnet33a_384_100.h5')
     sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
-    lr = 0.0001
-    decay = 0
 else:
     #Training from scratch
     sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
-    lr = 0.001
-    decay = 5e-5
 
-model.compile(optimizer=Adam(lr, decay=decay, beta_2=0.99), loss='sparse_categorical_crossentropy')
-model.save_weights('lpcnet33_384_00.h5');
+model.save_weights('lpcnet33e_384_00.h5');
 model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])