From 97dcf52a01c40d7e2b845bdd974a922c5c23d462 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin
Date: Mon, 22 Oct 2018 13:40:11 -0400
Subject: [PATCH] Remove no longer used files (old wavenet and LPCNet
 implementations)

---
 dnn/test_lpcnet.py   | 84 ------------------------------------------
 dnn/train_lpcnet.py  | 70 ------------------------------------
 dnn/train_wavenet.py | 69 -----------------------------------
 dnn/wavenet.py       | 85 --------------------------------------------
 4 files changed, 308 deletions(-)
 delete mode 100755 dnn/test_lpcnet.py
 delete mode 100755 dnn/train_lpcnet.py
 delete mode 100755 dnn/train_wavenet.py
 delete mode 100644 dnn/wavenet.py

diff --git a/dnn/test_lpcnet.py b/dnn/test_lpcnet.py
deleted file mode 100755
index c20a8c01..00000000
--- a/dnn/test_lpcnet.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python3
-
-import lpcnet
-import sys
-import numpy as np
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint
-from ulaw import ulaw2lin, lin2ulaw
-import keras.backend as K
-import h5py
-from adadiff import Adadiff
-
-#import tensorflow as tf
-#from keras.backend.tensorflow_backend import set_session
-#config = tf.ConfigProto()
-#config.gpu_options.per_process_gpu_memory_fraction = 0.28
-#set_session(tf.Session(config=config))
-
-nb_epochs = 40
-batch_size = 64
-
-model, enc, dec = lpcnet.new_wavernn_model()
-model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-#model.summary()
-
-pcmfile = sys.argv[1]
-feature_file = sys.argv[2]
-frame_size = 160
-nb_features = 54
-nb_used_features = lpcnet.nb_used_features
-feature_chunk_size = 15
-pcm_chunk_size = frame_size*feature_chunk_size
-
-data = np.fromfile(pcmfile, dtype='int8')
-nb_frames = len(data)//pcm_chunk_size
-
-features = np.fromfile(feature_file, dtype='float32')
-
-data = data[:nb_frames*pcm_chunk_size]
-features = features[:nb_frames*feature_chunk_size*nb_features]
-
-in_data = np.concatenate([data[0:1], data[:-1]])/16.;
-
-features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
-
-in_data = np.reshape(in_data, (nb_frames*pcm_chunk_size, 1))
-out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1))
-
-
-model.load_weights('lpcnet3a_21.h5')
-
-order = 16
-
-pcm = 0.*out_data
-exc = out_data-0
-pitch = np.zeros((1, 1, 1), dtype='float32')
-fexc = np.zeros((1, 1, 1), dtype='float32')
-iexc = np.zeros((1, 1, 1), dtype='int16')
-state = np.zeros((1, lpcnet.rnn_units), dtype='float32')
-for c in range(1, nb_frames):
-    cfeat = enc.predict(features[c:c+1, :, :nb_used_features])
-    for fr in range(1, feature_chunk_size):
-        f = c*feature_chunk_size + fr
-        a = features[c, fr, nb_used_features:]
-
-        #print(a)
-        gain = 1.;
-        period = int(50*features[c, fr, 36]+100)
-        period = period - 4
-        for i in range(frame_size):
-            pitch[0, 0, 0] = exc[f*frame_size + i - period, 0]
-            fexc[0, 0, 0] = 2*exc[f*frame_size + i - 1]
-            #fexc[0, 0, 0] = in_data[f*frame_size + i, 0]
-            #print(cfeat.shape)
-            p, state = dec.predict([fexc, cfeat[:, fr:fr+1, :], state])
-            #p = np.maximum(p-0.003, 0)
-            p = p/(1e-5 + np.sum(p))
-            #print(np.sum(p))
-            iexc[0, 0, 0] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))-128
-            exc[f*frame_size + i] = iexc[0, 0, 0]/16.
-            #out_data[f*frame_size + i, 0] = iexc[0, 0, 0]
-            pcm[f*frame_size + i, 0] = gain*iexc[0, 0, 0] - sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0])
-            print(iexc[0, 0, 0], out_data[f*frame_size + i, 0], pcm[f*frame_size + i, 0])
-
diff --git a/dnn/train_lpcnet.py b/dnn/train_lpcnet.py
deleted file mode 100755
index 8658b01e..00000000
--- a/dnn/train_lpcnet.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/python3
-
-import lpcnet
-import sys
-import numpy as np
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint
-from ulaw import ulaw2lin, lin2ulaw
-import keras.backend as K
-import h5py
-from adadiff import Adadiff
-
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-config = tf.ConfigProto()
-config.gpu_options.per_process_gpu_memory_fraction = 0.44
-set_session(tf.Session(config=config))
-
-nb_epochs = 40
-batch_size = 64
-
-model, enc, dec = lpcnet.new_wavernn_model()
-model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
-
-pcmfile = sys.argv[1]
-feature_file = sys.argv[2]
-frame_size = 160
-nb_features = 54
-nb_used_features = lpcnet.nb_used_features
-feature_chunk_size = 15
-pcm_chunk_size = frame_size*feature_chunk_size
-
-data = np.fromfile(pcmfile, dtype='int8')
-nb_frames = len(data)//pcm_chunk_size
-
-features = np.fromfile(feature_file, dtype='float32')
-
-data = data[:nb_frames*pcm_chunk_size]
-features = features[:nb_frames*feature_chunk_size*nb_features]
-
-in_data = np.concatenate([data[0:1], data[:-1]])/16.;
-
-features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
-pitch = 1.*data
-pitch[:320] = 0
-for i in range(2, nb_frames*feature_chunk_size):
-    period = int(50*features[i,36]+100)
-    period = period - 4
-    pitch[i*frame_size:(i+1)*frame_size] = data[i*frame_size-period:(i+1)*frame_size-period]
-in_pitch = np.reshape(pitch/16., (nb_frames, pcm_chunk_size, 1))
-
-in_data = np.reshape(in_data, (nb_frames, pcm_chunk_size, 1))
-out_data = np.reshape(data, (nb_frames, pcm_chunk_size, 1))
-out_data = (out_data.astype('int16')+128).astype('uint8')
-features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
-features = features[:, :, :nb_used_features]
-
-
-#in_data = np.concatenate([in_data, in_pitch], axis=-1)
-
-#with h5py.File('in_data.h5', 'w') as f:
-#    f.create_dataset('data', data=in_data[:50000, :, :])
-#    f.create_dataset('feat', data=features[:50000, :, :])
-
-checkpoint = ModelCheckpoint('lpcnet3b_{epoch:02d}.h5')
-
-#model.load_weights('wavernn1c_01.h5')
-model.compile(optimizer=Adam(0.001, amsgrad=True, decay=2e-4), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.fit([in_data, features], out_data, batch_size=batch_size, epochs=30, validation_split=0.2, callbacks=[checkpoint])
diff --git a/dnn/train_wavenet.py b/dnn/train_wavenet.py
deleted file mode 100755
index 1ac21604..00000000
--- a/dnn/train_wavenet.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/python3
-
-import wavenet
-import sys
-import numpy as np
-from keras.optimizers import Adam
-from keras.callbacks import ModelCheckpoint
-from ulaw import ulaw2lin, lin2ulaw
-import keras.backend as K
-import h5py
-
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-config = tf.ConfigProto()
-config.gpu_options.per_process_gpu_memory_fraction = 0.44
-set_session(tf.Session(config=config))
-
-nb_epochs = 40
-batch_size = 64
-
-model = wavenet.new_wavenet_model(fftnet=True)
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
-
-pcmfile = sys.argv[1]
-feature_file = sys.argv[2]
-frame_size = 160
-nb_features = 54
-nb_used_features = wavenet.nb_used_features
-feature_chunk_size = 15
-pcm_chunk_size = frame_size*feature_chunk_size
-
-data = np.fromfile(pcmfile, dtype='int8')
-nb_frames = len(data)//pcm_chunk_size
-
-features = np.fromfile(feature_file, dtype='float32')
-
-data = data[:nb_frames*pcm_chunk_size]
-features = features[:nb_frames*feature_chunk_size*nb_features]
-
-in_data = np.concatenate([data[0:1], data[:-1]])/16.;
-
-features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
-pitch = 1.*data
-pitch[:320] = 0
-for i in range(2, nb_frames*feature_chunk_size):
-    period = int(50*features[i,36]+100)
-    period = period - 4
-    pitch[i*frame_size:(i+1)*frame_size] = data[i*frame_size-period:(i+1)*frame_size-period]
-in_pitch = np.reshape(pitch/16., (nb_frames, pcm_chunk_size, 1))
-
-in_data = np.reshape(in_data, (nb_frames, pcm_chunk_size, 1))
-out_data = np.reshape(data, (nb_frames, pcm_chunk_size, 1))
-out_data = (out_data.astype('int16')+128).astype('uint8')
-features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
-features = features[:, :, :nb_used_features]
-
-
-#in_data = np.concatenate([in_data, in_pitch], axis=-1)
-
-#with h5py.File('in_data.h5', 'w') as f:
-#    f.create_dataset('data', data=in_data[:50000, :, :])
-#    f.create_dataset('feat', data=features[:50000, :, :])
-
-checkpoint = ModelCheckpoint('wavenet3c_{epoch:02d}.h5')
-
-#model.load_weights('wavernn1c_01.h5')
-model.compile(optimizer=Adam(0.001, amsgrad=True, decay=2e-4), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.fit([in_data, features], out_data, batch_size=batch_size, epochs=30, validation_split=0.2, callbacks=[checkpoint])
diff --git a/dnn/wavenet.py b/dnn/wavenet.py
deleted file mode 100644
index 753d74d5..00000000
--- a/dnn/wavenet.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/python3
-
-import math
-from keras.models import Model
-from keras.layers import Input, LSTM, CuDNNGRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Add, Multiply, Bidirectional, MaxPooling1D, Activation
-from keras import backend as K
-from keras.initializers import Initializer
-from keras.initializers import VarianceScaling
-from mdense import MDense
-import numpy as np
-import h5py
-import sys
-from causalconv import CausalConv
-from gatedconv import GatedConv
-
-units=128
-pcm_bits = 8
-pcm_levels = 2**pcm_bits
-nb_used_features = 38
-
-class PCMInit(Initializer):
-    def __init__(self, gain=.1, seed=None):
-        self.gain = gain
-        self.seed = seed
-
-    def __call__(self, shape, dtype=None):
-        num_rows = 1
-        for dim in shape[:-1]:
-            num_rows *= dim
-        num_cols = shape[-1]
-        flat_shape = (num_rows, num_cols)
-        if self.seed is not None:
-            np.random.seed(self.seed)
-        a = np.random.uniform(-1.7321, 1.7321, flat_shape)
-        #a[:,0] = math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows
-        #a[:,1] = .5*a[:,0]*a[:,0]*a[:,0]
-        a = a + np.reshape(math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows, (num_rows, 1))
-        return self.gain * a
-
-    def get_config(self):
-        return {
-            'gain': self.gain,
-            'seed': self.seed
-        }
-
-def new_wavenet_model(fftnet=False):
-    pcm = Input(shape=(None, 1))
-    pitch = Input(shape=(None, 1))
-    feat = Input(shape=(None, nb_used_features))
-    dec_feat = Input(shape=(None, 32))
-
-    fconv1 = Conv1D(128, 3, padding='same', activation='tanh')
-    fconv2 = Conv1D(32, 3, padding='same', activation='tanh')
-
-    cfeat = fconv2(fconv1(feat))
-
-    rep = Lambda(lambda x: K.repeat_elements(x, 160, 1))
-
-    activation='tanh'
-    rfeat = rep(cfeat)
-    #tmp = Concatenate()([pcm, rfeat])
-    embed = Embedding(256, units, embeddings_initializer=PCMInit())
-    tmp = Reshape((-1, units))(embed(pcm))
-    init = VarianceScaling(scale=1.5,mode='fan_avg',distribution='uniform')
-    for k in range(10):
-        res = tmp
-        dilation = 9-k if fftnet else k
-        tmp = Concatenate()([tmp, rfeat])
-        c = GatedConv(units, 2, dilation_rate=2**dilation, activation='tanh', kernel_initializer=init)
-        tmp = Dense(units, activation='relu')(c(tmp))
-
-        '''tmp = Concatenate()([tmp, rfeat])
-        c1 = CausalConv(units, 2, dilation_rate=2**dilation, activation='tanh')
-        c2 = CausalConv(units, 2, dilation_rate=2**dilation, activation='sigmoid')
-        tmp = Multiply()([c1(tmp), c2(tmp)])
-        tmp = Dense(units, activation='relu')(tmp)'''
-
-        if k != 0:
-            tmp = Add()([tmp, res])
-
-    md = MDense(pcm_levels, activation='softmax')
-    ulaw_prob = md(tmp)
-
-    model = Model([pcm, feat], ulaw_prob)
-    return model