diff --git a/dnn/lpcnet.py b/dnn/lpcnet.py
index 85fa3da6..7d40c1a5 100644
--- a/dnn/lpcnet.py
+++ b/dnn/lpcnet.py
@@ -41,7 +41,7 @@ class PCMInit(Initializer):
         }
 
 def new_wavernn_model():
-    pcm = Input(shape=(None, 1))
+    pcm = Input(shape=(None, 2))
     pitch = Input(shape=(None, 1))
     feat = Input(shape=(None, nb_used_features))
     dec_feat = Input(shape=(None, 32))
@@ -61,7 +61,7 @@ def new_wavernn_model():
     cpitch = pitch
 
     embed = Embedding(256, 128, embeddings_initializer=PCMInit())
-    cpcm = Reshape((-1, 128))(embed(pcm))
+    cpcm = Reshape((-1, 128*2))(embed(pcm))
 
     cfeat = fconv2(fconv1(feat))
 
diff --git a/dnn/train_wavenet_audio.py b/dnn/train_wavenet_audio.py
index b6c46c08..df8c39cb 100755
--- a/dnn/train_wavenet_audio.py
+++ b/dnn/train_wavenet_audio.py
@@ -51,8 +51,18 @@ features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
 
 pred = np.fromfile(pred_file, dtype='int16')
 pred = pred[:nb_frames*pcm_chunk_size]
+
+pred_in = 32768.*ulaw2lin(data)
+for i in range(2, nb_frames*feature_chunk_size):
+    pred[i*frame_size:(i+1)*frame_size] = 0
+    if i % 100000 == 0:
+        print(i)
+    for k in range(16):
+        pred[i*frame_size:(i+1)*frame_size] = pred[i*frame_size:(i+1)*frame_size] - \
+            pred_in[i*frame_size-k-1:(i+1)*frame_size-k-1]*features[i, nb_features-16+k]
+
 pred = np.minimum(127, lin2ulaw(pred/32768.))
-pred = pred + np.random.randint(-1, 1, len(data))
+#pred = pred + np.random.randint(-1, 1, len(data))
 
 pitch = 1.*data
 
@@ -72,7 +82,7 @@ features = features[:, :, :nb_used_features]
 
 pred = np.reshape(pred, (nb_frames, pcm_chunk_size, 1))
 pred = (pred.astype('int16')+128).astype('uint8')
-#in_data = np.concatenate([in_data, pred], axis=-1)
+in_data = np.concatenate([in_data, pred], axis=-1)
 
 #in_data = np.concatenate([in_data, in_pitch], axis=-1)
 
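The training-script change builds, frame by frame, a prediction signal from the 16 previous linear-domain samples weighted by the last 16 entries of each feature vector, converts it back to u-law, and appends it to in_data as a second sample channel; that is why the model's pcm Input grows from (None, 1) to (None, 2) and the embedding output is reshaped to 128*2. Below is a minimal standalone sketch of what that inner loop computes, on made-up toy data: frame_size=160 is an assumption (the diff never states the frame size), and reading the last 16 features as LPC coefficients is an interpretation of the slicing, not something the diff spells out.

import numpy as np

frame_size = 160   # samples per feature frame (assumed; not stated in the diff)
lpc_order = 16     # the diff's inner loop runs k in range(16)
nb_demo_frames = 4

rng = np.random.default_rng(0)
signal = rng.standard_normal(nb_demo_frames * frame_size)    # stand-in for 32768.*ulaw2lin(data)
lpc = rng.uniform(-0.1, 0.1, (nb_demo_frames, lpc_order))    # stand-in for features[:, nb_features-16:]

pred = np.zeros_like(signal)
for i in range(2, nb_demo_frames):          # the diff likewise skips the first frames
    frame = slice(i * frame_size, (i + 1) * frame_size)
    pred[frame] = 0
    for k in range(lpc_order):
        # prediction = -sum_k a_k * x[n-k-1], computed sample-by-sample over the frame
        pred[frame] -= signal[i * frame_size - k - 1:(i + 1) * frame_size - k - 1] * lpc[i, k]

In the diff itself pred starts as the int16 contents of pred_file, and after the loop it is mapped back with np.minimum(127, lin2ulaw(pred/32768.)) and offset into uint8 range before being concatenated onto in_data.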