mirror of
https://github.com/xiph/opus.git
synced 2025-05-19 18:08:29 +00:00
second RNN
This commit is contained in:
parent
3698977292
commit
495f8ea5f3
3 changed files with 18 additions and 12 deletions
|
@@ -10,7 +10,8 @@ import numpy as np
|
||||||
import h5py
|
import h5py
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
rnn_units=128
|
rnn_units1=128
|
||||||
|
rnn_units2=32
|
||||||
pcm_bits = 8
|
pcm_bits = 8
|
||||||
embed_size = 128
|
embed_size = 128
|
||||||
pcm_levels = 2**pcm_bits
|
pcm_levels = 2**pcm_bits
|
||||||
|
@@ -47,7 +48,8 @@ def new_wavernn_model():
|
||||||
feat = Input(shape=(None, nb_used_features))
|
feat = Input(shape=(None, nb_used_features))
|
||||||
pitch = Input(shape=(None, 1))
|
pitch = Input(shape=(None, 1))
|
||||||
dec_feat = Input(shape=(None, 128))
|
dec_feat = Input(shape=(None, 128))
|
||||||
dec_state = Input(shape=(rnn_units,))
|
dec_state1 = Input(shape=(rnn_units1,))
|
||||||
|
dec_state2 = Input(shape=(rnn_units2,))
|
||||||
|
|
||||||
fconv1 = Conv1D(128, 3, padding='same', activation='tanh')
|
fconv1 = Conv1D(128, 3, padding='same', activation='tanh')
|
||||||
fconv2 = Conv1D(102, 3, padding='same', activation='tanh')
|
fconv2 = Conv1D(102, 3, padding='same', activation='tanh')
|
||||||
|
@@ -70,18 +72,21 @@ def new_wavernn_model():
|
||||||
|
|
||||||
rep = Lambda(lambda x: K.repeat_elements(x, 160, 1))
|
rep = Lambda(lambda x: K.repeat_elements(x, 160, 1))
|
||||||
|
|
||||||
rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True)
|
rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True)
|
||||||
|
rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True)
|
||||||
rnn_in = Concatenate()([cpcm, cexc, rep(cfeat)])
|
rnn_in = Concatenate()([cpcm, cexc, rep(cfeat)])
|
||||||
md = MDense(pcm_levels, activation='softmax')
|
md = MDense(pcm_levels, activation='softmax')
|
||||||
gru_out, state = rnn(rnn_in)
|
gru_out1, _ = rnn(rnn_in)
|
||||||
ulaw_prob = md(gru_out)
|
gru_out2, _ = rnn2(gru_out1)
|
||||||
|
ulaw_prob = md(gru_out2)
|
||||||
|
|
||||||
model = Model([pcm, exc, feat, pitch], ulaw_prob)
|
model = Model([pcm, exc, feat, pitch], ulaw_prob)
|
||||||
encoder = Model([feat, pitch], cfeat)
|
encoder = Model([feat, pitch], cfeat)
|
||||||
|
|
||||||
dec_rnn_in = Concatenate()([cpcm, cexc, dec_feat])
|
dec_rnn_in = Concatenate()([cpcm, cexc, dec_feat])
|
||||||
dec_gru_out, state = rnn(dec_rnn_in, initial_state=dec_state)
|
dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
|
||||||
dec_ulaw_prob = md(dec_gru_out)
|
dec_gru_out2, state2 = rnn2(dec_gru_out1, initial_state=dec_state2)
|
||||||
|
dec_ulaw_prob = md(dec_gru_out2)
|
||||||
|
|
||||||
decoder = Model([pcm, exc, dec_feat, dec_state], [dec_ulaw_prob, state])
|
decoder = Model([pcm, exc, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
|
||||||
return model, encoder, decoder
|
return model, encoder, decoder
|
||||||
|
|
|
@@ -59,14 +59,15 @@ in_data = np.reshape(in_data, (nb_frames*pcm_chunk_size, 1))
|
||||||
out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1))
|
out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1))
|
||||||
|
|
||||||
|
|
||||||
model.load_weights('wavenet4f2_30.h5')
|
model.load_weights('wavenet5d0_19.h5')
|
||||||
|
|
||||||
order = 16
|
order = 16
|
||||||
|
|
||||||
pcm = 0.*out_data
|
pcm = 0.*out_data
|
||||||
fexc = np.zeros((1, 1, 2), dtype='float32')
|
fexc = np.zeros((1, 1, 2), dtype='float32')
|
||||||
iexc = np.zeros((1, 1, 1), dtype='int16')
|
iexc = np.zeros((1, 1, 1), dtype='int16')
|
||||||
state = np.zeros((1, lpcnet.rnn_units), dtype='float32')
|
state1 = np.zeros((1, lpcnet.rnn_units1), dtype='float32')
|
||||||
|
state2 = np.zeros((1, lpcnet.rnn_units2), dtype='float32')
|
||||||
for c in range(1, nb_frames):
|
for c in range(1, nb_frames):
|
||||||
cfeat = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
|
cfeat = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
|
||||||
for fr in range(1, feature_chunk_size):
|
for fr in range(1, feature_chunk_size):
|
||||||
|
@@ -82,7 +83,7 @@ for c in range(1, nb_frames):
|
||||||
pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0])
|
pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0])
|
||||||
fexc[0, 0, 1] = lin2ulaw(pred)
|
fexc[0, 0, 1] = lin2ulaw(pred)
|
||||||
|
|
||||||
p, state = dec.predict([fexc, iexc, cfeat[:, fr:fr+1, :], state])
|
p, state1, state2 = dec.predict([fexc, iexc, cfeat[:, fr:fr+1, :], state1, state2])
|
||||||
#p = p*p
|
#p = p*p
|
||||||
#p = p/(1e-18 + np.sum(p))
|
#p = p/(1e-18 + np.sum(p))
|
||||||
p = np.maximum(p-0.001, 0).astype('float64')
|
p = np.maximum(p-0.001, 0).astype('float64')
|
||||||
|
|
|
@@ -86,7 +86,7 @@ periods = (50*features[:,:,36:37]+100).astype('int16')
|
||||||
|
|
||||||
in_data = np.concatenate([in_data, pred], axis=-1)
|
in_data = np.concatenate([in_data, pred], axis=-1)
|
||||||
|
|
||||||
checkpoint = ModelCheckpoint('wavenet5b_{epoch:02d}.h5')
|
checkpoint = ModelCheckpoint('wavenet5d0_{epoch:02d}.h5')
|
||||||
|
|
||||||
#model.load_weights('wavenet4f2_30.h5')
|
#model.load_weights('wavenet4f2_30.h5')
|
||||||
model.compile(optimizer=Adam(0.001, amsgrad=True, decay=2e-4), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
model.compile(optimizer=Adam(0.001, amsgrad=True, decay=2e-4), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue