Dumping 16-bit linear training data
parent a3ef596822 · commit 144b7311bc
7 changed files with 58 additions and 65 deletions
@@ -75,28 +75,20 @@ void compute_noise(int *noise, float noise_std) {
 }
 
 
-void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes, int e2e) {
+void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) {
   int i, k;
   for (k=0;k<nframes;k++) {
-    unsigned char data[4*FRAME_SIZE];
+    short data[2*FRAME_SIZE];
     for (i=0;i<FRAME_SIZE;i++) {
       float p=0;
       float e;
       int j;
       for (j=0;j<LPC_ORDER;j++) p -= st->features[k][NB_BANDS+2+j]*st->sig_mem[j];
       e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p);
-      /* Signal. */
-      data[4*i] = lin2ulaw(st->sig_mem[0]);
-      /* Prediction. */
-      data[4*i+1] = lin2ulaw(p);
-      /* Excitation in. */
-      data[4*i+2] = st->exc_mem;
-      /* Excitation out. */
-      if (e2e) {
-        data[4*i+3] = lin2ulaw(pcm[k*FRAME_SIZE+i]);
-      } else {
-        data[4*i+3] = e;
-      }
+      /* Signal in. */
+      data[2*i] = st->sig_mem[0];
+      /* Signal out. */
+      data[2*i+1] = pcm[k*FRAME_SIZE+i];
       /* Simulate error on excitation. */
       e += noise[k*FRAME_SIZE+i];
       e = IMIN(255, IMAX(0, e));

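The dump format changes here: each sample used to contribute four μ-law bytes (signal, prediction, excitation in, excitation out) and now contributes two 16-bit linear values (previous signal, current signal); prediction and excitation are reconstructed on the training side instead. A minimal sketch of reading the new dump, assuming a hypothetical file name (the dtype and reshape mirror the train_lpcnet.py changes further down):

    # Sketch: parse the 16-bit linear training dump (file name hypothetical).
    import numpy as np

    raw = np.memmap('data.s16', dtype='int16', mode='r')
    pairs = raw.reshape(-1, 2)
    sig_in  = pairs[:, 0]   # previous sample (st->sig_mem[0])
    sig_out = pairs[:, 1]   # current sample (pcm[k*FRAME_SIZE+i])
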
@@ -119,7 +111,6 @@ static short float2short(float x)
 int main(int argc, char **argv) {
   int i;
   char *argv0;
-  int e2e=0;
   int count=0;
   static const float a_hp[2] = {-1.99599, 0.99600};
   static const float b_hp[2] = {-2, 1};

@@ -151,11 +142,6 @@ int main(int argc, char **argv) {
   srand(getpid());
   st = lpcnet_encoder_create();
   argv0=argv[0];
-  if (argc > 2 && strcmp(argv[1], "-end2end")==0) {
-    e2e = 1;
-    argv++;
-    argc--;
-  }
   if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
   if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
     training = 1;

@@ -281,7 +267,7 @@ int main(int argc, char **argv) {
 
     if (!quantize) {
       process_single_frame(st, ffeat);
-      if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1, e2e);
+      if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
     }
     st->pcount++;
     /* Running on groups of 4 frames. */

@@ -289,7 +275,7 @@ int main(int argc, char **argv) {
       if (quantize) {
         unsigned char buf[8];
         process_superframe(st, buf, ffeat, encode, quantize);
-        if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4, e2e);
+        if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4);
       }
       st->pcount = 0;
     }

@@ -1,5 +1,6 @@
 import numpy as np
 from tensorflow.keras.utils import Sequence
+from ulaw import lin2ulaw
 
 def lpc2rc(lpc):
     #print("shape is = ", lpc.shape)

@@ -12,13 +13,13 @@ def lpc2rc(lpc):
     return rc
 
 class LPCNetLoader(Sequence):
-    def __init__(self, data, features, periods, batch_size, lpc_out=False):
+    def __init__(self, data, features, periods, batch_size, e2e=False):
         self.batch_size = batch_size
         self.nb_batches = np.minimum(np.minimum(data.shape[0], features.shape[0]), periods.shape[0])//self.batch_size
         self.data = data[:self.nb_batches*self.batch_size, :]
         self.features = features[:self.nb_batches*self.batch_size, :]
         self.periods = periods[:self.nb_batches*self.batch_size, :]
-        self.lpc_out = lpc_out
+        self.e2e = e2e
         self.on_epoch_end()
 
     def on_epoch_end(self):

@@ -27,15 +28,18 @@ class LPCNetLoader(Sequence):
 
     def __getitem__(self, index):
         data = self.data[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
-        in_data = data[: , :, :3]
-        out_data = data[: , :, 3:4]
+        in_data = data[: , :, :1]
+        out_data = data[: , :, 1:]
         features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :-16]
         periods = self.periods[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
         outputs = [out_data]
-        if self.lpc_out:
-            lpc = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], 2:-2, -16:]
+        inputs = [in_data, features, periods]
+        lpc = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], 2:-2, -16:]
+        if self.e2e:
             outputs.append(lpc2rc(lpc))
-        return ([in_data, features, periods], outputs)
+        else:
+            inputs.append(lpc)
+        return (inputs, outputs)
 
     def __len__(self):
         return self.nb_batches

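With only two channels in the dump, the loader feeds channel 0 as the network input and channel 1 as the target; for the non-end-to-end model the LPCs become an explicit fourth input instead of an extra output. A rough usage sketch (the per-mode contents follow the code above; the concrete shapes are illustrative, not from the commit):

    # Sketch: what a batch looks like in each mode.
    loader = LPCNetLoader(data, features, periods, batch_size=128, e2e=False)
    inputs, outputs = loader[0]
    # e2e=False: inputs = [in_data, features, periods, lpc]; outputs = [out_data]
    # e2e=True:  inputs = [in_data, features, periods];      outputs = [out_data, lpc2rc(lpc)]
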
@@ -252,7 +252,7 @@ with h5py.File(filename, "r") as f:
     cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
     e2e = 'rc2lpc' in f['model_weights']
 
-model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = flag_e2e, cond_size=cond_size)
+model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size)
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 #model.summary()
 

@@ -12,7 +12,7 @@ def res_from_sigloss():
     def loss(y_true,y_pred):
         p = y_pred[:,:,0:1]
         model_out = y_pred[:,:,1:]
-        e_gt = tf_l2u(tf_u2l(y_true) - tf_u2l(p))
+        e_gt = tf_l2u(y_true - p)
         e_gt = tf.round(e_gt)
         e_gt = tf.cast(e_gt,'int32')
         sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out)

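y_true and the prediction p are now 16-bit linear values rather than μ-law indices, so the residual is taken directly in the linear domain and only the result is companded before the cross entropy. For orientation, LPCNet's μ-law mapping looks roughly like the following (a numpy paraphrase of ulaw.py; tf_l2u follows the same formula without the rounding):

    # Sketch of the companding tf_l2u applies (paraphrased from ulaw.py).
    import numpy as np

    scale = 255.0/32768.0

    def lin2ulaw(x):
        s = np.sign(x)
        u = s*(128*np.log(1 + scale*np.abs(x))/np.log(256))
        return np.clip(128 + np.round(u), 0, 255)
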
@@ -24,9 +24,10 @@ def res_from_sigloss():
 # Also adds a probability compensation (to account for matching cross entropy in the linear domain), weighted by gamma
 def interp_mulaw(gamma = 1):
     def loss(y_true,y_pred):
+        y_true = tf.cast(y_true, 'float32')
         p = y_pred[:,:,0:1]
         model_out = y_pred[:,:,1:]
-        e_gt = tf_l2u(tf_u2l(y_true) - tf_u2l(p))
+        e_gt = tf_l2u(y_true - p)
         prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
         alpha = e_gt - tf.math.floor(e_gt)
         alpha = tf.tile(alpha,[1,1,256])

@@ -42,7 +43,7 @@ def interp_mulaw(gamma = 1):
 def metric_oginterploss(y_true,y_pred):
     p = y_pred[:,:,0:1]
     model_out = y_pred[:,:,1:]
-    e_gt = tf_l2u(tf_u2l(y_true) - tf_u2l(p))
+    e_gt = tf_l2u(y_true - p)
     prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
     alpha = e_gt - tf.math.floor(e_gt)
     alpha = tf.tile(alpha,[1,1,256])

@@ -57,7 +58,7 @@ def metric_oginterploss(y_true,y_pred):
 def metric_icel(y_true, y_pred):
     p = y_pred[:,:,0:1]
     model_out = y_pred[:,:,1:]
-    e_gt = tf_l2u(tf_u2l(y_true) - tf_u2l(p))
+    e_gt = tf_l2u(y_true - p)
     alpha = e_gt - tf.math.floor(e_gt)
     alpha = tf.tile(alpha,[1,1,256])
     e_gt = tf.cast(e_gt,'int32')

@@ -68,9 +69,10 @@ def metric_icel(y_true, y_pred):
 
 # Non-interpolated (rounded) cross entropy loss metric
 def metric_cel(y_true, y_pred):
+    y_true = tf.cast(y_true, 'float32')
     p = y_pred[:,:,0:1]
     model_out = y_pred[:,:,1:]
-    e_gt = tf_l2u(tf_u2l(y_true) - tf_u2l(p))
+    e_gt = tf_l2u(y_true - p)
     e_gt = tf.round(e_gt)
     e_gt = tf.cast(e_gt,'int32')
     e_gt = tf.clip_by_value(e_gt,0,255)

@@ -80,7 +82,7 @@ def metric_cel(y_true, y_pred):
 # Variance metric of the output excitation
 def metric_exc_sd(y_true,y_pred):
     p = y_pred[:,:,0:1]
-    e_gt = tf_l2u(tf_u2l(y_true) - tf_u2l(p))
+    e_gt = tf_l2u(y_true - p)
     sd_egt = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)(e_gt,128)
     return sd_egt
 

@@ -230,8 +230,9 @@ class WeightClip(Constraint):
 
 constraint = WeightClip(0.992)
 
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128):
-    pcm = Input(shape=(None, 3), batch_size=batch_size)
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128, lpc_order=16):
+    pcm = Input(shape=(None, 1), batch_size=batch_size)
+    dpcm = Input(shape=(None, 3), batch_size=batch_size)
     feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
     pitch = Input(shape=(None, 1), batch_size=batch_size)
     dec_feat = Input(shape=(None, cond_size))

@@ -257,20 +258,19 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s
 
     cfeat = fdense2(fdense1(cfeat))
 
-    if not flag_e2e:
-        embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
-        cpcm = Reshape((-1, embed_size*3))(embed(pcm))
-    else:
-        Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
-        error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
+    Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
+    error_calc = Lambda(lambda x: tf_l2u(x[0] - tf.roll(x[1],1,axis = 1)))
+    if flag_e2e:
         lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
-        tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
-        past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
-        embed = diff_Embed(name='embed_sig',initializer = PCMInit())
-        cpcm = Concatenate()([Input_extractor([pcm,0]),tensor_preds,past_errors])
-        cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
-    cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
-    cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
+    else:
+        lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size)
+    tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
+    past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
+    embed = diff_Embed(name='embed_sig',initializer = PCMInit())
+    cpcm = Concatenate()([tf_l2u(Input_extractor([pcm,0])),tf_l2u(tensor_preds),past_errors])
+    cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
+    cpcm_decoder = Concatenate()([Input_extractor([dpcm,0]),Input_extractor([dpcm,1]),Input_extractor([dpcm,2])])
+    cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
 
 
     rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

@@ -301,10 +301,10 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s
         md.trainable=False
         embed.Trainable=False
 
+    m_out = Concatenate(name='pdf')([tensor_preds,ulaw_prob])
     if not flag_e2e:
-        model = Model([pcm, feat, pitch], ulaw_prob)
+        model = Model([pcm, feat, pitch, lpcoeffs], m_out)
     else:
-        m_out = Concatenate(name='pdf')([tensor_preds,ulaw_prob])
         model = Model([pcm, feat, pitch], [m_out, cfeat])
     model.rnn_units1 = rnn_units1
     model.rnn_units2 = rnn_units2

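Since the non-end-to-end model no longer embeds μ-law prediction/excitation channels, it now takes the LPCs as a fourth input and produces the same 'pdf' output (prediction concatenated with the 256-way μ-law probabilities) as the end-to-end variant. A hedged sketch of constructing both variants (keyword names from this file, everything else illustrative):

    # Sketch: the two model variants after this change.
    model, _, _ = new_lpcnet_model(flag_e2e=False, lpc_order=16)
    # inputs: [pcm, feat, pitch, lpcoeffs] -> output: 'pdf'

    model_e2e, _, _ = new_lpcnet_model(flag_e2e=True)
    # inputs: [pcm, feat, pitch] -> outputs: ['pdf', cfeat]; LPCs come from rc2lpc
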
@@ -321,5 +321,8 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s
     dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
     dec_ulaw_prob = Lambda(tree_to_pdf_infer)(md(dec_gru_out2))
 
-    decoder = Model([pcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+    if flag_e2e:
+        decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+    else:
+        decoder = Model([pcm, dec_feat, dec_state1, dec_state2, lpcoeffs], [dec_ulaw_prob, state1, state2])
     return model, encoder, decoder

@@ -30,7 +30,7 @@ def tf_u2l(u):
 # The inputs xt and lpc conform with the shapes in lpcnet.py (the '2400' is coded keeping this in mind)
 class diff_pred(Layer):
     def call(self, inputs, lpcoeffs_N = 16, frame_size = 160):
-        xt = tf_u2l(inputs[0])
+        xt = inputs[0]
         lpc = inputs[1]
 
         rept = Lambda(lambda x: K.repeat_elements(x , frame_size, 1))

@@ -39,7 +39,7 @@ class diff_pred(Layer):
 
         pred = -Multiply()([rept(lpc),cX(zpX(xt))])
 
-        return tf_l2u(K.sum(pred,axis = 2,keepdims = True))
+        return K.sum(pred,axis = 2,keepdims = True)
 
 # Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion
 class diff_rc2lpc(Layer):

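diff_pred now operates entirely in the linear domain: the tf_u2l on its input and tf_l2u on its output move to the caller (see the tf_l2u calls around tensor_preds in lpcnet.py above). The layer itself is just the LPC predictor p_t = -sum_j a_j * x_(t-j); a plain-numpy sketch of that operation for a single time step, under assumed shapes:

    # Sketch: the per-sample LPC prediction diff_pred computes.
    import numpy as np

    def lpc_predict(past, a):
        # past: previous samples, most recent first; a: LPC coefficients
        return -np.dot(a, past[:len(a)])
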
@@ -125,7 +125,7 @@ strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
 with strategy.scope():
     model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size, rnn_units2=args.grub_size, batch_size=batch_size, training=True, quantize=quantize, flag_e2e = flag_e2e, cond_size=args.cond_size)
     if not flag_e2e:
-        model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics='sparse_categorical_crossentropy')
+        model.compile(optimizer=opt, loss=metric_cel, metrics=metric_cel)
     else:
         model.compile(optimizer=opt, loss = [interp_mulaw(gamma=gamma), loss_matchlar()], loss_weights = [1.0, 2.0], metrics={'pdf':[metric_cel,metric_icel,metric_exc_sd,metric_oginterploss]})
     model.summary()

@@ -140,19 +140,17 @@ pcm_chunk_size = frame_size*feature_chunk_size
 
 # u for unquantised, load 16 bit PCM samples and convert to mu-law
 
-data = np.memmap(pcm_file, dtype='uint8', mode='r')
-nb_frames = (len(data)//(4*pcm_chunk_size)-1)//batch_size*batch_size
+data = np.memmap(pcm_file, dtype='int16', mode='r')
+nb_frames = (len(data)//(2*pcm_chunk_size)-1)//batch_size*batch_size
 
 features = np.memmap(feature_file, dtype='float32', mode='r')
 
 # limit to discrete number of frames
-data = data[4*2*frame_size:]
-data = data[:nb_frames*4*pcm_chunk_size]
+data = data[2*2*frame_size:]
+data = data[:nb_frames*2*pcm_chunk_size]
 
 
-data = np.reshape(data, (nb_frames, pcm_chunk_size, 4))
-#in_data = data[:,:,:3]
-#out_exc = data[:,:,3:4]
+data = np.reshape(data, (nb_frames, pcm_chunk_size, 2))
 
 #print("ulaw std = ", np.std(out_exc))

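The bookkeeping follows the format change: each sample is now two int16 array elements instead of four uint8 ones, so every stride of 4 becomes a stride of 2, and the 2*2*frame_size skip keeps the same two-frame alignment offset as before. A quick sanity check on the sizes, with a hypothetical file:

    # Sketch: chunk accounting for the 16-bit dump (file name hypothetical).
    import numpy as np

    data = np.memmap('data.s16', dtype='int16', mode='r')
    pcm_chunk_size = 160*15                     # frame_size * feature_chunk_size
    nb_chunks = len(data)//(2*pcm_chunk_size)   # two int16 values per sample
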
@@ -187,7 +185,7 @@ else:
 
 model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))
 
-loader = LPCNetLoader(data, features, periods, batch_size, lpc_out=flag_e2e)
+loader = LPCNetLoader(data, features, periods, batch_size, e2e=flag_e2e)
 
 callbacks = [checkpoint, sparsify, grub_sparsify]
 if args.logdir is not None: