mirror of
https://github.com/xiph/opus.git
synced 2025-06-06 07:21:03 +00:00
auto-detect end-to-end models
This commit is contained in:
parent
d5b6087f48
commit
a3ef596822
2 changed files with 26 additions and 10 deletions
|
@ -75,7 +75,7 @@ void compute_noise(int *noise, float noise_std) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) {
|
void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes, int e2e) {
|
||||||
int i, k;
|
int i, k;
|
||||||
for (k=0;k<nframes;k++) {
|
for (k=0;k<nframes;k++) {
|
||||||
unsigned char data[4*FRAME_SIZE];
|
unsigned char data[4*FRAME_SIZE];
|
||||||
|
@ -92,11 +92,11 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f
|
||||||
/* Excitation in. */
|
/* Excitation in. */
|
||||||
data[4*i+2] = st->exc_mem;
|
data[4*i+2] = st->exc_mem;
|
||||||
/* Excitation out. */
|
/* Excitation out. */
|
||||||
#ifdef END2END
|
if (e2e) {
|
||||||
data[4*i+3] = lin2ulaw(pcm[k*FRAME_SIZE+i]);
|
data[4*i+3] = lin2ulaw(pcm[k*FRAME_SIZE+i]);
|
||||||
#else
|
} else {
|
||||||
data[4*i+3] = e;
|
data[4*i+3] = e;
|
||||||
#endif
|
}
|
||||||
/* Simulate error on excitation. */
|
/* Simulate error on excitation. */
|
||||||
e += noise[k*FRAME_SIZE+i];
|
e += noise[k*FRAME_SIZE+i];
|
||||||
e = IMIN(255, IMAX(0, e));
|
e = IMIN(255, IMAX(0, e));
|
||||||
|
@ -118,6 +118,8 @@ static short float2short(float x)
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
int i;
|
int i;
|
||||||
|
char *argv0;
|
||||||
|
int e2e=0;
|
||||||
int count=0;
|
int count=0;
|
||||||
static const float a_hp[2] = {-1.99599, 0.99600};
|
static const float a_hp[2] = {-1.99599, 0.99600};
|
||||||
static const float b_hp[2] = {-2, 1};
|
static const float b_hp[2] = {-2, 1};
|
||||||
|
@ -148,6 +150,12 @@ int main(int argc, char **argv) {
|
||||||
int quantize = 0;
|
int quantize = 0;
|
||||||
srand(getpid());
|
srand(getpid());
|
||||||
st = lpcnet_encoder_create();
|
st = lpcnet_encoder_create();
|
||||||
|
argv0=argv[0];
|
||||||
|
if (argc > 2 && strcmp(argv[1], "-end2end")==0) {
|
||||||
|
e2e = 1;
|
||||||
|
argv++;
|
||||||
|
argc--;
|
||||||
|
}
|
||||||
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
|
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
|
||||||
if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
|
if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
|
||||||
training = 1;
|
training = 1;
|
||||||
|
@ -168,8 +176,8 @@ int main(int argc, char **argv) {
|
||||||
decode = 1;
|
decode = 1;
|
||||||
}
|
}
|
||||||
if (training == -1) {
|
if (training == -1) {
|
||||||
fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv[0]);
|
fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
|
||||||
fprintf(stderr, " or %s -test <speech> <features out>\n", argv[0]);
|
fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
f1 = fopen(argv[2], "r");
|
f1 = fopen(argv[2], "r");
|
||||||
|
@ -273,7 +281,7 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
if (!quantize) {
|
if (!quantize) {
|
||||||
process_single_frame(st, ffeat);
|
process_single_frame(st, ffeat);
|
||||||
if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
|
if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1, e2e);
|
||||||
}
|
}
|
||||||
st->pcount++;
|
st->pcount++;
|
||||||
/* Running on groups of 4 frames. */
|
/* Running on groups of 4 frames. */
|
||||||
|
@ -281,7 +289,7 @@ int main(int argc, char **argv) {
|
||||||
if (quantize) {
|
if (quantize) {
|
||||||
unsigned char buf[8];
|
unsigned char buf[8];
|
||||||
process_superframe(st, buf, ffeat, encode, quantize);
|
process_superframe(st, buf, ffeat, encode, quantize);
|
||||||
if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4);
|
if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4, e2e);
|
||||||
}
|
}
|
||||||
st->pcount = 0;
|
st->pcount = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -250,6 +250,7 @@ with h5py.File(filename, "r") as f:
|
||||||
units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
|
units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
|
||||||
units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
|
units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
|
||||||
cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
|
cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
|
||||||
|
e2e = 'rc2lpc' in f['model_weights']
|
||||||
|
|
||||||
model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = flag_e2e, cond_size=cond_size)
|
model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = flag_e2e, cond_size=cond_size)
|
||||||
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
||||||
|
@ -276,6 +277,13 @@ f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\
|
||||||
hf.write('/*This file is automatically generated from a Keras model*/\n\n')
|
hf.write('/*This file is automatically generated from a Keras model*/\n\n')
|
||||||
hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')
|
hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')
|
||||||
|
|
||||||
|
if e2e:
|
||||||
|
hf.write('/* This is an end-to-end model */\n')
|
||||||
|
hf.write('#define END2END\n\n')
|
||||||
|
else:
|
||||||
|
hf.write('/* This is *not* an end-to-end model */\n')
|
||||||
|
hf.write('/* #define END2END */\n\n')
|
||||||
|
|
||||||
embed_size = lpcnet.embed_size
|
embed_size = lpcnet.embed_size
|
||||||
|
|
||||||
E = model.get_layer('embed_sig').get_weights()[0]
|
E = model.get_layer('embed_sig').get_weights()[0]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue