mirror of
https://github.com/xiph/opus.git
synced 2025-05-31 23:57:42 +00:00
Remove the need for useless exc and pred files
This commit is contained in:
parent
b05f950e38
commit
91d90676e1
3 changed files with 11 additions and 21 deletions
|
@ -19,7 +19,7 @@ This software is also a useful resource as an open source starting point for Wav
|
||||||
|
|
||||||
1. Then, run the resulting executable:
|
1. Then, run the resulting executable:
|
||||||
```
|
```
|
||||||
./dump_data input.s16 exc.s8 features.f32 pred.s16 pcm.s16
|
./dump_data input.s16 features.f32 pcm.s16
|
||||||
```
|
```
|
||||||
|
|
||||||
where the first file contains 16 kHz 16-bit raw PCM audio (no header)
|
where the first file contains 16 kHz 16-bit raw PCM audio (no header)
|
||||||
|
@ -29,7 +29,7 @@ always use ±5% or 10% resampling to augment your data).
|
||||||
|
|
||||||
1. Now that you have your files, you can do the training with:
|
1. Now that you have your files, you can do the training with:
|
||||||
```
|
```
|
||||||
./train_lpcnet.py exc.s8 features.f32 pred.s16 pcm.s16
|
./train_lpcnet.py features.f32 pcm.s16
|
||||||
```
|
```
|
||||||
and it will generate a wavenet*.h5 file for each iteration. If it stops with a
|
and it will generate a wavenet*.h5 file for each iteration. If it stops with a
|
||||||
"Failed to allocate RNN reserve space" message, try reducing the *batch\_size* variable in train_wavenet_audio.py.
|
"Failed to allocate RNN reserve space" message, try reducing the *batch\_size* variable in train_wavenet_audio.py.
|
||||||
|
|
|
@ -579,24 +579,20 @@ int main(int argc, char **argv) {
|
||||||
float mem_preemph=0;
|
float mem_preemph=0;
|
||||||
float x[FRAME_SIZE];
|
float x[FRAME_SIZE];
|
||||||
FILE *f1;
|
FILE *f1;
|
||||||
FILE *fexc;
|
|
||||||
FILE *ffeat;
|
FILE *ffeat;
|
||||||
FILE *fpred;
|
|
||||||
FILE *fpcm;
|
FILE *fpcm;
|
||||||
signed char iexc[FRAME_SIZE];
|
signed char iexc[FRAME_SIZE];
|
||||||
short pred[FRAME_SIZE];
|
short pred[FRAME_SIZE];
|
||||||
short pcm[FRAME_SIZE];
|
short pcm[FRAME_SIZE];
|
||||||
DenoiseState *st;
|
DenoiseState *st;
|
||||||
st = rnnoise_create();
|
st = rnnoise_create();
|
||||||
if (argc!=6) {
|
if (argc!=4) {
|
||||||
fprintf(stderr, "usage: %s <speech> <exc out> <features out> <prediction out> <pcm out> \n", argv[0]);
|
fprintf(stderr, "usage: %s <speech> <features out>\n", argv[0]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
f1 = fopen(argv[1], "r");
|
f1 = fopen(argv[1], "r");
|
||||||
fexc = fopen(argv[2], "w");
|
ffeat = fopen(argv[2], "w");
|
||||||
ffeat = fopen(argv[3], "w");
|
fpcm = fopen(argv[3], "w");
|
||||||
fpred = fopen(argv[4], "w");
|
|
||||||
fpcm = fopen(argv[5], "w");
|
|
||||||
while (1) {
|
while (1) {
|
||||||
kiss_fft_cpx X[FREQ_SIZE], P[WINDOW_SIZE];
|
kiss_fft_cpx X[FREQ_SIZE], P[WINDOW_SIZE];
|
||||||
float Ex[NB_BANDS], Ep[NB_BANDS];
|
float Ex[NB_BANDS], Ep[NB_BANDS];
|
||||||
|
@ -617,17 +613,14 @@ int main(int argc, char **argv) {
|
||||||
preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
||||||
|
|
||||||
compute_frame_features(st, iexc, pred, pcm, X, P, Ex, Ep, Exp, features, x);
|
compute_frame_features(st, iexc, pred, pcm, X, P, Ex, Ep, Exp, features, x);
|
||||||
#if 1
|
|
||||||
fwrite(iexc, sizeof(signed char), FRAME_SIZE, fexc);
|
|
||||||
fwrite(features, sizeof(float), NB_FEATURES, ffeat);
|
fwrite(features, sizeof(float), NB_FEATURES, ffeat);
|
||||||
fwrite(pred, sizeof(short), FRAME_SIZE, fpred);
|
|
||||||
fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
|
fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
|
||||||
#endif
|
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
//fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
|
//fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
|
||||||
fclose(f1);
|
fclose(f1);
|
||||||
fclose(fexc);
|
fclose(ffeat);
|
||||||
|
fclose(fpcm);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,10 +56,8 @@ model, _, _ = lpcnet.new_lpcnet_model()
|
||||||
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
||||||
model.summary()
|
model.summary()
|
||||||
|
|
||||||
exc_file = sys.argv[1] # not used at present
|
feature_file = sys.argv[1]
|
||||||
feature_file = sys.argv[2]
|
pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples
|
||||||
pred_file = sys.argv[3] # LPC predictor samples. Not used at present, see below
|
|
||||||
pcm_file = sys.argv[4] # 16 bit unsigned short PCM samples
|
|
||||||
frame_size = 160
|
frame_size = 160
|
||||||
nb_features = 55
|
nb_features = 55
|
||||||
nb_used_features = model.nb_used_features
|
nb_used_features = model.nb_used_features
|
||||||
|
@ -96,8 +94,7 @@ features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
|
||||||
# Note: the LPC predictor output is now calculated by the loop below, this code was
|
# Note: the LPC predictor output is now calculated by the loop below, this code was
|
||||||
# for an earlier version that implemented the prediction filter in C
|
# for an earlier version that implemented the prediction filter in C
|
||||||
|
|
||||||
upred = np.fromfile(pred_file, dtype='int16')
|
upred = np.zeros((nb_frames*pcm_chunk_size,), dtype='int16')
|
||||||
upred = upred[:nb_frames*pcm_chunk_size]
|
|
||||||
|
|
||||||
# Use 16th order LPC to generate LPC prediction output upred[] and (in
|
# Use 16th order LPC to generate LPC prediction output upred[] and (in
|
||||||
# mu-law form) pred[]
|
# mu-law form) pred[]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue