Add pitch feature computation

This commit is contained in:
Jean-Marc Valin 2023-09-23 12:45:20 -04:00
parent 96d89e99d8
commit 0100cd95de
No known key found for this signature in database
GPG key ID: 531A52533318F00A
3 changed files with 57 additions and 1 deletions

View file

@ -134,6 +134,7 @@ int main(int argc, char **argv) {
float noise_std=0; float noise_std=0;
int training = -1; int training = -1;
int burg = 0; int burg = 0;
int pitch = 0;
srand(getpid()); srand(getpid());
st = lpcnet_encoder_create(); st = lpcnet_encoder_create();
argv0=argv[0]; argv0=argv[0];
@ -145,6 +146,14 @@ int main(int argc, char **argv) {
burg = 1; burg = 1;
training = 0; training = 0;
} }
if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
pitch = 1;
training = 1;
}
if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
pitch = 1;
training = 0;
}
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1; if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0; if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
if (training == -1) { if (training == -1) {
@ -239,7 +248,18 @@ int main(int argc, char **argv) {
compute_noise(noisebuf, noise_std); compute_noise(noisebuf, noise_std);
} }
/* When the pitch flag is set (-ptrain / -ptest), dump the encoder's pitch
   features to ffeat as one signed byte per feature instead of calling
   process_single_frame(). */
if (pitch) {
/* Output layout: PITCH_MAX_PERIOD-PITCH_MIN_PERIOD xcorr features first,
   followed by PITCH_IF_FEATURES IF features. */
signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
/* Quantize each float feature with round-to-nearest at a scale of 127.
   NOTE(review): this assumes every feature lies in [-1, 1] so the result
   fits in a signed char -- confirm for xcorr_features. */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
pitch_features[i] = floor(.5 + 127.f*st->xcorr_features[i]);
}
for (i=0;i<PITCH_IF_FEATURES;i++) {
pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = floor(.5 + 127.f*st->if_features[i]);
}
/* Written as a single item of N bytes.
   NOTE(review): the return value of fwrite() is not checked. */
fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
} else {
process_single_frame(st, ffeat); process_single_frame(st, ffeat);
}
if (fpcm) write_audio(st, pcm, noisebuf, fpcm); if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
/*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/ /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]); for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);

View file

@ -41,6 +41,7 @@
#include "lpcnet_private.h" #include "lpcnet_private.h"
#include "lpcnet.h" #include "lpcnet.h"
#include "os_support.h" #include "os_support.h"
#include "_kiss_fft_guts.h"
int lpcnet_encoder_get_size() { int lpcnet_encoder_get_size() {
@ -104,6 +105,19 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
static const float lp_a[2] = {-1.54220f, 0.70781f}; static const float lp_a[2] = {-1.54220f, 0.70781f};
OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET); OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
frame_analysis(st, X, Ex, in); frame_analysis(st, X, Ex, in);
/* Fill st->if_features from the spectrum X of the current frame.
   Layout: index 0 holds a level feature for bin 0; each bin i in
   1..PITCH_IF_MAX_FREQ-1 contributes three values at 3*i-2, 3*i-1 and 3*i. */
/* Bin 0: (10*log10(X[0].r^2) - 6)/64, limited to [-1, 1] (assuming
   MAX16/MIN16 are plain max/min). Only the real part of X[0] is used;
   1e-15 keeps log10() away from zero. */
st->if_features[0] = MAX16(-1, MIN16(1, (1.f/64)*(10.f*log10(1e-15 + X[0].r*X[0].r)-6)));
for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
kiss_fft_cpx prod;
float norm_1;
/* Combine the current bin with the same bin saved from the previous call.
   Presumably C_MULC multiplies by the complex conjugate, which would make
   prod carry the phase difference between the two -- TODO confirm. */
C_MULC(prod, X[i], st->prev_if[i]);
/* Scale prod to (at most) unit magnitude; 1e-15 avoids division by zero. */
norm_1 = 1.f/sqrt(1e-15 + prod.r*prod.r + prod.i*prod.i);
C_MULBYSCALAR(prod, norm_1);
st->if_features[3*i-2] = prod.r;
st->if_features[3*i-1] = prod.i;
/* Same level mapping as bin 0, but on the full squared magnitude |X[i]|^2. */
st->if_features[3*i] = MAX16(-1, MIN16(1, (1.f/64)*(10.f*log10(1e-15 + X[i].r*X[i].r + X[i].i*X[i].i)-6)));
}
/* Keep the first PITCH_IF_MAX_FREQ bins for use as prev_if on the next call. */
OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
/*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
logMax = -2; logMax = -2;
follow = -2; follow = -2;
for (i=0;i<NB_BANDS;i++) { for (i=0;i<NB_BANDS;i++) {
@ -133,6 +147,22 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
/*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/ /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
} }
biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE); biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
/* Fill st->xcorr_features[i] = 2*xcorr[i] / (1 + ener0 + ener1) for each of
   the PITCH_MAX_PERIOD-PITCH_MIN_PERIOD offsets i into st->exc_buf. */
{
double ener1;
float *buf = st->exc_buf;
/* Presumably xcorr[i] is the inner product of the FRAME_SIZE samples at
   buf[PITCH_MAX_PERIOD] with the FRAME_SIZE samples starting at buf[i] --
   confirm against celt_pitch_xcorr(). */
celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, st->arch);
/* ener0: sum of squares of the FRAME_SIZE samples starting at buf[PITCH_MAX_PERIOD]. */
ener0 = celt_inner_prod_c(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE);
/* ener1 is a sliding sum of squares over buf[i .. i+FRAME_SIZE-1]. It starts
   with only FRAME_SIZE-1 samples because the loop adds the last sample of
   each window before using the sum. */
ener1 = celt_inner_prod_c(&buf[0], &buf[0], FRAME_SIZE-1);
/*printf("%f\n", st->frame_weight[sub]);*/
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
/* Bring in the newest sample so ener1 covers the full window at offset i. */
ener1 += buf[i+FRAME_SIZE-1]*buf[i+FRAME_SIZE-1];
/* The constant 1 keeps the denominator strictly positive. */
ener = 1 + ener0 + ener1;
st->xcorr_features[i] = 2*xcorr[i] / ener;
/* Drop the oldest sample so the window can slide to offset i+1. */
ener1 -= buf[i]*buf[i];
/*printf("%f ", st->xcorr_features[i]);*/
}
/*printf("\n");*/
}
/* Cross-correlation on half-frames. */ /* Cross-correlation on half-frames. */
for (sub=0;sub<2;sub++) { for (sub=0;sub<2;sub++) {
int off = sub*FRAME_SIZE/2; int off = sub*FRAME_SIZE/2;

View file

@ -17,6 +17,9 @@
#define PLC_MAX_FEC 100 #define PLC_MAX_FEC 100
#define MAX_FEATURE_BUFFER_SIZE 4 #define MAX_FEATURE_BUFFER_SIZE 4
/* Number of leading spectrum bins (0 .. PITCH_IF_MAX_FREQ-1) used for the
   pitch IF features; also the length of LPCNetEncState.prev_if. */
#define PITCH_IF_MAX_FREQ 30
/* Length of LPCNetEncState.if_features: one value for bin 0 plus three
   values for each of the remaining PITCH_IF_MAX_FREQ-1 bins. */
#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2)
struct LPCNetState { struct LPCNetState {
LPCNetModel model; LPCNetModel model;
int arch; int arch;
@ -44,6 +47,9 @@ struct LPCNetEncState{
int arch; int arch;
float analysis_mem[OVERLAP_SIZE]; float analysis_mem[OVERLAP_SIZE];
float mem_preemph; float mem_preemph;
kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ];
float if_features[PITCH_IF_FEATURES];
float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
float pitch_mem[LPC_ORDER]; float pitch_mem[LPC_ORDER];
float pitch_filt; float pitch_filt;
float xc[2][PITCH_MAX_PERIOD+1]; float xc[2][PITCH_MAX_PERIOD+1];