From 96d89e99d8ef413455fb8a5eb9e5e583a981ed96 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 15 Sep 2023 16:55:23 -0400 Subject: [PATCH] Band-limiting the voicing parameter --- dnn/lpcnet_enc.c | 30 ++++++++++++++++++++++++++++++ dnn/lpcnet_private.h | 2 ++ 2 files changed, 32 insertions(+) diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 70d8debe..cfd04d2c 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -74,6 +74,19 @@ static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const lpcn_compute_band_energy(Ex, X); } +static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) { + int i; + for (i=0;ianalysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET); frame_analysis(st, X, Ex, in); logMax = -2; @@ -102,6 +118,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { lpc_from_cepstrum(st->lpc, st->features); for (i=0;ifeatures[NB_BANDS+2+i] = st->lpc[i]; OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD); + OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD); OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET); for (i=0;ilpc[j]*st->pitch_mem[j]; OPUS_MOVE(st->pitch_mem+1, st->pitch_mem, LPC_ORDER-1); st->pitch_mem[0] = aligned_in[i]; + st->lp_buf[PITCH_MAX_PERIOD+i] = sum; st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7f*st->pitch_filt; st->pitch_filt = sum; /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/ } + biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE); /* Cross-correlation on half-frames. */ for (sub=0;sub<2;sub++) { int off = sub*FRAME_SIZE/2; @@ -206,6 +225,17 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) { best_i = pitch_prev[sub][best_i]; } frame_corr /= 2; + if (0) { + float xy, xx, yy; + int pitch = (best[2]+best[3])/2; + xx = celt_inner_prod_c(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE); + yy = celt_inner_prod_c(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE); + xy = celt_inner_prod_c(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE); + //printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy)); + frame_corr = xy/sqrt(1+xx*yy); + //frame_corr = MAX32(0, xy/sqrt(1+xx*yy)); + frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f)); + } st->features[NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200); st->features[NB_BANDS + 1] = frame_corr-.5f; if (ffeat) { diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index da048e7e..0bd8cad3 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -49,6 +49,8 @@ struct LPCNetEncState{ float xc[2][PITCH_MAX_PERIOD+1]; float frame_weight[2]; float exc_buf[PITCH_BUF_SIZE]; + float lp_buf[PITCH_BUF_SIZE]; + float lp_mem[4]; float pitch_max_path[2][PITCH_MAX_PERIOD]; float pitch_max_path_all; int best_i;