Optimizes _celt_autocorr() by using pitch_xcorr()

Computes most of the auto-correlation by reusing pitch_xcorr(). We only
need lag*(lag+1)/2 additional MACs to complete the calculations.
To do this, pitch_xcorr() was modified so that it no longer truncates the
length to a multiple of 4. The floor at -1 on the xcorr output was also
removed, since it was not needed.
As a side benefit, this speeds up the PLC, which uses a higher order LPC
filter.
This commit is contained in:
Jean-Marc Valin 2013-05-25 02:14:25 -04:00
parent fbf99981a6
commit e8e57a32f6
3 changed files with 58 additions and 22 deletions

View file

@ -32,6 +32,7 @@
#include "celt_lpc.h" #include "celt_lpc.h"
#include "stack_alloc.h" #include "stack_alloc.h"
#include "mathops.h" #include "mathops.h"
#include "pitch.h"
void _celt_lpc( void _celt_lpc(
opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
@ -147,6 +148,7 @@ void _celt_autocorr(
{ {
opus_val32 d; opus_val32 d;
int i; int i;
int fastN=n-lag;
VARDECL(opus_val16, xx); VARDECL(opus_val16, xx);
SAVE_STACK; SAVE_STACK;
ALLOC(xx, n, opus_val16); ALLOC(xx, n, opus_val16);
@ -177,11 +179,12 @@ void _celt_autocorr(
xx[i] = VSHR32(xx[i], shift); xx[i] = VSHR32(xx[i], shift);
} }
#endif #endif
pitch_xcorr(xx, xx, ac, fastN, lag+1);
while (lag>=0) while (lag>=0)
{ {
for (i = lag, d = 0; i < n; i++) for (i = lag+fastN, d = 0; i < n; i++)
d = MAC16_16(d, xx[i], xx[i-lag]); d = MAC16_16(d, xx[i], xx[i-lag]);
ac[lag] = d; ac[lag] += d;
/*printf ("%f ", ac[lag]);*/ /*printf ("%f ", ac[lag]);*/
lag--; lag--;
} }

View file

@ -217,11 +217,12 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
#if 0 /* This is a simple version of the pitch correlation that should work #if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */ well on DSPs like Blackfin and TI C5x/C6x */
static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch
#ifdef FIXED_POINT #ifdef FIXED_POINT
,opus_val32 *maxval opus_val32
#else
void
#endif #endif
) pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
{ {
int i, j; int i, j;
#ifdef FIXED_POINT #ifdef FIXED_POINT
@ -232,30 +233,29 @@ static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len
opus_val32 sum = 0; opus_val32 sum = 0;
for (j=0;j<len;j++) for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]); sum = MAC16_16(sum, x[j],y[i+j]);
xcorr[i] = MAX32(-1, sum); xcorr[i] = sum;
#ifdef FIXED_POINT #ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum); maxcorr = MAX32(maxcorr, sum);
#endif #endif
} }
#ifdef FIXED_POINT #ifdef FIXED_POINT
*maxval = maxcorr; return maxcorr;
#endif #endif
} }
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch
#ifdef FIXED_POINT #ifdef FIXED_POINT
,opus_val32 *maxval opus_val32
#else
void
#endif #endif
) pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{ {
int i,j; int i,j;
#ifdef FIXED_POINT #ifdef FIXED_POINT
opus_val32 maxcorr=1; opus_val32 maxcorr=1;
#endif #endif
/* Truncate slightly if len is not a multiple of 4. */
len -= len&3;
for (i=0;i<max_pitch-3;i+=4) for (i=0;i<max_pitch-3;i+=4)
{ {
/* Compute correlation*/ /* Compute correlation*/
@ -271,7 +271,7 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l
y0=*y++; y0=*y++;
y1=*y++; y1=*y++;
y2=*y++; y2=*y++;
for (j=0;j<len;j+=4) for (j=0;j<len-3;j+=4)
{ {
opus_val16 tmp; opus_val16 tmp;
tmp = *x++; tmp = *x++;
@ -299,10 +299,37 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l
sum3 = MAC16_16(sum3,tmp,y1); sum3 = MAC16_16(sum3,tmp,y1);
sum4 = MAC16_16(sum4,tmp,y2); sum4 = MAC16_16(sum4,tmp,y2);
} }
xcorr[i]=MAX32(-1, sum1); if (j++<len)
xcorr[i+1]=MAX32(-1, sum2); {
xcorr[i+2]=MAX32(-1, sum3); opus_val16 tmp = *x++;
xcorr[i+3]=MAX32(-1, sum4); y3=*y++;
sum1 = MAC16_16(sum1,tmp,y0);
sum2 = MAC16_16(sum2,tmp,y1);
sum3 = MAC16_16(sum3,tmp,y2);
sum4 = MAC16_16(sum4,tmp,y3);
}
if (j++<len)
{
opus_val16 tmp=*x++;
y0=*y++;
sum1 = MAC16_16(sum1,tmp,y1);
sum2 = MAC16_16(sum2,tmp,y2);
sum3 = MAC16_16(sum3,tmp,y3);
sum4 = MAC16_16(sum4,tmp,y0);
}
if (j<len)
{
opus_val16 tmp=*x++;
y1=*y++;
sum1 = MAC16_16(sum1,tmp,y2);
sum2 = MAC16_16(sum2,tmp,y3);
sum3 = MAC16_16(sum3,tmp,y0);
sum4 = MAC16_16(sum4,tmp,y1);
}
xcorr[i]=sum1;
xcorr[i+1]=sum2;
xcorr[i+2]=sum3;
xcorr[i+3]=sum4;
#ifdef FIXED_POINT #ifdef FIXED_POINT
sum1 = MAX32(sum1, sum2); sum1 = MAX32(sum1, sum2);
sum3 = MAX32(sum3, sum4); sum3 = MAX32(sum3, sum4);
@ -316,13 +343,13 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l
opus_val32 sum = 0; opus_val32 sum = 0;
for (j=0;j<len;j++) for (j=0;j<len;j++)
sum = MAC16_16(sum, _x[j],_y[i+j]); sum = MAC16_16(sum, _x[j],_y[i+j]);
xcorr[i] = MAX32(-1, sum); xcorr[i] = sum;
#ifdef FIXED_POINT #ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum); maxcorr = MAX32(maxcorr, sum);
#endif #endif
} }
#ifdef FIXED_POINT #ifdef FIXED_POINT
*maxval = maxcorr; return maxcorr;
#endif #endif
} }
@ -378,11 +405,10 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
/* Coarse search with 4x decimation */ /* Coarse search with 4x decimation */
pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2
#ifdef FIXED_POINT #ifdef FIXED_POINT
,&maxcorr maxcorr =
#endif #endif
); pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
#ifdef FIXED_POINT #ifdef FIXED_POINT

View file

@ -45,4 +45,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0, int prev_period, opus_val16 prev_gain); int N, int *T0, int prev_period, opus_val16 prev_gain);
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
#endif #endif