mirror of
https://github.com/xiph/opus.git
synced 2025-05-30 23:27:42 +00:00
Creates xcorr_kernel() that gets used by pitch_xcorr, celt_fir and celt_iir.
This commit is contained in:
parent
2fe4700f76
commit
068cbd89bf
3 changed files with 102 additions and 176 deletions
125
celt/celt_lpc.c
125
celt/celt_lpc.c
|
@ -124,49 +124,12 @@ void celt_fir(const opus_val16 *_x,
|
|||
celt_assert((ord&3)==0);
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
opus_val32 sum1=0;
|
||||
opus_val32 sum2=0;
|
||||
opus_val32 sum3=0;
|
||||
opus_val32 sum4=0;
|
||||
const opus_val16 *xx = x+i;
|
||||
const opus_val16 *z = rnum;
|
||||
opus_val16 y_0, y_1, y_2, y_3;
|
||||
y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
|
||||
y_0=*xx++;
|
||||
y_1=*xx++;
|
||||
y_2=*xx++;
|
||||
for (j=0;j<ord-3;j+=4)
|
||||
{
|
||||
opus_val16 tmp;
|
||||
tmp = *z++;
|
||||
y_3=*xx++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_0);
|
||||
sum2 = MAC16_16(sum2,tmp,y_1);
|
||||
sum3 = MAC16_16(sum3,tmp,y_2);
|
||||
sum4 = MAC16_16(sum4,tmp,y_3);
|
||||
tmp=*z++;
|
||||
y_0=*xx++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_1);
|
||||
sum2 = MAC16_16(sum2,tmp,y_2);
|
||||
sum3 = MAC16_16(sum3,tmp,y_3);
|
||||
sum4 = MAC16_16(sum4,tmp,y_0);
|
||||
tmp=*z++;
|
||||
y_1=*xx++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_2);
|
||||
sum2 = MAC16_16(sum2,tmp,y_3);
|
||||
sum3 = MAC16_16(sum3,tmp,y_0);
|
||||
sum4 = MAC16_16(sum4,tmp,y_1);
|
||||
tmp=*z++;
|
||||
y_2=*xx++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_3);
|
||||
sum2 = MAC16_16(sum2,tmp,y_0);
|
||||
sum3 = MAC16_16(sum3,tmp,y_1);
|
||||
sum4 = MAC16_16(sum4,tmp,y_2);
|
||||
}
|
||||
_y[i ] = ADD16(_x[i ], ROUND16(sum1, SIG_SHIFT));
|
||||
_y[i+1] = ADD16(_x[i+1], ROUND16(sum2, SIG_SHIFT));
|
||||
_y[i+2] = ADD16(_x[i+2], ROUND16(sum3, SIG_SHIFT));
|
||||
_y[i+3] = ADD16(_x[i+3], ROUND16(sum4, SIG_SHIFT));
|
||||
opus_val32 sum[4]={0,0,0,0};
|
||||
xcorr_kernel(rnum, x+i, sum, ord);
|
||||
_y[i ] = ADD16(_x[i ], ROUND16(sum[0], SIG_SHIFT));
|
||||
_y[i+1] = ADD16(_x[i+1], ROUND16(sum[1], SIG_SHIFT));
|
||||
_y[i+2] = ADD16(_x[i+2], ROUND16(sum[2], SIG_SHIFT));
|
||||
_y[i+3] = ADD16(_x[i+3], ROUND16(sum[3], SIG_SHIFT));
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
|
@ -219,64 +182,26 @@ void celt_iir(const opus_val32 *_x,
|
|||
y[i]=0;
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
opus_val32 sum1=0;
|
||||
opus_val32 sum2=0;
|
||||
opus_val32 sum3=0;
|
||||
opus_val32 sum4=0;
|
||||
const opus_val16 *yy = y+i;
|
||||
const opus_val16 *z = rden;
|
||||
opus_val16 y_0, y_1, y_2, y_3;
|
||||
sum1 = _x[i ];
|
||||
sum2 = _x[i+1];
|
||||
sum3 = _x[i+2];
|
||||
sum4 = _x[i+3];
|
||||
y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
|
||||
y_0=*yy++;
|
||||
y_1=*yy++;
|
||||
y_2=*yy++;
|
||||
for (j=0;j<ord-3;j+=4)
|
||||
{
|
||||
opus_val16 tmp;
|
||||
tmp = *z++;
|
||||
y_3=*yy++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_0);
|
||||
sum2 = MAC16_16(sum2,tmp,y_1);
|
||||
sum3 = MAC16_16(sum3,tmp,y_2);
|
||||
sum4 = MAC16_16(sum4,tmp,y_3);
|
||||
tmp=*z++;
|
||||
y_0=*yy++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_1);
|
||||
sum2 = MAC16_16(sum2,tmp,y_2);
|
||||
sum3 = MAC16_16(sum3,tmp,y_3);
|
||||
sum4 = MAC16_16(sum4,tmp,y_0);
|
||||
tmp=*z++;
|
||||
y_1=*yy++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_2);
|
||||
sum2 = MAC16_16(sum2,tmp,y_3);
|
||||
sum3 = MAC16_16(sum3,tmp,y_0);
|
||||
sum4 = MAC16_16(sum4,tmp,y_1);
|
||||
tmp=*z++;
|
||||
y_2=*yy++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_3);
|
||||
sum2 = MAC16_16(sum2,tmp,y_0);
|
||||
sum3 = MAC16_16(sum3,tmp,y_1);
|
||||
sum4 = MAC16_16(sum4,tmp,y_2);
|
||||
}
|
||||
y[i+ord ] = -ROUND16(sum1,SIG_SHIFT);
|
||||
_y[i ] = sum1;
|
||||
sum2 = MAC16_16(sum2, y[i+ord ], den[0]);
|
||||
y[i+ord+1] = -ROUND16(sum2,SIG_SHIFT);
|
||||
_y[i+1] = sum2;
|
||||
sum3 = MAC16_16(sum3, y[i+ord+1], den[0]);
|
||||
sum3 = MAC16_16(sum3, y[i+ord ], den[1]);
|
||||
y[i+ord+2] = -ROUND16(sum3,SIG_SHIFT);
|
||||
_y[i+2] = sum3;
|
||||
/* Unroll by 4 as if it were an FIR filter */
|
||||
opus_val32 sum[4]={_x[i],_x[i+1],_x[i+2],_x[i+3]};
|
||||
xcorr_kernel(rden, y+i, sum, ord);
|
||||
|
||||
sum4 = MAC16_16(sum4, y[i+ord+2], den[0]);
|
||||
sum4 = MAC16_16(sum4, y[i+ord+1], den[1]);
|
||||
sum4 = MAC16_16(sum4, y[i+ord ], den[2]);
|
||||
y[i+ord+3] = -ROUND16(sum4,SIG_SHIFT);
|
||||
_y[i+3] = sum4;
|
||||
/* Patch up the result to compensate for the fact that this is an IIR */
|
||||
y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
|
||||
_y[i ] = sum[0];
|
||||
sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
|
||||
y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
|
||||
_y[i+1] = sum[1];
|
||||
sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
|
||||
sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
|
||||
y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
|
||||
_y[i+2] = sum[2];
|
||||
|
||||
sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
|
||||
sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
|
||||
sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
|
||||
y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
|
||||
_y[i+3] = sum[3];
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
|
|
86
celt/pitch.c
86
celt/pitch.c
|
@ -258,83 +258,17 @@ pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_
|
|||
#endif
|
||||
for (i=0;i<max_pitch-3;i+=4)
|
||||
{
|
||||
/* Compute correlation*/
|
||||
/*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
|
||||
opus_val32 sum1=0;
|
||||
opus_val32 sum2=0;
|
||||
opus_val32 sum3=0;
|
||||
opus_val32 sum4=0;
|
||||
const opus_val16 *y = _y+i;
|
||||
const opus_val16 *x = _x;
|
||||
opus_val16 y_0, y_1, y_2, y_3;
|
||||
y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
|
||||
y_0=*y++;
|
||||
y_1=*y++;
|
||||
y_2=*y++;
|
||||
for (j=0;j<len-3;j+=4)
|
||||
{
|
||||
opus_val16 tmp;
|
||||
tmp = *x++;
|
||||
y_3=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_0);
|
||||
sum2 = MAC16_16(sum2,tmp,y_1);
|
||||
sum3 = MAC16_16(sum3,tmp,y_2);
|
||||
sum4 = MAC16_16(sum4,tmp,y_3);
|
||||
tmp=*x++;
|
||||
y_0=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_1);
|
||||
sum2 = MAC16_16(sum2,tmp,y_2);
|
||||
sum3 = MAC16_16(sum3,tmp,y_3);
|
||||
sum4 = MAC16_16(sum4,tmp,y_0);
|
||||
tmp=*x++;
|
||||
y_1=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_2);
|
||||
sum2 = MAC16_16(sum2,tmp,y_3);
|
||||
sum3 = MAC16_16(sum3,tmp,y_0);
|
||||
sum4 = MAC16_16(sum4,tmp,y_1);
|
||||
tmp=*x++;
|
||||
y_2=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_3);
|
||||
sum2 = MAC16_16(sum2,tmp,y_0);
|
||||
sum3 = MAC16_16(sum3,tmp,y_1);
|
||||
sum4 = MAC16_16(sum4,tmp,y_2);
|
||||
}
|
||||
if (j++<len)
|
||||
{
|
||||
opus_val16 tmp = *x++;
|
||||
y_3=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_0);
|
||||
sum2 = MAC16_16(sum2,tmp,y_1);
|
||||
sum3 = MAC16_16(sum3,tmp,y_2);
|
||||
sum4 = MAC16_16(sum4,tmp,y_3);
|
||||
}
|
||||
if (j++<len)
|
||||
{
|
||||
opus_val16 tmp=*x++;
|
||||
y_0=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_1);
|
||||
sum2 = MAC16_16(sum2,tmp,y_2);
|
||||
sum3 = MAC16_16(sum3,tmp,y_3);
|
||||
sum4 = MAC16_16(sum4,tmp,y_0);
|
||||
}
|
||||
if (j<len)
|
||||
{
|
||||
opus_val16 tmp=*x++;
|
||||
y_1=*y++;
|
||||
sum1 = MAC16_16(sum1,tmp,y_2);
|
||||
sum2 = MAC16_16(sum2,tmp,y_3);
|
||||
sum3 = MAC16_16(sum3,tmp,y_0);
|
||||
sum4 = MAC16_16(sum4,tmp,y_1);
|
||||
}
|
||||
xcorr[i]=sum1;
|
||||
xcorr[i+1]=sum2;
|
||||
xcorr[i+2]=sum3;
|
||||
xcorr[i+3]=sum4;
|
||||
opus_val32 sum[4]={0,0,0,0};
|
||||
xcorr_kernel(_x, _y+i, sum, len);
|
||||
xcorr[i]=sum[0];
|
||||
xcorr[i+1]=sum[1];
|
||||
xcorr[i+2]=sum[2];
|
||||
xcorr[i+3]=sum[3];
|
||||
#ifdef FIXED_POINT
|
||||
sum1 = MAX32(sum1, sum2);
|
||||
sum3 = MAX32(sum3, sum4);
|
||||
sum1 = MAX32(sum1, sum3);
|
||||
maxcorr = MAX32(maxcorr, sum1);
|
||||
sum[0] = MAX32(sum[0], sum[1]);
|
||||
sum[2] = MAX32(sum[2], sum[3]);
|
||||
sum[0] = MAX32(sum[0], sum[2]);
|
||||
maxcorr = MAX32(maxcorr, sum[0]);
|
||||
#endif
|
||||
}
|
||||
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
|
||||
|
|
67
celt/pitch.h
67
celt/pitch.h
|
@ -45,6 +45,73 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
|
|||
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
||||
int N, int *T0, int prev_period, opus_val16 prev_gain);
|
||||
|
||||
/* OPT: This is the kernel you really want to optimize. It gets used a lot
|
||||
by the prefilter and by the PLC. */
|
||||
static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
|
||||
{
|
||||
int j;
|
||||
opus_val16 y_0, y_1, y_2, y_3;
|
||||
y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
|
||||
y_0=*y++;
|
||||
y_1=*y++;
|
||||
y_2=*y++;
|
||||
for (j=0;j<len-3;j+=4)
|
||||
{
|
||||
opus_val16 tmp;
|
||||
tmp = *x++;
|
||||
y_3=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_0);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_1);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_2);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_3);
|
||||
tmp=*x++;
|
||||
y_0=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_1);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_2);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_3);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_0);
|
||||
tmp=*x++;
|
||||
y_1=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_2);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_3);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_0);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_1);
|
||||
tmp=*x++;
|
||||
y_2=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_3);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_0);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_1);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_2);
|
||||
}
|
||||
if (j++<len)
|
||||
{
|
||||
opus_val16 tmp = *x++;
|
||||
y_3=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_0);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_1);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_2);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_3);
|
||||
}
|
||||
if (j++<len)
|
||||
{
|
||||
opus_val16 tmp=*x++;
|
||||
y_0=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_1);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_2);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_3);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_0);
|
||||
}
|
||||
if (j<len)
|
||||
{
|
||||
opus_val16 tmp=*x++;
|
||||
y_1=*y++;
|
||||
sum[0] = MAC16_16(sum[0],tmp,y_2);
|
||||
sum[1] = MAC16_16(sum[1],tmp,y_3);
|
||||
sum[2] = MAC16_16(sum[2],tmp,y_0);
|
||||
sum[3] = MAC16_16(sum[3],tmp,y_1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
opus_val32
|
||||
#else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue