Use more MAC16_16's and unroll a loop.
This splits out the non-arch-specific portions of a patch written by Aurélien Zanelli <aurelien.zanelli@parrot.com> (see http://lists.xiph.org/pipermail/opus/2013-May/002088.html ). I also added support for odd n, for custom modes. This gives a 0.25% speedup on 96 kbps stereo encode+decode on a Cortex A8.
This commit is contained in:
parent
2040606f4a
commit
85ede2c6aa
1 changed file with 10 additions and 5 deletions
|
@ -101,7 +101,7 @@ void celt_fir(const opus_val16 *x,
|
||||||
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
|
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
|
||||||
for (j=0;j<ord;j++)
|
for (j=0;j<ord;j++)
|
||||||
{
|
{
|
||||||
sum += MULT16_16(num[j],mem[j]);
|
sum = MAC16_16(sum,num[j],mem[j]);
|
||||||
}
|
}
|
||||||
for (j=ord-1;j>=1;j--)
|
for (j=ord-1;j>=1;j--)
|
||||||
{
|
{
|
||||||
|
@ -161,11 +161,16 @@ void _celt_autocorr(
|
||||||
}
|
}
|
||||||
#ifdef FIXED_POINT
|
#ifdef FIXED_POINT
|
||||||
{
|
{
|
||||||
opus_val32 ac0=0;
|
opus_val32 ac0;
|
||||||
int shift;
|
int shift;
|
||||||
for(i=0;i<n;i++)
|
int n2;
|
||||||
|
ac0 = 1+n;
|
||||||
|
if (n&1) ac0 += SHR32(MULT16_16(xx[0],xx[0]),9);
|
||||||
|
for(i=(n&1);i<n;i+=2)
|
||||||
|
{
|
||||||
ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
|
ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
|
||||||
ac0 += 1+n;
|
ac0 += SHR32(MULT16_16(xx[i+1],xx[i+1]),9);
|
||||||
|
}
|
||||||
|
|
||||||
shift = celt_ilog2(ac0)-30+10;
|
shift = celt_ilog2(ac0)-30+10;
|
||||||
shift = (shift+1)/2;
|
shift = (shift+1)/2;
|
||||||
|
@ -176,7 +181,7 @@ void _celt_autocorr(
|
||||||
while (lag>=0)
|
while (lag>=0)
|
||||||
{
|
{
|
||||||
for (i = lag, d = 0; i < n; i++)
|
for (i = lag, d = 0; i < n; i++)
|
||||||
d += xx[i] * xx[i-lag];
|
d = MAC16_16(d, xx[i], xx[i-lag]);
|
||||||
ac[lag] = d;
|
ac[lag] = d;
|
||||||
/*printf ("%f ", ac[lag]);*/
|
/*printf ("%f ", ac[lag]);*/
|
||||||
lag--;
|
lag--;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue