Support 32-bit window and comb filter

This commit is contained in:
Jean-Marc Valin 2024-05-28 13:03:10 -04:00
parent 38e535a1e7
commit c79a9bd1dd
No known key found for this signature in database
GPG key ID: 8D2952BBB52C646D
17 changed files with 160 additions and 81 deletions

View file

@ -89,6 +89,7 @@ int resampling_factor(opus_int32 rate)
return ret;
}
#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
/* This version should be faster on ARM */
#ifdef OPUS_ARM_ASM
@ -96,7 +97,7 @@ int resampling_factor(opus_int32 rate)
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
celt_coef g10, celt_coef g11, celt_coef g12)
{
opus_val32 x0, x1, x2, x3, x4;
int i;
@ -108,33 +109,33 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = MAC_COEF_32_ARM(x[i], g10, x2);
t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3));
t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=SHL32(x[i-T+3],1);
t = MAC16_32_Q16(x[i+1], g10, x1);
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
t = MAC_COEF_32_ARM(x[i+1], g10, x1);
t = MAC_COEF_32_ARM(t, g11, ADD32(x0,x2));
t = MAC_COEF_32_ARM(t, g12, ADD32(x4,x3));
t = SATURATE(t, SIG_SAT);
y[i+1] = t;
x3=SHL32(x[i-T+4],1);
t = MAC16_32_Q16(x[i+2], g10, x0);
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
t = MAC_COEF_32_ARM(x[i+2], g10, x0);
t = MAC_COEF_32_ARM(t, g11, ADD32(x4,x1));
t = MAC_COEF_32_ARM(t, g12, ADD32(x3,x2));
t = SATURATE(t, SIG_SAT);
y[i+2] = t;
x2=SHL32(x[i-T+5],1);
t = MAC16_32_Q16(x[i+3], g10, x4);
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
t = MAC_COEF_32_ARM(x[i+3], g10, x4);
t = MAC_COEF_32_ARM(t, g11, ADD32(x3,x0));
t = MAC_COEF_32_ARM(t, g12, ADD32(x2,x1));
t = SATURATE(t, SIG_SAT);
y[i+3] = t;
x1=SHL32(x[i-T+6],1);
t = MAC16_32_Q16(x[i+4], g10, x3);
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
t = MAC_COEF_32_ARM(x[i+4], g10, x3);
t = MAC_COEF_32_ARM(t, g11, ADD32(x2,x4));
t = MAC_COEF_32_ARM(t, g12, ADD32(x1,x0));
t = SATURATE(t, SIG_SAT);
y[i+4] = t;
}
@ -143,9 +144,9 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = MAC_COEF_32_ARM(x[i], g10, x2);
t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3));
t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=x3;
@ -160,7 +161,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
celt_coef g10, celt_coef g11, celt_coef g12)
{
opus_val32 x0, x1, x2, x3, x4;
int i;
@ -172,9 +173,9 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
{
x0=x[i-T+2];
y[i] = x[i]
+ MULT16_32_Q15(g10,x2)
+ MULT16_32_Q15(g11,ADD32(x1,x3))
+ MULT16_32_Q15(g12,ADD32(x0,x4));
+ MULT_COEF_32(g10,x2)
+ MULT_COEF_32(g11,ADD32(x1,x3))
+ MULT_COEF_32(g12,ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
@ -189,11 +190,11 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap, int arch)
const celt_coef *window, int overlap, int arch)
{
int i;
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
opus_val16 g00, g01, g02, g10, g11, g12;
celt_coef g00, g01, g02, g10, g11, g12;
opus_val32 x0, x1, x2, x3, x4;
static const opus_val16 gains[3][3] = {
{QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
@ -211,12 +212,12 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
to have them be at least 2 to avoid processing garbage data. */
T0 = IMAX(T0, COMBFILTER_MINPERIOD);
T1 = IMAX(T1, COMBFILTER_MINPERIOD);
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
g00 = MULT_COEF_TAPS(g0, gains[tapset0][0]);
g01 = MULT_COEF_TAPS(g0, gains[tapset0][1]);
g02 = MULT_COEF_TAPS(g0, gains[tapset0][2]);
g10 = MULT_COEF_TAPS(g1, gains[tapset1][0]);
g11 = MULT_COEF_TAPS(g1, gains[tapset1][1]);
g12 = MULT_COEF_TAPS(g1, gains[tapset1][2]);
x1 = x[-T1+1];
x2 = x[-T1 ];
x3 = x[-T1-1];
@ -226,16 +227,16 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
overlap=0;
for (i=0;i<overlap;i++)
{
opus_val16 f;
celt_coef f;
x0=x[i-T1+2];
f = MULT16_16_Q15(window[i],window[i]);
f = MULT_COEF(window[i],window[i]);
y[i] = x[i]
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g00),x[i-T0])
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+ MULT_COEF_32(MULT_COEF(f,g10),x2)
+ MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3))
+ MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;