optimisation: simplified the "full gain" case of alg_quant() to remove some
32-bit muls.
This commit is contained in:
parent
ed317c94c3
commit
d568303134
2 changed files with 34 additions and 14 deletions
|
@ -85,6 +85,7 @@ static inline int find_max32(celt_word32_t *x, int len)
|
||||||
#ifndef FIXED_POINT
|
#ifndef FIXED_POINT
|
||||||
|
|
||||||
#define celt_sqrt(x) ((float)sqrt(x))
|
#define celt_sqrt(x) ((float)sqrt(x))
|
||||||
|
#define celt_psqrt(x) ((float)sqrt(x))
|
||||||
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
|
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
|
||||||
#define celt_acos acos
|
#define celt_acos acos
|
||||||
#define celt_exp exp
|
#define celt_exp exp
|
||||||
|
@ -153,6 +154,22 @@ static inline celt_word32_t celt_sqrt(celt_word32_t x)
|
||||||
return rt;
|
return rt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Sqrt approximation (QX input, QX/2 output) that assumes that the input is
|
||||||
|
strictly positive */
|
||||||
|
static inline celt_word32_t celt_psqrt(celt_word32_t x)
|
||||||
|
{
|
||||||
|
int k;
|
||||||
|
celt_word16_t n;
|
||||||
|
celt_word32_t rt;
|
||||||
|
const celt_word16_t C[5] = {23174, 11584, -3011, 1570, -557};
|
||||||
|
k = (celt_ilog2(x)>>1)-7;
|
||||||
|
x = VSHR32(x, (k<<1));
|
||||||
|
n = x-32768;
|
||||||
|
rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
||||||
|
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
||||||
|
rt = VSHR32(rt,7-k);
|
||||||
|
return rt;
|
||||||
|
}
|
||||||
|
|
||||||
#define L1 32767
|
#define L1 32767
|
||||||
#define L2 -7651
|
#define L2 -7651
|
||||||
|
|
31
libcelt/vq.c
31
libcelt/vq.c
|
@ -167,9 +167,9 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
/* Select sign based on X[j] alone */
|
/* Select sign based on X[j] alone */
|
||||||
s = signx[j]*magnitude;
|
s = signx[j]*magnitude;
|
||||||
/* Temporary sums of the new pulse(s) */
|
/* Temporary sums of the new pulse(s) */
|
||||||
Rxy = SHR32(xy + MULT16_16(s,X[j]),rshift);
|
Rxy = EXTRACT16(SHR32(xy + MULT16_16(s,X[j]),rshift));
|
||||||
/* We're multiplying y[j] by two so we don't have to do it here */
|
/* We're multiplying y[j] by two so we don't have to do it here */
|
||||||
Ryy = SHR32(yy + MULT16_16(s,y[j]),rshift);
|
Ryy = EXTRACT16(SHR32(yy + MULT16_16(s,y[j]),rshift));
|
||||||
|
|
||||||
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
|
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
|
||||||
Rxy is positive because the sign is pre-computed) */
|
Rxy is positive because the sign is pre-computed) */
|
||||||
|
@ -189,27 +189,30 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
celt_word32_t best_num = -VERY_LARGE32;
|
celt_word32_t best_num = -VERY_LARGE32;
|
||||||
for (j=0;j<N;j++)
|
for (j=0;j<N;j++)
|
||||||
{
|
{
|
||||||
celt_word32_t Rxy, Ryy, Ryp;
|
celt_word16_t Rxy, Ryy, Ryp;
|
||||||
celt_word32_t num;
|
celt_word32_t num;
|
||||||
/* Select sign based on X[j] alone */
|
/* Select sign based on X[j] alone */
|
||||||
s = signx[j]*magnitude;
|
s = signx[j]*magnitude;
|
||||||
/* Temporary sums of the new pulse(s) */
|
/* Temporary sums of the new pulse(s) */
|
||||||
Rxy = xy + MULT16_16(s,X[j]);
|
Rxy = ROUND16(xy + MULT16_16(s,X[j]), 14);
|
||||||
/* We're multiplying y[j] by two so we don't have to do it here */
|
/* We're multiplying y[j] by two so we don't have to do it here */
|
||||||
Ryy = yy + MULT16_16(s,y[j]);
|
Ryy = ROUND16(yy + MULT16_16(s,y[j]), 14);
|
||||||
Ryp = yp + MULT16_16(s,P[j]);
|
Ryp = ROUND16(yp + MULT16_16(s,P[j]), 14);
|
||||||
|
|
||||||
/* Compute the gain such that ||p + g*y|| = 1 */
|
/* Compute the gain such that ||p + g*y|| = 1 */
|
||||||
g = MULT16_32_Q15(
|
g = SHR32(MULT16_32_Q15(
|
||||||
celt_sqrt(MULT16_16(ROUND16(Ryp,14),ROUND16(Ryp,14)) + Ryy -
|
celt_psqrt(MULT16_16(Ryp,Ryp) +
|
||||||
MULT16_16(ROUND16(Ryy,14),Rpp))
|
MULT16_16(Ryy,QCONST16(1.f,14)-Rpp))
|
||||||
- ROUND16(Ryp,14),
|
- Ryp,
|
||||||
celt_rcp(SHR32(Ryy,12)));
|
celt_rcp(Ryy)),4);
|
||||||
/* Knowing that gain, what's the error: (x-g*y)^2
|
/* Knowing that gain, what's the error: (x-g*y)^2
|
||||||
(result is negated and we discard x^2 because it's constant) */
|
(result is negated and we discard x^2 because it's constant) */
|
||||||
/* score = 2.f*g*Rxy - 1.f*g*g*Ryy*NORM_SCALING_1;*/
|
/* score = 2*g*Rxy - g*g*Ryy;*/
|
||||||
num = 2*MULT16_32_Q14(ROUND16(Rxy,14),g)
|
#ifdef FIXED_POINT
|
||||||
- MULT16_32_Q14(EXTRACT16(MULT16_32_Q14(ROUND16(Ryy,14),g)),g);
|
num = MULT16_16(Rxy,g) - SHL32(MULT16_16(MULT16_16_Q15(Ryy,g),g),2);
|
||||||
|
#else
|
||||||
|
num = 2*g*Rxy - g*g*Ryy;
|
||||||
|
#endif
|
||||||
if (num >= best_num)
|
if (num >= best_num)
|
||||||
{
|
{
|
||||||
best_num = num;
|
best_num = num;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue