Optimisations: cache the sign of x in alg_quant(), and change celt_div()/celt_rcp()
to assume the denominator is positive.
This commit is contained in:
parent
208ae6e33d
commit
49134381d0
3 changed files with 16 additions and 13 deletions
|
@ -42,7 +42,7 @@ static long long celt_mips = 0;
|
||||||
#define MIPS_INC celt_mips++,
|
#define MIPS_INC celt_mips++,
|
||||||
|
|
||||||
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
|
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
|
||||||
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
|
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
|
||||||
|
|
||||||
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
||||||
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
||||||
|
|
|
@ -204,20 +204,14 @@ static inline celt_word32_t celt_exp2(celt_word16_t x)
|
||||||
/** Reciprocal approximation (Q15 input, Q16 output) */
|
/** Reciprocal approximation (Q15 input, Q16 output) */
|
||||||
static inline celt_word32_t celt_rcp(celt_word32_t x)
|
static inline celt_word32_t celt_rcp(celt_word32_t x)
|
||||||
{
|
{
|
||||||
int i, neg=0;
|
int i;
|
||||||
celt_word16_t n, frac;
|
celt_word16_t n, frac;
|
||||||
const celt_word16_t C[5] = {21848, -7251, 2403, -934, 327};
|
const celt_word16_t C[5] = {21848, -7251, 2403, -934, 327};
|
||||||
if (x<0)
|
celt_assert2(x>0, "celt_rcp() only defined for positive values");
|
||||||
{
|
|
||||||
neg = 1;
|
|
||||||
x = NEG16(x);
|
|
||||||
}
|
|
||||||
i = celt_ilog2(x);
|
i = celt_ilog2(x);
|
||||||
n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15);
|
n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15);
|
||||||
frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
||||||
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
||||||
if (neg)
|
|
||||||
frac = -frac;
|
|
||||||
return VSHR32(EXTEND32(frac),i-16);
|
return VSHR32(EXTEND32(frac),i-16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
17
libcelt/vq.c
17
libcelt/vq.c
|
@ -98,7 +98,6 @@ struct NBest {
|
||||||
celt_word32_t score;
|
celt_word32_t score;
|
||||||
int sign;
|
int sign;
|
||||||
int pos;
|
int pos;
|
||||||
int orig;
|
|
||||||
celt_word32_t xy;
|
celt_word32_t xy;
|
||||||
celt_word32_t yy;
|
celt_word32_t yy;
|
||||||
celt_word32_t yp;
|
celt_word32_t yp;
|
||||||
|
@ -110,6 +109,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
VARDECL(celt_norm_t, _ny);
|
VARDECL(celt_norm_t, _ny);
|
||||||
VARDECL(int, _iy);
|
VARDECL(int, _iy);
|
||||||
VARDECL(int, _iny);
|
VARDECL(int, _iny);
|
||||||
|
VARDECL(int, signx);
|
||||||
celt_norm_t *y, *ny;
|
celt_norm_t *y, *ny;
|
||||||
int *iy, *iny;
|
int *iy, *iny;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
@ -130,11 +130,21 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
ALLOC(_ny, N, celt_norm_t);
|
ALLOC(_ny, N, celt_norm_t);
|
||||||
ALLOC(_iy, N, int);
|
ALLOC(_iy, N, int);
|
||||||
ALLOC(_iny, N, int);
|
ALLOC(_iny, N, int);
|
||||||
|
ALLOC(signx, N, int);
|
||||||
|
|
||||||
y = _y;
|
y = _y;
|
||||||
ny = _ny;
|
ny = _ny;
|
||||||
iy = _iy;
|
iy = _iy;
|
||||||
iny = _iny;
|
iny = _iny;
|
||||||
|
|
||||||
|
for (j=0;j<N;j++)
|
||||||
|
{
|
||||||
|
if (X[j]>0)
|
||||||
|
signx[j]=1;
|
||||||
|
else
|
||||||
|
signx[j]=-1;
|
||||||
|
}
|
||||||
|
|
||||||
for (j=0;j<N;j++)
|
for (j=0;j<N;j++)
|
||||||
{
|
{
|
||||||
Rpp = MAC16_16(Rpp, P[j],P[j]);
|
Rpp = MAC16_16(Rpp, P[j],P[j]);
|
||||||
|
@ -174,7 +184,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
celt_word16_t s;
|
celt_word16_t s;
|
||||||
|
|
||||||
/* Select sign based on X[j] alone */
|
/* Select sign based on X[j] alone */
|
||||||
if (X[j]>0) sign=1; else sign=-1;
|
sign = signx[j];
|
||||||
s = SHL16(sign*pulsesAtOnce, yshift);
|
s = SHL16(sign*pulsesAtOnce, yshift);
|
||||||
|
|
||||||
/* Updating the sums of the new pulse(s) */
|
/* Updating the sums of the new pulse(s) */
|
||||||
|
@ -204,7 +214,6 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
{
|
{
|
||||||
nbest.score = score;
|
nbest.score = score;
|
||||||
nbest.pos = j;
|
nbest.pos = j;
|
||||||
nbest.orig = 0;
|
|
||||||
nbest.sign = sign;
|
nbest.sign = sign;
|
||||||
nbest.xy = Rxy;
|
nbest.xy = Rxy;
|
||||||
nbest.yy = Ryy;
|
nbest.yy = Ryy;
|
||||||
|
@ -212,7 +221,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
celt_assert2(nbest[0]->score > -VERY_LARGE32, "Could not find any match in VQ codebook. Something got corrupted somewhere.");
|
celt_assert2(nbest.score > -VERY_LARGE32, "Could not find any match in VQ codebook. Something got corrupted somewhere.");
|
||||||
|
|
||||||
/* Only now that we've made the final choice, update ny/iny and others */
|
/* Only now that we've made the final choice, update ny/iny and others */
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue