From 49134381d07579f4d1d15039c66f9e883bc6bd10 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <Jean-Marc.Valin@csiro.au>
Date: Tue, 25 Mar 2008 16:07:05 +1100
Subject: [PATCH] optimisations: cache sign of x in alg_quant(); change
 celt_div()/celt_rcp() to assume the denominator is positive.

---
 libcelt/fixed_debug.h |  2 +-
 libcelt/mathops.h     | 10 ++--------
 libcelt/vq.c          | 17 +++++++++++++----
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/libcelt/fixed_debug.h b/libcelt/fixed_debug.h
index bffad156..1e0e1ea6 100644
--- a/libcelt/fixed_debug.h
+++ b/libcelt/fixed_debug.h
@@ -42,7 +42,7 @@ static long long celt_mips = 0;
 #define MIPS_INC celt_mips++,
 
 #define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
-#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
 
 #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
diff --git a/libcelt/mathops.h b/libcelt/mathops.h
index 61f540cb..d67db37b 100644
--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@@ -204,20 +204,14 @@ static inline celt_word32_t celt_exp2(celt_word16_t x)
 /** Reciprocal approximation (Q15 input, Q16 output) */
 static inline celt_word32_t celt_rcp(celt_word32_t x)
 {
-   int i, neg=0;
+   int i;
    celt_word16_t n, frac;
    const celt_word16_t C[5] = {21848, -7251, 2403, -934, 327};
-   if (x<0)
-   {
-      neg = 1;
-      x = NEG16(x);
-   }
+   celt_assert2(x>0, "celt_rcp() only defined for positive values");
    i = celt_ilog2(x);
    n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15);
    frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], 
                 MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
-   if (neg)
-      frac = -frac;
    return VSHR32(EXTEND32(frac),i-16);
 }
 
diff --git a/libcelt/vq.c b/libcelt/vq.c
index 3e4de85c..f6389d29 100644
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -98,7 +98,6 @@ struct NBest {
    celt_word32_t score;
    int sign;
    int pos;
-   int orig;
    celt_word32_t xy;
    celt_word32_t yy;
    celt_word32_t yp;
@@ -110,6 +109,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
    VARDECL(celt_norm_t, _ny);
    VARDECL(int, _iy);
    VARDECL(int, _iny);
+   VARDECL(int, signx);
    celt_norm_t *y, *ny;
    int *iy, *iny;
    int i, j;
@@ -130,11 +130,21 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
    ALLOC(_ny, N, celt_norm_t);
    ALLOC(_iy, N, int);
    ALLOC(_iny, N, int);
+   ALLOC(signx, N, int);
+
    y = _y;
    ny = _ny;
    iy = _iy;
    iny = _iny;
    
+   for (j=0;j<N;j++)
+   {
+      if (X[j]>0)
+         signx[j]=1;
+      else
+         signx[j]=-1;
+   }
+   
    for (j=0;j<N;j++)
    {
       Rpp = MAC16_16(Rpp, P[j],P[j]);
@@ -174,7 +184,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
          celt_word16_t s;
          
          /* Select sign based on X[j] alone */
-         if (X[j]>0) sign=1; else sign=-1;
+         sign = signx[j];
          s = SHL16(sign*pulsesAtOnce, yshift);
 
          /* Updating the sums of the new pulse(s) */
@@ -204,7 +214,6 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
          {
             nbest.score = score;
             nbest.pos = j;
-            nbest.orig = 0;
             nbest.sign = sign;
             nbest.xy = Rxy;
             nbest.yy = Ryy;
@@ -212,7 +221,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
          }
       }
 
-      celt_assert2(nbest[0]->score > -VERY_LARGE32, "Could not find any match in VQ codebook. Something got corrupted somewhere.");
+      celt_assert2(nbest.score > -VERY_LARGE32, "Could not find any match in VQ codebook. Something got corrupted somewhere.");
 
       /* Only now that we've made the final choice, update ny/iny and others */
       {