Eliminate the loop when decoding the split angle.

Use a closed-form formula for the search instead. This requires an integer sqrt, so it is not actually closed-form, but the number of iterations is O(qb) instead of O(2**qb).
2010-07-27 15:09:51 -07:00 · 2010-07-27 15:09:51 -07:00 · 68242ac58c
commit 68242ac58c
parent 5bdb7dbafc
4 changed files with 42 additions and 39 deletions
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@ -664,21 +664,23 @@ static void quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_
               ec_encode((ec_enc*)ec, fl, fl+fs, ft);
            } else {
               int fl=0;
-               int j, fm;
+               int fm;
               fm = ec_decode((ec_dec*)ec, ft);
-               j=0;
-               while (1)
+
+               if (fm < ((1<<qb>>1)*((1<<qb>>1) + 1)>>1))
               {
-                  if (fm < fl+fs)
-                     break;
-                  fl+=fs;
-                  if (j<(1<<qb>>1))
-                     fs++;
-                  else
-                     fs--;
-                  j++;
+                  itheta = (isqrt32(8*(celt_uint32)fm + 1) - 1)>>1;
+                  fs = itheta + 1;
+                  fl = itheta*(itheta + 1)>>1;
               }
-               itheta = j;
+               else
+               {
+                  itheta = (2*((1<<qb) + 1)
+                   - isqrt32(8*(celt_uint32)(ft - fm - 1) + 1))>>1;
+                  fs = (1<<qb) + 1 - itheta;
+                  fl = ft - (((1<<qb) + 1 - itheta)*((1<<qb) + 2 - itheta)>>1);
+               }
+
               ec_dec_update((ec_dec*)ec, fl, fl+fs, ft);
            }
            qalloc = log2_frac(ft,BITRES) - log2_frac(fs,BITRES) + 1;
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@ -148,33 +148,6 @@ static inline celt_uint32 imusdiv32even(celt_uint32 _a,celt_uint32 _b,
   (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
 }

-/*Compute floor(sqrt(_val)) with exact arithmetic.
-  This has been tested on all possible 32-bit inputs.*/
-static unsigned isqrt32(celt_uint32 _val){
-  unsigned b;
-  unsigned g;
-  int      bshift;
-  /*Uses the second method from
-     http://www.azillionmonkeys.com/qed/sqroot.html
-    The main idea is to search for the largest binary digit b such that
-     (g+b)*(g+b) <= _val, and add it to the solution g.*/
-  g=0;
-  bshift=EC_ILOG(_val)-1>>1;
-  b=1U<<bshift;
-  do{
-    celt_uint32 t;
-    t=((celt_uint32)g<<1)+b<<bshift;
-    if(t<=_val){
-      g+=b;
-      _val-=t;
-    }
-    b>>=1;
-    bshift--;
-  }
-  while(bshift>=0);
-  return g;
-}
-
 #endif /* SMALL_FOOTPRINT */

 /*Although derived separately, the pulse vector coding scheme is equivalent to
--- a/libcelt/mathops.c
+++ b/libcelt/mathops.c
@ -41,6 +41,33 @@

 #include "mathops.h"

+/*Compute floor(sqrt(_val)) with exact arithmetic.
+  This has been tested on all possible 32-bit inputs.*/
+unsigned isqrt32(celt_uint32 _val){
+  unsigned b;
+  unsigned g;
+  int      bshift;
+  /*Uses the second method from
+     http://www.azillionmonkeys.com/qed/sqroot.html
+    The main idea is to search for the largest binary digit b such that
+     (g+b)*(g+b) <= _val, and add it to the solution g.*/
+  g=0;
+  bshift=EC_ILOG(_val)-1>>1;
+  b=1U<<bshift;
+  do{
+    celt_uint32 t;
+    t=((celt_uint32)g<<1)+b<<bshift;
+    if(t<=_val){
+      g+=b;
+      _val-=t;
+    }
+    b>>=1;
+    bshift--;
+  }
+  while(bshift>=0);
+  return g;
+}
+
 #ifdef FIXED_POINT

 celt_word32 frac_div32(celt_word32 a, celt_word32 b)
--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@ -45,6 +45,7 @@
 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
 #define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15)

+unsigned isqrt32(celt_uint32 _val);

 #ifndef FIXED_POINT