Making decode_pulses() compute the L2-norm on the fly

2014-01-21 15:17:01 -05:00 · 2014-01-21 15:17:01 -05:00 · 6218cba258
commit 6218cba258
parent 17b197837f
3 changed files with 31 additions and 19 deletions
--- a/celt/cwrs.c
+++ b/celt/cwrs.c
@@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
  ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
 }
-static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
  opus_uint32 p;
  int         s;
  int         k0;
+  opus_int16  val;
+  opus_val32  yy=0;
  celt_assert(_k>0);
  celt_assert(_n>1);
  while(_n>2){
@@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
      }
      else for(p=row[_k];p>_i;p=row[_k])_k--;
      _i-=p;
-      *_y++=(k0-_k+s)^s;
+      val=(k0-_k+s)^s;
+      *_y++=val;
+      yy=MAC16_16(yy,val,val);
    }
    /*Lots of dimensions case:*/
    else{
@@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
        do p=CELT_PVQ_U_ROW[--_k][_n];
        while(p>_i);
        _i-=p;
-        *_y++=(k0-_k+s)^s;
+        val=(k0-_k+s)^s;
+        *_y++=val;
+        yy=MAC16_16(yy,val,val);
      }
    }
    _n--;
@@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
  k0=_k;
  _k=(_i+1)>>1;
  if(_k)_i-=2*_k-1;
-  *_y++=(k0-_k+s)^s;
+  val=(k0-_k+s)^s;
+  *_y++=val;
+  yy=MAC16_16(yy,val,val);
  /*_n==1*/
  s=-(int)_i;
-  *_y=(_k+s)^s;
+  val=(_k+s)^s;
+  *_y=val;
+  yy=MAC16_16(yy,val,val);
+  return yy;
 }
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
-  cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+  return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
 }
 #else /* SMALL_FOOTPRINT */
@@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
  _y: Returns the vector of pulses.
  _u: Must contain entries [0..._k+1] of row _n of U() on input.
      Its contents will be destructively modified.*/
-static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
  int j;
+  opus_int16 val;
+  opus_val32 yy=0;
  celt_assert(_n>0);
  j=0;
  do{
@@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
    while(p>_i)p=_u[--_k];
    _i-=p;
    yj-=_k;
-    _y[j]=(yj+s)^s;
+    val=(yj+s)^s;
+    _y[j]=val;
+    yy=MAC16_16(yy,val,val);
    uprev(_u,_k+2,0);
  }
  while(++j<_n);
+  return yy;
 }
 /*Returns the index of the given combination of K elements chosen from a set
@@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
  RESTORE_STACK;
 }
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
  VARDECL(opus_uint32,u);
+  int ret;
  SAVE_STACK;
  celt_assert(_k>0);
  ALLOC(u,_k+2U,opus_uint32);
-  cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+  ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
  RESTORE_STACK;
+  return ret;
 }
 #endif /* SMALL_FOOTPRINT */
--- a/celt/cwrs.h
+++ b/celt/cwrs.h
@@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac);
 void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
-void decode_pulses(int *_y, int N, int K, ec_dec *dec);
+opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
 #endif /* CWRS_H */
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -325,7 +325,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
 {
-   int i;
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
@@ -334,12 +333,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
-   decode_pulses(iy, N, K, dec);
+   Ryy = decode_pulses(iy, N, K, dec);
-   Ryy = 0;
-   i=0;
-   do {
-      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
-   } while (++i < N);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);