From 06390d082dcdfa8addb3dde337543bc0f0ebae44 Mon Sep 17 00:00:00 2001
From: tterribe <tterribe@0101bb08-14d6-0310-b084-bc0e0c8e3800>
Date: Fri, 11 Jan 2008 03:13:50 +0000
Subject: [PATCH] Add ec_enc_tell to report an estimate of the number of bits
 used so far.

Remove the special case for 0 in EC_ILOG, as we never pass it 0 and this
 generates slightly better code.

Update ec_enc_bits64 to split the encoded values along word boundaries.
 This should generate slightly better code, as well as fix a subtle bug (the
 proper bits were not being masked out of the low part).
However, this will render previous streams that used this function undecodable
 (to my knowledge, no one is actually using it yet).

git-svn-id: http://svn.xiph.org/trunk/ghost@14391 0101bb08-14d6-0310-b084-bc0e0c8e3800
---
 libentcode/ecintrin.h | 14 ++++++++------
 libentcode/ectest.c   | 33 ++++++++++++++++++++++++++++++++-
 libentcode/entcode.c  |  4 ++--
 libentcode/entenc.c   | 12 +++++-------
 libentcode/entenc.h   |  6 ++++++
 libentcode/mfrngenc.c | 14 ++++++++++++++
 libentcode/rangeenc.c | 14 ++++++++++++++
 7 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/libentcode/ecintrin.h b/libentcode/ecintrin.h
index 4c00596a..f9a960f9 100644
--- a/libentcode/ecintrin.h
+++ b/libentcode/ecintrin.h
@@ -61,9 +61,10 @@
 #endif
 #if defined(EC_CLZ)
 /*Note that __builtin_clz is not defined when _x==0, according to the gcc
-   documentation (and that of the BSR instruction that implements it on x86),
-   so we have to special-case it.*/
-# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x)&-!!(_x))
+   documentation (and that of the BSR instruction that implements it on x86).
+  The majority of the time we never pass it zero.
+  When we need to, it can be special cased.*/
+# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
 #else
 # define EC_ILOG(_x) (ec_ilog(_x))
 #endif
@@ -81,9 +82,10 @@
 #endif
 #if defined(EC_CLZ64)
 /*Note that __builtin_clz is not defined when _x==0, according to the gcc
-   documentation (and that of the BSR instruction that implements it on x86),
-   so we have to special-case it.*/
-# define EC_ILOG64(_x) (EC_CLZ64_0-EC_CLZ64(_x)&-!!(_x))
+   documentation (and that of the BSR instruction that implements it on x86).
+  The majority of the time we never pass it zero.
+  When we need to, it can be special cased.*/
+# define EC_ILOG64(_x) (EC_CLZ64_0-EC_CLZ64(_x))
 #else
 # define EC_ILOG64(_x) (ec_ilog64(_x))
 #endif
diff --git a/libentcode/ectest.c b/libentcode/ectest.c
index c254722d..3f64288c 100644
--- a/libentcode/ectest.c
+++ b/libentcode/ectest.c
@@ -1,6 +1,8 @@
 #include <stdio.h>
+#include <math.h>
 #include "probenc.h"
 #include "probdec.h"
+#include "bitrenc.h"
 
 int main(int _argc,char **_argv){
   ec_byte_buffer buf;
@@ -8,32 +10,54 @@ int main(int _argc,char **_argv){
   ec_dec         dec;
   ec_probmod     mod;
   ec_uint64      sym64;
+  long           nbits;
+  double         entropy;
   int            ft;
   int            ftb;
   int            sym;
   int            sz;
   int            s;
   int            i;
+  entropy=0;
   /*Testing encoding of raw bit values.*/
   ec_byte_writeinit(&buf);
   ec_enc_init(&enc,&buf);
   for(ft=0;ft<1024;ft++){
     for(i=0;i<ft;i++){
+      entropy+=log(ft)*M_LOG2E;
       ec_enc_uint(&enc,i,ft);
+      entropy+=log(ft)*M_LOG2E+30;
       ec_enc_uint64(&enc,(ec_uint64)i<<30|i,(ec_uint64)ft<<30);
     }
   }
   /*Testing encoding of raw bit values.*/
   for(ftb=0;ftb<16;ftb++){
     for(i=0;i<(1<<ftb);i++){
+      long nbits;
+      long nbits2;
+      entropy+=ftb;
+      nbits=ec_enc_tell(&enc);
       ec_enc_bits(&enc,i,ftb);
+      nbits2=ec_enc_tell(&enc);
+      if(nbits2-nbits!=ftb){
+        fprintf(stderr,"Used %li bits to encode %i bits directly.\n",
+         nbits2-nbits,ftb);
+      }
+      entropy+=ftb+30;
+      nbits=nbits2;
       ec_enc_bits64(&enc,(ec_uint64)i<<30|i,ftb+30);
+      nbits2=ec_enc_tell(&enc);
+      if(nbits2-nbits!=ftb+30){
+        fprintf(stderr,"Used %li bits to encode %i bits directly.\n",
+         nbits2-nbits,ftb+30);
+      }
     }
   }
   for(sz=1;sz<256;sz++){
     ec_probmod_init_full(&mod,sz,1,sz+(sz>>1),NULL);
     for(i=0;i<sz;i++){
       s=((unsigned)(i*45678901+7))%sz;
+      entropy+=(log(mod.ft)-log(ec_bitree_get_freq(mod.bitree,s)))*M_LOG2E;
       ec_probmod_write(&mod,&enc,s);
     }
     ec_probmod_clear(&mod);
@@ -42,12 +66,19 @@ int main(int _argc,char **_argv){
     ec_probmod_init_full(&mod,sz,1,sz+(sz>>1),NULL);
     for(i=0;i<sz;i++){
       s=((unsigned)(i*45678901+7))%sz;
+      entropy+=(log(ec_bitree_get_cumul(mod.bitree,EC_MINI(s+6,sz))-
+       ec_bitree_get_cumul(mod.bitree,EC_MAXI(s-5,0)))-
+       log(ec_bitree_get_freq(mod.bitree,s)))*M_LOG2E;
       ec_probmod_write_range(&mod,&enc,s,EC_MAXI(s-5,0),EC_MINI(s+6,sz));
     }
     ec_probmod_clear(&mod);
   }
+  nbits=ec_enc_tell(&enc);
   ec_enc_done(&enc);
-  fprintf(stderr,"Encoded to %li bytes.\n",(long)(buf.ptr-buf.buf));
+  fprintf(stderr,
+   "Encoded %0.2lf bits of entropy to %li bits (%0.3lf%% wasted).\n",
+   entropy,nbits,100*(nbits-entropy)/nbits);
+  fprintf(stderr,"Packed to %li bytes.\n",(long)(buf.ptr-buf.buf));
   ec_byte_readinit(&buf,ec_byte_get_buffer(&buf),ec_byte_bytes(&buf));
   ec_dec_init(&dec,&buf);
   for(ft=0;ft<1024;ft++){
diff --git a/libentcode/entcode.c b/libentcode/entcode.c
index 3ace831f..e1ca492d 100644
--- a/libentcode/entcode.c
+++ b/libentcode/entcode.c
@@ -18,7 +18,7 @@ unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b){
 
 int ec_ilog(ec_uint32 _v){
 #if defined(EC_CLZ)
-  return EC_CLZ0-EC_CLZ(_v)&-!!_v;
+  return EC_CLZ0-EC_CLZ(_v);
 #else
   /*On a Pentium M, this branchless version tested as the fastest on
      1,000,000,000 random 32-bit integers, edging out a similar version with
@@ -45,7 +45,7 @@ int ec_ilog(ec_uint32 _v){
 
 int ec_ilog64(ec_uint64 _v){
 #if defined(EC_CLZ64)
-  return EC_CLZ64_0-EC_CLZ64(_v)&-!!_v;
+  return EC_CLZ64_0-EC_CLZ64(_v);
 #else
   ec_uint32 v;
   int       ret;
diff --git a/libentcode/entenc.c b/libentcode/entenc.c
index 188aa424..62ef8844 100644
--- a/libentcode/entenc.c
+++ b/libentcode/entenc.c
@@ -75,12 +75,10 @@ void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb){
 }
 
 void ec_enc_bits64(ec_enc *_this,ec_uint64 _fl,int _ftb){
-  ec_uint32 fl;
-  ec_uint32 ft;
   if(_ftb>32){
-    _ftb-=32;
-    fl=(ec_uint32)(_fl>>_ftb)&0xFFFFFFFF;
-    ec_enc_bits(_this,fl,32);
+    ec_enc_bits(_this,(ec_uint32)(_fl>>32),_ftb-32);
+    _ftb=32;
+    _fl&=0xFFFFFFFF;
   }
   ec_enc_bits(_this,(ec_uint32)_fl,_ftb);
 }
@@ -91,7 +89,7 @@ void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
   unsigned  fl;
   int       ftb;
   _ft--;
-  ftb=EC_ILOG(_ft);
+  ftb=EC_ILOG(_ft)&-!!_ft;
   while(ftb>EC_UNIT_BITS){
     ftb-=EC_UNIT_BITS;
     ft=(_ft>>ftb)+1;
@@ -114,7 +112,7 @@ void ec_enc_uint64(ec_enc *_this,ec_uint64 _fl,ec_uint64 _ft){
   unsigned  fl;
   int       ftb;
   _ft--;
-  ftb=EC_ILOG64(_ft);
+  ftb=EC_ILOG64(_ft)&-!!_ft;
   while(ftb>EC_UNIT_BITS){
     ftb-=EC_UNIT_BITS;
     ft=(unsigned)(_ft>>ftb)+1;
diff --git a/libentcode/entenc.h b/libentcode/entenc.h
index 1ba891cf..3179bcc2 100644
--- a/libentcode/entenc.h
+++ b/libentcode/entenc.h
@@ -63,6 +63,12 @@ void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft);
        This must be at least one, and no more than 2**64-1.*/
 void ec_enc_uint64(ec_enc *_this,ec_uint64 _fl,ec_uint64 _ft);
 
+/*Returns the number of bits "used" by the encoded symbols so far.
+  The actual number of bits may be larger, due to rounding to whole bytes, or
+   smaller, due to trailing zeros that can be stripped.
+  Return: the number of bits.*/
+long ec_enc_tell(ec_enc *_this);
+
 /*Indicates that there are no more symbols to encode.
   All reamining output bytes are flushed to the output buffer.
   ec_enc_init() must be called before the encoder can be used again.*/
diff --git a/libentcode/mfrngenc.c b/libentcode/mfrngenc.c
index ec861414..5d222a91 100644
--- a/libentcode/mfrngenc.c
+++ b/libentcode/mfrngenc.c
@@ -119,6 +119,19 @@ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
   ec_enc_normalize(_this);
 }
 
+long ec_enc_tell(ec_enc *_this){
+  long nbits;
+  nbits=ec_byte_bytes(_this->buf)+(_this->rem>=0)+_this->ext<<3;
+  /*To handle the non-integral number of bits still left in the encoder state,
+     we compute the number of bits of low that must be encoded to ensure that
+     the value is inside the range for any possible subsequent bits.
+    Note that this is subtly different than the actual value we would end the
+     stream with, which tries to make as many of the trailing bits zeros as
+     possible.*/
+  nbits+=EC_CODE_BITS-EC_ILOG(_this->rng);
+  return nbits;
+}
+
 void ec_enc_done(ec_enc *_this){
   /*We compute the integer in the current interval that has the largest number
      of trailing zeros, and write that to the stream.
@@ -148,6 +161,7 @@ void ec_enc_done(ec_enc *_this){
     unsigned char *buf;
     /*Flush it into the output buffer.*/
     ec_enc_carry_out(_this,0);
+    _this->rem=-1;
     /*We may be able to drop some redundant bytes from the end.*/
     buf=ec_byte_get_buffer(_this->buf);
     p=buf+ec_byte_bytes(_this->buf)-1;
diff --git a/libentcode/rangeenc.c b/libentcode/rangeenc.c
index dcd9db5b..5833da64 100644
--- a/libentcode/rangeenc.c
+++ b/libentcode/rangeenc.c
@@ -91,6 +91,19 @@ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
   ec_enc_normalize(_this);
 }
 
+long ec_enc_tell(ec_enc *_this){
+  long nbits;
+  nbits=ec_byte_bytes(_this->buf)+(_this->rem>=0)+_this->ext<<3;
+  /*To handle the non-integral number of bits still left in the encoder state,
+     we compute the number of bits of low that must be encoded to ensure that
+     the value is inside the range for any possible subsequent bits.
+    Note that this is subtly different than the actual value we would end the
+     stream with, which tries to make as many of the trailing bits zeros as
+     possible.*/
+  nbits+=EC_CODE_BITS-EC_ILOG(_this->rng);
+  return nbits;
+}
+
 void ec_enc_done(ec_enc *_this){
   /*We compute the integer in the current interval that has the largest number
      of trailing zeros, and write that to the stream.
@@ -120,6 +133,7 @@ void ec_enc_done(ec_enc *_this){
     unsigned char *buf;
     /*Flush it into the output buffer.*/
     ec_enc_carry_out(_this,0);
+    _this->rem=-1;
     /*We may be able to drop some redundant bytes from the end.*/
     buf=ec_byte_get_buffer(_this->buf);
     p=buf+ec_byte_bytes(_this->buf)-1;