Using a table on ARM for unsigned division by small (<=256) integers.

Saves 0.6% for 64 kb/s and 1.8% for 128 kb/s when decoding on arm7tdmi.
This commit is contained in:
Jean-Marc Valin 2014-01-20 16:32:16 -05:00
parent 379af35fd4
commit ec5d01cbe4
4 changed files with 62 additions and 2 deletions

View file

@ -34,6 +34,12 @@
# include <stddef.h>
# include "ecintrin.h"
/*Lookup table read by celt_udiv() on its small-divisor (d<=256) path.
  Only declared here; the 129 entries are defined elsewhere.*/
extern const opus_uint32 SMALL_DIV_TABLE[129];
/*Enable the table-based path in celt_udiv() when building with
   OPUS_ARM_ASM; otherwise celt_udiv() is a plain n/d.*/
#ifdef OPUS_ARM_ASM
#define USE_SMALL_DIV_TABLE
#endif
/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
larger type, you can speed up the decoder by using it here.*/
typedef opus_uint32 ec_window;
@ -114,4 +120,20 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){
rounding error is in the positive direction).*/
opus_uint32 ec_tell_frac(ec_ctx *_this);
/*Unsigned division n/d, with a table-driven path for small divisors that is
   compiled in only when USE_SMALL_DIV_TABLE is defined.
  Tested exhaustively for all n and for 1<=d<=256.
  n: Dividend.
  d: Divisor. No check for d==0 is made on either path.
  Return: The quotient n/d.*/
static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
#ifdef USE_SMALL_DIV_TABLE
   if (d<=256) {
      /*d&-d isolates the lowest set bit of d. EC_ILOG of that bit yields the
         shift that turns d into a table index, and (less one) the shift
         applied to n.*/
      opus_uint32 shift = EC_ILOG(d&-d);
      /*Widen to 64 bits before multiplying so the high word survives.
        NOTE(review): the table entries are presumably scaled reciprocals --
         confirm against the table's definition.*/
      opus_uint64 scaled =
       (opus_uint64)SMALL_DIV_TABLE[d>>shift]*(n>>(shift-1));
      opus_uint32 quot = (opus_uint32)(scaled>>32);
      /*If a full d still fits in the remainder, the estimate is one too
         small; bump it.*/
      return (n-quot*d >= d) ? quot+1 : quot;
   }
#endif
   /*Large divisors, or builds without the table, use ordinary division.*/
   return n/d;
}
#endif