mirror of
https://github.com/xiph/opus.git
synced 2025-05-27 13:49:13 +00:00
Using a table on ARM for unsigned division by small (<=256) integers.
Saves 0.6% for 64 kb/s and 1.8% for 128 kb/s when decoding on arm7tdmi.
This commit is contained in:
parent
379af35fd4
commit
ec5d01cbe4
4 changed files with 62 additions and 2 deletions
|
@ -91,3 +91,41 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){
|
|||
}
|
||||
return nbits-l;
|
||||
}
|
||||
|
||||
#ifdef USE_SMALL_DIV_TABLE
|
||||
/* Reciprocal table read by celt_udiv() when USE_SMALL_DIV_TABLE is defined.
   Entry i holds 2^32/(2*i+1) rounded down (e.g. entry 1 is 0x55555555 for the
   divisor 3), except for i=0, which holds 0xFFFFFFFF because 2^32 itself does
   not fit in a 32-bit value.
   NOTE(review): 128 initializers are listed for the 129-element array, so the
   last element (index 128) is implicitly zero; the largest odd divisor covered
   by an explicit entry is 255 (index 127). Confirm index 128 is never read. */
|
||||
const opus_uint32 SMALL_DIV_TABLE[129] = {
|
||||
0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
|
||||
0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
|
||||
0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
|
||||
0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
|
||||
0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
|
||||
0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
|
||||
0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
|
||||
0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
|
||||
0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
|
||||
0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
|
||||
0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
|
||||
0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
|
||||
0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
|
||||
0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
|
||||
0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
|
||||
0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
|
||||
0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
|
||||
0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
|
||||
0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
|
||||
0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
|
||||
0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
|
||||
0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
|
||||
0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
|
||||
0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
|
||||
0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
|
||||
0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
|
||||
0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
|
||||
0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
|
||||
0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
|
||||
0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
|
||||
0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
|
||||
0x01073260, 0x0105197F, 0x0103091B, 0x01010101
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -34,6 +34,12 @@
|
|||
# include <stddef.h>
|
||||
# include "ecintrin.h"
|
||||
|
||||
/* Declaration of the reciprocal table indexed by celt_udiv().
   This declaration is unconditional, while the table's definition in this
   change is guarded by USE_SMALL_DIV_TABLE; the name must therefore only be
   referenced when that macro is defined. */
extern const opus_uint32 SMALL_DIV_TABLE[129];
|
||||
|
||||
/* Select the table-based division path in celt_udiv() whenever
   OPUS_ARM_ASM is defined. */
#ifdef OPUS_ARM_ASM
|
||||
#define USE_SMALL_DIV_TABLE
|
||||
#endif
|
||||
|
||||
/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
|
||||
larger type, you can speed up the decoder by using it here.*/
|
||||
typedef opus_uint32 ec_window;
|
||||
|
@ -114,4 +120,20 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){
|
|||
rounding error is in the positive direction).*/
|
||||
opus_uint32 ec_tell_frac(ec_ctx *_this);
|
||||
|
||||
/* Unsigned division: returns the quotient of n divided by d.
   When USE_SMALL_DIV_TABLE is defined and d<=256, the quotient is computed
   with a multiplication by a precomputed entry of SMALL_DIV_TABLE followed by
   a one-step correction, instead of the division operator; otherwise (and for
   d>256) it is simply n/d.
   d must be non-zero: n/d is undefined for d==0, and the table path is only
   stated to be valid for 1<=d<=256.
   Tested exhaustively for all n and for 1<=d<=256 */
|
||||
static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
|
||||
#ifdef USE_SMALL_DIV_TABLE
|
||||
/* Divisors above 256 are not handled by the table path; use plain
   division for them. */
if (d>256)
|
||||
return n/d;
|
||||
else {
|
||||
opus_uint32 t, q;
|
||||
/* d&-d isolates the lowest set bit of d.
   NOTE(review): EC_ILOG is defined outside this view; presumably it returns
   one plus the index of the highest set bit, which would make t equal to the
   number of trailing zero bits of d plus one -- confirm. */
t = EC_ILOG(d&-d);
|
||||
/* Multiply the table entry at index d>>t by n>>(t-1) in 64-bit arithmetic
   and keep the upper 32 bits of the product as the quotient estimate. */
q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
|
||||
/* Add one to the estimate when the remainder n-q*d is still at least d. */
return q+(n-q*d >= d);
|
||||
}
|
||||
#else
|
||||
return n/d;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
|
|||
|
||||
/* Stores rng divided by _ft in _this->ext, divides _this->val by that value,
   and returns _ft minus EC_MINI(s+1,_ft), where s is that second quotient.
   NOTE(review): EC_MINI is defined outside this view; presumably it yields
   the smaller of its two arguments -- confirm. */
unsigned ec_decode(ec_dec *_this,unsigned _ft){
|
||||
unsigned s;
|
||||
/* NOTE(review): this is a diff view. The two assignments to ext below appear
   to be the removed line (plain division) followed by its replacement
   (celt_udiv); both compute rng divided by _ft. */
_this->ext=_this->rng/_ft;
|
||||
_this->ext=celt_udiv(_this->rng,_ft);
|
||||
s=(unsigned)(_this->val/_this->ext);
|
||||
return _ft-EC_MINI(s+1,_ft);
|
||||
}
|
||||
|
|
|
@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
|
|||
|
||||
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
|
||||
opus_uint32 r;
|
||||
r=_this->rng/_ft;
|
||||
r=celt_udiv(_this->rng,_ft);
|
||||
if(_fl>0){
|
||||
_this->val+=_this->rng-IMUL32(r,(_ft-_fl));
|
||||
_this->rng=IMUL32(r,(_fh-_fl));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue