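Reduce the number of branches around ncwrs* and ucwrs* calls with k==0.
The k==0 check is removed from the ncwrs*/ucwrs* helpers themselves (which now assert k>0) and is instead done explicitly at the call sites that need it.
This slightly reduces the executable size and might improve performance on platforms without good branch prediction.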
This commit is contained in:
parent
222494f3ea
commit
711eade6d5
1 changed file with 20 additions and 12 deletions
|
@ -253,33 +253,39 @@ static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
|
||||||
/*Compute U(2,_k).
|
/*Compute U(2,_k).
|
||||||
Note that this may be called with _k=32768 (maxK[2]+1).*/
|
Note that this may be called with _k=32768 (maxK[2]+1).*/
|
||||||
static inline unsigned ucwrs2(unsigned _k){
|
static inline unsigned ucwrs2(unsigned _k){
|
||||||
return _k?_k+(_k-1):0;
|
celt_assert(_k>0);
|
||||||
|
return _k+(_k-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Compute V(2,_k).*/
|
/*Compute V(2,_k).*/
|
||||||
static inline opus_uint32 ncwrs2(int _k){
|
static inline opus_uint32 ncwrs2(int _k){
|
||||||
return _k?4*(opus_uint32)_k:1;
|
celt_assert(_k>0);
|
||||||
|
return 4*(opus_uint32)_k;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Compute U(3,_k).
|
/*Compute U(3,_k).
|
||||||
Note that this may be called with _k=32768 (maxK[3]+1).*/
|
Note that this may be called with _k=32768 (maxK[3]+1).*/
|
||||||
static inline opus_uint32 ucwrs3(unsigned _k){
|
static inline opus_uint32 ucwrs3(unsigned _k){
|
||||||
return _k?(2*(opus_uint32)_k-2)*_k+1:0;
|
celt_assert(_k>0);
|
||||||
|
return (2*(opus_uint32)_k-2)*_k+1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Compute V(3,_k).*/
|
/*Compute V(3,_k).*/
|
||||||
static inline opus_uint32 ncwrs3(int _k){
|
static inline opus_uint32 ncwrs3(int _k){
|
||||||
return _k?2*(2*(unsigned)_k*(opus_uint32)_k+1):1;
|
celt_assert(_k>0);
|
||||||
|
return 2*(2*(unsigned)_k*(opus_uint32)_k+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Compute U(4,_k).*/
|
/*Compute U(4,_k).*/
|
||||||
static inline opus_uint32 ucwrs4(int _k){
|
static inline opus_uint32 ucwrs4(int _k){
|
||||||
return _k?imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1):0;
|
celt_assert(_k>0);
|
||||||
|
return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Compute V(4,_k).*/
|
/*Compute V(4,_k).*/
|
||||||
static inline opus_uint32 ncwrs4(int _k){
|
static inline opus_uint32 ncwrs4(int _k){
|
||||||
return _k?((_k*(opus_uint32)_k+2)*_k)/3<<3:1;
|
celt_assert(_k>0);
|
||||||
|
return ((_k*(opus_uint32)_k+2)*_k)/3<<3;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* SMALL_FOOTPRINT */
|
#endif /* SMALL_FOOTPRINT */
|
||||||
|
@ -382,7 +388,7 @@ static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
|
||||||
_i-=p&s;
|
_i-=p&s;
|
||||||
yj=_k;
|
yj=_k;
|
||||||
_k=_i+1>>1;
|
_k=_i+1>>1;
|
||||||
p=ucwrs2(_k);
|
p=_k?ucwrs2(_k):0;
|
||||||
_i-=p;
|
_i-=p;
|
||||||
yj-=_k;
|
yj-=_k;
|
||||||
_y[0]=yj+s^s;
|
_y[0]=yj+s^s;
|
||||||
|
@ -403,7 +409,7 @@ static void cwrsi3(int _k,opus_uint32 _i,int *_y){
|
||||||
/*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
|
/*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
|
||||||
_i<2147418113=U(3,32768)).*/
|
_i<2147418113=U(3,32768)).*/
|
||||||
_k=_i>0?isqrt32(2*_i-1)+1>>1:0;
|
_k=_i>0?isqrt32(2*_i-1)+1>>1:0;
|
||||||
p=ucwrs3(_k);
|
p=_k?ucwrs3(_k):0;
|
||||||
_i-=p;
|
_i-=p;
|
||||||
yj-=_k;
|
yj-=_k;
|
||||||
_y[0]=yj+s^s;
|
_y[0]=yj+s^s;
|
||||||
|
@ -430,7 +436,7 @@ static void cwrsi4(int _k,opus_uint32 _i,int *_y){
|
||||||
kr=_k;
|
kr=_k;
|
||||||
for(;;){
|
for(;;){
|
||||||
_k=kl+kr>>1;
|
_k=kl+kr>>1;
|
||||||
p=ucwrs4(_k);
|
p=_k?ucwrs4(_k):0;
|
||||||
if(p<_i){
|
if(p<_i){
|
||||||
if(_k>=kr)break;
|
if(_k>=kr)break;
|
||||||
kl=_k+1;
|
kl=_k+1;
|
||||||
|
@ -492,7 +498,7 @@ static inline opus_uint32 icwrs2(const int *_y,int *_k){
|
||||||
opus_uint32 i;
|
opus_uint32 i;
|
||||||
int k;
|
int k;
|
||||||
i=icwrs1(_y+1,&k);
|
i=icwrs1(_y+1,&k);
|
||||||
i+=ucwrs2(k);
|
i+=k?ucwrs2(k):0;
|
||||||
k+=abs(_y[0]);
|
k+=abs(_y[0]);
|
||||||
if(_y[0]<0)i+=ucwrs2(k+1U);
|
if(_y[0]<0)i+=ucwrs2(k+1U);
|
||||||
*_k=k;
|
*_k=k;
|
||||||
|
@ -507,7 +513,7 @@ static inline opus_uint32 icwrs3(const int *_y,int *_k){
|
||||||
opus_uint32 i;
|
opus_uint32 i;
|
||||||
int k;
|
int k;
|
||||||
i=icwrs2(_y+1,&k);
|
i=icwrs2(_y+1,&k);
|
||||||
i+=ucwrs3(k);
|
i+=k?ucwrs3(k):0;
|
||||||
k+=abs(_y[0]);
|
k+=abs(_y[0]);
|
||||||
if(_y[0]<0)i+=ucwrs3(k+1U);
|
if(_y[0]<0)i+=ucwrs3(k+1U);
|
||||||
*_k=k;
|
*_k=k;
|
||||||
|
@ -522,7 +528,7 @@ static inline opus_uint32 icwrs4(const int *_y,int *_k){
|
||||||
opus_uint32 i;
|
opus_uint32 i;
|
||||||
int k;
|
int k;
|
||||||
i=icwrs3(_y+1,&k);
|
i=icwrs3(_y+1,&k);
|
||||||
i+=ucwrs4(k);
|
i+=k?ucwrs4(k):0;
|
||||||
k+=abs(_y[0]);
|
k+=abs(_y[0]);
|
||||||
if(_y[0]<0)i+=ucwrs4(k+1);
|
if(_y[0]<0)i+=ucwrs4(k+1);
|
||||||
*_k=k;
|
*_k=k;
|
||||||
|
@ -584,6 +590,7 @@ void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
|
||||||
|
|
||||||
void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
|
void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
|
||||||
opus_uint32 i;
|
opus_uint32 i;
|
||||||
|
celt_assert(_k>0);
|
||||||
#ifndef SMALL_FOOTPRINT
|
#ifndef SMALL_FOOTPRINT
|
||||||
switch(_n){
|
switch(_n){
|
||||||
case 2:{
|
case 2:{
|
||||||
|
@ -616,6 +623,7 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
|
||||||
|
|
||||||
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
|
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
|
||||||
{
|
{
|
||||||
|
celt_assert(_k>0);
|
||||||
#ifndef SMALL_FOOTPRINT
|
#ifndef SMALL_FOOTPRINT
|
||||||
switch(_n){
|
switch(_n){
|
||||||
case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
|
case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue