Split cwrsi() by pulses vs. dimensions.
This lets us cut out a bunch of work in the large-_n, small-_k case, where most of the dimensions won't have any pulses. It also gets rid of all remaining uses of CELT_PVQ_U() in cwrsi(), leaving just a single test instead of lots of mins and maxes, and makes a bunch of the jump threading more obvious. This is a 1.6% decoder speedup on a 96 kbps comp48-stereo encode on a Cortex-A8.
This commit is contained in:
parent
63f744d583
commit
ce15e65319
1 changed file with 35 additions and 20 deletions
47
celt/cwrs.c
47
celt/cwrs.c
|
@ -467,34 +467,49 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
|
||||||
celt_assert(_k>0);
|
celt_assert(_k>0);
|
||||||
celt_assert(_n>1);
|
celt_assert(_n>1);
|
||||||
while(_n>2){
|
while(_n>2){
|
||||||
|
opus_uint32 q;
|
||||||
|
/*Lots of pulses case:*/
|
||||||
|
if(_k>=_n){
|
||||||
|
const opus_uint32 *row;
|
||||||
|
row=CELT_PVQ_U_ROW[_n];
|
||||||
/*Are the pulses in this dimension negative?*/
|
/*Are the pulses in this dimension negative?*/
|
||||||
p=CELT_PVQ_U(_n,_k+1);
|
p=row[_k+1];
|
||||||
s=-(_i>=p);
|
s=-(_i>=p);
|
||||||
_i-=p&s;
|
_i-=p&s;
|
||||||
/*Count how many pulses were placed in this dimension.*/
|
/*Count how many pulses were placed in this dimension.*/
|
||||||
k0=_k;
|
k0=_k;
|
||||||
p=CELT_PVQ_U(_n,_k);
|
|
||||||
if(_k>_n){
|
|
||||||
const opus_uint32 *row;
|
|
||||||
opus_uint32 q;
|
|
||||||
row=CELT_PVQ_U_ROW[_n];
|
|
||||||
q=row[_n];
|
q=row[_n];
|
||||||
if(q>_i){
|
if(q>_i){
|
||||||
celt_assert(p>q);
|
celt_assert(p>q);
|
||||||
/*Setting p=q is unnecessary, but it helps the optimizer prove p>_i,
|
|
||||||
allowing it to jump straight past the initial test in the second
|
|
||||||
loop below.
|
|
||||||
Once it's removed that first comparison, a smart compiler should be
|
|
||||||
able to figure out that the result of this assignment isn't used and
|
|
||||||
optimize it away anyway.*/
|
|
||||||
p=q;
|
|
||||||
_k=_n;
|
_k=_n;
|
||||||
|
do p=CELT_PVQ_U_ROW[--_k][_n];
|
||||||
|
while(p>_i);
|
||||||
}
|
}
|
||||||
else for(;p>_i;p=row[_k])_k--;
|
else for(p=row[_k];p>_i;p=row[_k])_k--;
|
||||||
}
|
|
||||||
for(;p>_i;p=CELT_PVQ_U_ROW[_k][_n])_k--;
|
|
||||||
_i-=p;
|
_i-=p;
|
||||||
*_y++=(k0-_k+s)^s;
|
*_y++=(k0-_k+s)^s;
|
||||||
|
}
|
||||||
|
/*Lots of dimensions case:*/
|
||||||
|
else{
|
||||||
|
/*Are there any pulses in this dimension at all?*/
|
||||||
|
p=CELT_PVQ_U_ROW[_k][_n];
|
||||||
|
q=CELT_PVQ_U_ROW[_k+1][_n];
|
||||||
|
if(p<=_i&&_i<q){
|
||||||
|
_i-=p;
|
||||||
|
*_y++=0;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
/*Are the pulses in this dimension negative?*/
|
||||||
|
s=-(_i>=q);
|
||||||
|
_i-=q&s;
|
||||||
|
/*Count how many pulses were placed in this dimension.*/
|
||||||
|
k0=_k;
|
||||||
|
do p=CELT_PVQ_U_ROW[--_k][_n];
|
||||||
|
while(p>_i);
|
||||||
|
_i-=p;
|
||||||
|
*_y++=(k0-_k+s)^s;
|
||||||
|
}
|
||||||
|
}
|
||||||
_n--;
|
_n--;
|
||||||
}
|
}
|
||||||
/*_n==2*/
|
/*_n==2*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue