Transient/VBR tuning, give more bits to frames where pitch changes
This commit is contained in:
parent
971b055090
commit
2a9fdbc93d
3 changed files with 105 additions and 110 deletions
213
celt/celt.c
213
celt/celt.c
|
@ -293,7 +293,7 @@ static inline opus_val16 SIG2WORD16(celt_sig x)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int transient_analysis(const opus_val32 * restrict in, int len, int C,
|
static int transient_analysis(const opus_val32 * restrict in, int len, int C,
|
||||||
int overlap, opus_val16 *tf_estimate, int *tf_chan)
|
int overlap, opus_val16 *tf_estimate, int *tf_chan, AnalysisInfo *analysis)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
VARDECL(opus_val16, tmp);
|
VARDECL(opus_val16, tmp);
|
||||||
|
@ -301,13 +301,19 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
|
||||||
int is_transient = 0;
|
int is_transient = 0;
|
||||||
int block;
|
int block;
|
||||||
int c, N;
|
int c, N;
|
||||||
opus_val16 maxbin, minbin[3];
|
opus_val16 maxbin;
|
||||||
opus_val32 L1, L2, tf_tmp, tf_max;
|
int tf_max;
|
||||||
VARDECL(opus_val16, bins);
|
VARDECL(opus_val16, bins);
|
||||||
|
opus_val16 T1, T2, T3, T4, T5;
|
||||||
|
opus_val16 follower;
|
||||||
|
int metric=0;
|
||||||
|
int fmetric=0, bmetric=0;
|
||||||
|
int count1, count2, count3, count4, count5;;
|
||||||
|
|
||||||
SAVE_STACK;
|
SAVE_STACK;
|
||||||
ALLOC(tmp, len, opus_val16);
|
ALLOC(tmp, len, opus_val16);
|
||||||
|
|
||||||
block = overlap/8;
|
block = overlap/4;
|
||||||
N=len/block-1;
|
N=len/block-1;
|
||||||
ALLOC(bins, N, opus_val16);
|
ALLOC(bins, N, opus_val16);
|
||||||
|
|
||||||
|
@ -318,111 +324,97 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
|
||||||
mem0=0;
|
mem0=0;
|
||||||
mem1=0;
|
mem1=0;
|
||||||
for (i=0;i<len;i++)
|
for (i=0;i<len;i++)
|
||||||
tmp[i] = SHR32(in[i*C+c],SIG_SHIFT);
|
tmp[i] = SHR32(in[i+c*len],SIG_SHIFT);
|
||||||
|
|
||||||
/* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
|
/* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
|
||||||
for (i=0;i<len;i++)
|
for (i=0;i<len;i++)
|
||||||
{
|
{
|
||||||
opus_val32 x,y;
|
opus_val32 x,y;
|
||||||
x = tmp[i];
|
x = tmp[i];
|
||||||
y = ADD32(mem0, x);
|
y = ADD32(mem0, x);
|
||||||
#ifdef FIXED_POINT
|
#ifdef FIXED_POINT
|
||||||
mem0 = mem1 + y - SHL32(x,1);
|
mem0 = mem1 + y - SHL32(x,1);
|
||||||
mem1 = x - SHR32(y,1);
|
mem1 = x - SHR32(y,1);
|
||||||
#else
|
#else
|
||||||
mem0 = mem1 + y - 2*x;
|
mem0 = mem1 + y - 2*x;
|
||||||
mem1 = x - .5f*y;
|
mem1 = x - .5f*y;
|
||||||
#endif
|
#endif
|
||||||
tmp[i] = EXTRACT16(SHR32(y,2));
|
tmp[i] = EXTRACT16(SHR32(y,2));
|
||||||
}
|
}
|
||||||
/* First few samples are bad because we don't propagate the memory */
|
/* First few samples are bad because we don't propagate the memory */
|
||||||
for (i=0;i<12;i++)
|
for (i=0;i<12;i++)
|
||||||
tmp[i] = 0;
|
tmp[i] = 0;
|
||||||
|
|
||||||
maxbin=0;
|
maxbin=0;
|
||||||
minbin[0] = minbin[1] = minbin[2] = 32768;
|
for (i=0;i<N;i++)
|
||||||
for (i=0;i<N;i++)
|
|
||||||
{
|
|
||||||
int j;
|
|
||||||
opus_val16 max_abs=0;
|
|
||||||
for (j=0;j<2*block;j++)
|
|
||||||
max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
|
|
||||||
bins[i] = max_abs;
|
|
||||||
maxbin = MAX16(maxbin, bins[i]);
|
|
||||||
if (bins[i] < minbin[2])
|
|
||||||
{
|
{
|
||||||
if (bins[i] < minbin[1])
|
int j;
|
||||||
{
|
opus_val16 max_abs=0;
|
||||||
if (bins[i] < minbin[0])
|
for (j=0;j<2*block;j++)
|
||||||
{
|
max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
|
||||||
minbin[2] = minbin[1];
|
//printf("%f ", max_abs);
|
||||||
minbin[1] = minbin[0];
|
bins[i] = max_abs;
|
||||||
minbin[0] = bins[i];
|
maxbin = MAX16(maxbin, bins[i]);
|
||||||
} else {
|
|
||||||
minbin[2] = minbin[1];
|
|
||||||
minbin[1] = bins[i];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
minbin[2] = bins[i];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
//printf("%f ", maxbin/minbin[2]);
|
|
||||||
if (maxbin > 15*minbin[2])
|
|
||||||
is_transient = 1;
|
|
||||||
L1=0;
|
|
||||||
L2=0;
|
|
||||||
for (i=0;i<N;i++)
|
|
||||||
{
|
|
||||||
int j;
|
|
||||||
int conseq=0;
|
|
||||||
opus_val16 t1, t2, t3;
|
|
||||||
opus_val16 tmp_bin;
|
|
||||||
|
|
||||||
tmp_bin = bins[i]+MULT16_16_Q15(QCONST16(.05f,15),maxbin);
|
T1 = QCONST16(.09f, 15);
|
||||||
L1 += EXTEND32(tmp_bin);
|
T2 = QCONST16(.12f, 15);
|
||||||
L2 += SHR32(MULT16_16(tmp_bin, tmp_bin), 4);
|
T3 = QCONST16(.18f, 15);
|
||||||
t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
|
T4 = QCONST16(.28f, 15);
|
||||||
t2 = MULT16_16_Q15(QCONST16(.3f, 15), bins[i]);
|
T5 = QCONST16(.4f, 15);
|
||||||
t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
|
|
||||||
for (j=0;j<i;j++)
|
follower = 0;
|
||||||
|
count1=count2=count3=count4=count5=0;
|
||||||
|
for (i=0;i<N;i++)
|
||||||
{
|
{
|
||||||
if (bins[j] < t1)
|
follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
|
||||||
conseq++;
|
if (bins[i] < MULT16_16_Q15(T1, follower))
|
||||||
if (bins[j] < t2)
|
count1++;
|
||||||
conseq++;
|
if (bins[i] < MULT16_16_Q15(T2, follower))
|
||||||
else
|
count2++;
|
||||||
conseq = 0;
|
if (bins[i] < MULT16_16_Q15(T3, follower))
|
||||||
|
count3++;
|
||||||
|
if (bins[i] < MULT16_16_Q15(T4, follower))
|
||||||
|
count4++;
|
||||||
|
if (bins[i] < MULT16_16_Q15(T5, follower))
|
||||||
|
count5++;
|
||||||
}
|
}
|
||||||
if (conseq>=12)
|
fmetric = (5*count1 + 4*count2 + 3*count3 + 2*count4 + count5)/2;
|
||||||
is_transient=1;
|
follower=0;
|
||||||
conseq = 0;
|
count1=count2=count3=count4=count5=0;
|
||||||
for (j=i+1;j<N;j++)
|
for (i=N-1;i>=0;i--)
|
||||||
{
|
{
|
||||||
if (bins[j] < t3)
|
follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
|
||||||
conseq++;
|
if (bins[i] < MULT16_16_Q15(T1, follower))
|
||||||
else
|
count1++;
|
||||||
conseq = 0;
|
if (bins[i] < MULT16_16_Q15(T2, follower))
|
||||||
|
count2++;
|
||||||
|
if (bins[i] < MULT16_16_Q15(T3, follower))
|
||||||
|
count3++;
|
||||||
|
if (bins[i] < MULT16_16_Q15(T4, follower))
|
||||||
|
count4++;
|
||||||
|
if (bins[i] < MULT16_16_Q15(T5, follower))
|
||||||
|
count5++;
|
||||||
}
|
}
|
||||||
if (conseq>=28)
|
bmetric = 5*count1 + 4*count2 + 3*count3 + 2*count4 + count5;
|
||||||
|
metric = fmetric+bmetric;
|
||||||
|
|
||||||
|
//if (metric>40)
|
||||||
|
if (metric>20+50*MAX16(analysis->tonality, analysis->noisiness))
|
||||||
is_transient=1;
|
is_transient=1;
|
||||||
|
|
||||||
|
if (metric>tf_max)
|
||||||
|
{
|
||||||
|
*tf_chan = c;
|
||||||
|
tf_max = metric;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* sqrt(L2*N)/L1 */
|
*tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20);
|
||||||
tf_tmp = SHL32(DIV32( SHL32(EXTEND32(celt_sqrt(SHR16(L2,4) * N)), 14), ADD32(EPSILON, L1)), 4);
|
|
||||||
tf_tmp = 1+MIN16(1,MAX16(0, 1-10*minbin[2]/(1+maxbin)));
|
|
||||||
if (tf_tmp>tf_max)
|
|
||||||
{
|
|
||||||
*tf_chan = c;
|
|
||||||
tf_max = tf_tmp;
|
|
||||||
}
|
|
||||||
*tf_estimate = MAX16(*tf_estimate, EXTRACT16(MIN32(QCONST32(1.99, 14), tf_tmp)));
|
|
||||||
}
|
|
||||||
*tf_estimate = MAX16(QCONST16(1.f, 14), *tf_estimate);
|
|
||||||
RESTORE_STACK;
|
RESTORE_STACK;
|
||||||
#ifdef FUZZING
|
#ifdef FUZZING
|
||||||
is_transient = rand()&0x1;
|
is_transient = rand()&0x1;
|
||||||
#endif
|
#endif
|
||||||
//printf("%d %f\n", is_transient, *tf_estimate);
|
//printf("%d %f %f %f %f\n", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);
|
||||||
return is_transient;
|
return is_transient;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -827,7 +819,7 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C)
|
||||||
|
|
||||||
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
|
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
|
||||||
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
|
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
|
||||||
AnalysisInfo *analysis, opus_val16 *stereo_saving)
|
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
opus_val32 diff=0;
|
opus_val32 diff=0;
|
||||||
|
@ -884,7 +876,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
|
||||||
trim_index++;
|
trim_index++;
|
||||||
if (diff < -QCONST16(10.f, DB_SHIFT))
|
if (diff < -QCONST16(10.f, DB_SHIFT))
|
||||||
trim_index++;
|
trim_index++;
|
||||||
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (diff+QCONST16(1.f, DB_SHIFT))/16 ));
|
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (diff+QCONST16(1.f, DB_SHIFT))/6 ));
|
||||||
|
trim -= 2*(tf_estimate-1);
|
||||||
#ifndef FIXED_POINT
|
#ifndef FIXED_POINT
|
||||||
if (analysis->valid)
|
if (analysis->valid)
|
||||||
{
|
{
|
||||||
|
@ -899,8 +892,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
|
||||||
trim_index++;*/
|
trim_index++;*/
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/*printf("%d %f\n", trim_index, trim);*/
|
/*printf("%d %f ", trim_index, trim);*/
|
||||||
/*trim_index = floor(.5+trim);*/
|
trim_index = floor(.5+trim);
|
||||||
if (trim_index<0)
|
if (trim_index<0)
|
||||||
trim_index = 0;
|
trim_index = 0;
|
||||||
if (trim_index>10)
|
if (trim_index>10)
|
||||||
|
@ -995,6 +988,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
||||||
int tf_chan = 0;
|
int tf_chan = 0;
|
||||||
opus_val16 tf_estimate=0;
|
opus_val16 tf_estimate=0;
|
||||||
opus_val16 stereo_saving = 0;
|
opus_val16 stereo_saving = 0;
|
||||||
|
int pitch_change=0;
|
||||||
ALLOC_STACK;
|
ALLOC_STACK;
|
||||||
|
|
||||||
if (nbCompressedBytes<2 || pcm==NULL)
|
if (nbCompressedBytes<2 || pcm==NULL)
|
||||||
|
@ -1195,6 +1189,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
||||||
if (pitch_index > COMBFILTER_MAXPERIOD-2)
|
if (pitch_index > COMBFILTER_MAXPERIOD-2)
|
||||||
pitch_index = COMBFILTER_MAXPERIOD-2;
|
pitch_index = COMBFILTER_MAXPERIOD-2;
|
||||||
gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
|
gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
|
||||||
|
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
|
||||||
|
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
|
||||||
|
pitch_change = 1;
|
||||||
|
//printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);
|
||||||
if (st->loss_rate>2)
|
if (st->loss_rate>2)
|
||||||
gain1 = HALF32(gain1);
|
gain1 = HALF32(gain1);
|
||||||
if (st->loss_rate>4)
|
if (st->loss_rate>4)
|
||||||
|
@ -1293,7 +1291,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
||||||
if (st->complexity > 1)
|
if (st->complexity > 1)
|
||||||
{
|
{
|
||||||
isTransient = transient_analysis(in, N+st->overlap, CC,
|
isTransient = transient_analysis(in, N+st->overlap, CC,
|
||||||
st->overlap, &tf_estimate, &tf_chan);
|
st->overlap, &tf_estimate, &tf_chan, &st->analysis);
|
||||||
if (isTransient)
|
if (isTransient)
|
||||||
shortBlocks = M;
|
shortBlocks = M;
|
||||||
}
|
}
|
||||||
|
@ -1465,7 +1463,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
||||||
if (tell+(6<<BITRES) <= total_bits - total_boost)
|
if (tell+(6<<BITRES) <= total_bits - total_boost)
|
||||||
{
|
{
|
||||||
alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
|
alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
|
||||||
st->end, LM, C, N, &st->analysis, &stereo_saving);
|
st->end, LM, C, N, &st->analysis, &stereo_saving, tf_estimate);
|
||||||
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
|
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
|
||||||
tell = ec_tell_frac(enc);
|
tell = ec_tell_frac(enc);
|
||||||
}
|
}
|
||||||
|
@ -1530,29 +1528,24 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
||||||
if (C==2)
|
if (C==2)
|
||||||
target -= MIN32(target/3, stereo_saving*(st->mode->eBands[intensity]<<LM<<BITRES));
|
target -= MIN32(target/3, stereo_saving*(st->mode->eBands[intensity]<<LM<<BITRES));
|
||||||
#endif
|
#endif
|
||||||
|
target += (coded_bins<<BITRES)*.05;
|
||||||
|
target -= (coded_bins<<BITRES)*.13;
|
||||||
|
target *= .96;
|
||||||
|
|
||||||
#ifdef FIXED_POINT
|
#ifdef FIXED_POINT
|
||||||
new_target = SHL32(MULT16_32_Q15(target, SUB16(tf_estimate, QCONST16(0.05, 14))),1);
|
new_target = SHL32(MULT16_32_Q15(target, tf_estimate),1);
|
||||||
#else
|
#else
|
||||||
{
|
new_target = target*tf_estimate;
|
||||||
//float tf_factor = 1+MIN16(1,2*MAX16(0,sqrt(tf_estimate-1)-.2));
|
|
||||||
float tf_factor = tf_estimate;
|
|
||||||
if (isTransient)
|
|
||||||
tf_factor = MAX16(1.2f, tf_factor);
|
|
||||||
//new_target = target*(tf_estimate-.05);
|
|
||||||
new_target = target*(tf_factor-.15);
|
|
||||||
//new_target = target*MIN32(2.f,MAX16(.85f,tf_sum/21.));
|
|
||||||
//printf("%f %f %f %f ", tf_factor, tf_sum/21., target*(tf_estimate-1.05), target*MIN32(2.f,MAX16(.85f,tf_sum/21.))-target);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef FIXED_POINT
|
#ifndef FIXED_POINT
|
||||||
if (st->analysis.valid) {
|
if (st->analysis.valid) {
|
||||||
int tonal_target;
|
int tonal_target;
|
||||||
float tonal;
|
float tonal;
|
||||||
tonal = st->analysis.tonality;
|
tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
|
||||||
tonal -= .15;
|
|
||||||
tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
|
tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
|
||||||
|
if (pitch_change)
|
||||||
|
tonal_target += (coded_bins<<BITRES)*.8;
|
||||||
/*printf("%f %d\n", tonal, tonal_target);*/
|
/*printf("%f %d\n", tonal, tonal_target);*/
|
||||||
new_target = IMAX(tonal_target,new_target);
|
new_target = IMAX(tonal_target,new_target);
|
||||||
//printf("%f %f ", tonal, (coded_bins<<BITRES)*1.6f*tonal);
|
//printf("%f %f ", tonal, (coded_bins<<BITRES)*1.6f*tonal);
|
||||||
|
|
|
@ -54,6 +54,7 @@ typedef struct {
|
||||||
int valid;
|
int valid;
|
||||||
opus_val16 tonality;
|
opus_val16 tonality;
|
||||||
opus_val16 tonality_slope;
|
opus_val16 tonality_slope;
|
||||||
|
opus_val16 noisiness;
|
||||||
opus_val16 activity;
|
opus_val16 activity;
|
||||||
int boost_band[2];
|
int boost_band[2];
|
||||||
opus_val16 boost_amount[2];
|
opus_val16 boost_amount[2];
|
||||||
|
|
|
@ -420,5 +420,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
||||||
else if (bandwidth<=15 || (bandwidth==16 && close_enough))
|
else if (bandwidth<=15 || (bandwidth==16 && close_enough))
|
||||||
tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
|
tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
|
||||||
}
|
}
|
||||||
|
info->noisiness = frame_noisiness;
|
||||||
info->valid = 1;
|
info->valid = 1;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue