Tonality and pitch tuning

Tuned the tonality estimator to trigger on signals where only part of the
spectrum is tonal. Also tuned the pitch detector not to be confused
by short-term correlation.
This commit is contained in:
Jean-Marc Valin 2012-01-12 03:44:49 -05:00
parent ac2e623d25
commit 0892c169c6
3 changed files with 21 additions and 12 deletions

View file

@ -442,7 +442,7 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
#ifdef FUZZING
is_transient = rand()&0x1;
#endif
/*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/
/*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
return is_transient;
}
@ -1206,8 +1206,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
/* Don't search for the fir last 1.5 octave of the range because
there's too many false-positives due to short-term correlation */
pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@ -1619,11 +1621,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
if (st->analysis.valid) {
int tonal_target;
float tonal;
tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
tonal = MAX16(0,st->analysis.tonality-.2);
tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal;
if (pitch_change)
tonal_target += (coded_bins<<BITRES)*.8;
/*printf("%f %d\n", tonal, tonal_target);*/
/*printf("%f %f ", st->analysis.tonality, tonal);*/
new_target = IMAX(tonal_target,new_target);
}
#endif

View file

@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int T1, T1b;
opus_val16 g1;
opus_val16 cont=0;
opus_val16 thresh;
T1 = (2*T0+k)/(2*k);
if (T1 < minperiod)
break;
@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
cont = HALF32(prev_gain);
else
cont = 0;
if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
/* Bias against very high pitch (very short period) to avoid false-positives
due to short-term correlation */
if (T1<3*minperiod)
thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
else if (T1<2*minperiod)
thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
if (g1 > thresh)
{
best_xy = xy;
best_yy = yy;

View file

@ -74,7 +74,7 @@ static const float tweight[NB_TBANDS+1] = {
.3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
};
#define NB_TONAL_SKIP_BANDS 0
#define NB_TONAL_SKIP_BANDS 9
typedef struct {
float angle[240];
@ -265,8 +265,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
frame_stationarity += stationarity;
/*band_tonality[b] = tE/(1e-15+E)*/;
band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
//printf("%f ", band_tonality[b]);
#if 1
#if 0
if (b>=NB_TONAL_SKIP_BANDS)
{
frame_tonality += tweight[b]*band_tonality[b];
@ -277,7 +276,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
#endif
max_frame_tonality = MAX16(max_frame_tonality, frame_tonality);
max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
slope += band_tonality[b]*(b-8);
/*printf("%f %f ", band_tonality[b], stationarity);*/
if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
@ -295,7 +294,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
}
tonal->prev_band_tonality[b] = band_tonality[b];
}
//printf("\n");
frame_loudness = 20*log10(frame_loudness);
tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
tonal->lowECount *= (1-alphaE);
@ -320,7 +319,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
#else
info->activity = .5*(1+frame_noisiness-frame_stationarity);
#endif
frame_tonality = (max_frame_tonality/(tw_sum));
frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
tonal->prev_tonality = frame_tonality;
info->boost_amount[0] -= frame_tonality+.2;