mirror of
https://github.com/xiph/opus.git
synced 2025-05-31 07:37:42 +00:00
Tonality and pitch tuning
Tuned the tonality estimator to trigger on signals where only part of the spectrum is tonal. Also tuned the pitch detector not to be confused by short-term correlation.
This commit is contained in:
parent
ac2e623d25
commit
0892c169c6
3 changed files with 21 additions and 12 deletions
12
celt/celt.c
12
celt/celt.c
|
@ -442,7 +442,7 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
|
|||
#ifdef FUZZING
|
||||
is_transient = rand()&0x1;
|
||||
#endif
|
||||
/*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/
|
||||
/*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
|
||||
return is_transient;
|
||||
}
|
||||
|
||||
|
@ -1206,8 +1206,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
|||
ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
|
||||
|
||||
pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
|
||||
/* Don't search the last 1.5 octaves of the range because
|
||||
there are too many false positives due to short-term correlation */
|
||||
pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
|
||||
COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
|
||||
COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
|
||||
pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
|
||||
|
||||
gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
|
||||
|
@ -1619,11 +1621,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
|
|||
if (st->analysis.valid) {
|
||||
int tonal_target;
|
||||
float tonal;
|
||||
tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
|
||||
tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
|
||||
tonal = MAX16(0,st->analysis.tonality-.2);
|
||||
tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal;
|
||||
if (pitch_change)
|
||||
tonal_target += (coded_bins<<BITRES)*.8;
|
||||
/*printf("%f %d\n", tonal, tonal_target);*/
|
||||
/*printf("%f %f ", st->analysis.tonality, tonal);*/
|
||||
new_target = IMAX(tonal_target,new_target);
|
||||
}
|
||||
#endif
|
||||
|
|
10
celt/pitch.c
10
celt/pitch.c
|
@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
|||
int T1, T1b;
|
||||
opus_val16 g1;
|
||||
opus_val16 cont=0;
|
||||
opus_val16 thresh;
|
||||
T1 = (2*T0+k)/(2*k);
|
||||
if (T1 < minperiod)
|
||||
break;
|
||||
|
@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
|||
cont = HALF32(prev_gain);
|
||||
else
|
||||
cont = 0;
|
||||
if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
|
||||
thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
|
||||
/* Bias against very high pitch (very short period) to avoid false-positives
|
||||
due to short-term correlation */
|
||||
if (T1<3*minperiod)
|
||||
thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
|
||||
else if (T1<2*minperiod)
|
||||
thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
|
||||
if (g1 > thresh)
|
||||
{
|
||||
best_xy = xy;
|
||||
best_yy = yy;
|
||||
|
|
|
@ -74,7 +74,7 @@ static const float tweight[NB_TBANDS+1] = {
|
|||
.3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
|
||||
};
|
||||
|
||||
#define NB_TONAL_SKIP_BANDS 0
|
||||
#define NB_TONAL_SKIP_BANDS 9
|
||||
|
||||
typedef struct {
|
||||
float angle[240];
|
||||
|
@ -265,8 +265,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
frame_stationarity += stationarity;
|
||||
/*band_tonality[b] = tE/(1e-15+E)*/;
|
||||
band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
|
||||
//printf("%f ", band_tonality[b]);
|
||||
#if 1
|
||||
#if 0
|
||||
if (b>=NB_TONAL_SKIP_BANDS)
|
||||
{
|
||||
frame_tonality += tweight[b]*band_tonality[b];
|
||||
|
@ -277,7 +276,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
|
||||
frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
|
||||
#endif
|
||||
max_frame_tonality = MAX16(max_frame_tonality, frame_tonality);
|
||||
max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
|
||||
slope += band_tonality[b]*(b-8);
|
||||
/*printf("%f %f ", band_tonality[b], stationarity);*/
|
||||
if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
|
||||
|
@ -295,7 +294,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
}
|
||||
tonal->prev_band_tonality[b] = band_tonality[b];
|
||||
}
|
||||
//printf("\n");
|
||||
|
||||
frame_loudness = 20*log10(frame_loudness);
|
||||
tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
|
||||
tonal->lowECount *= (1-alphaE);
|
||||
|
@ -320,7 +319,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
#else
|
||||
info->activity = .5*(1+frame_noisiness-frame_stationarity);
|
||||
#endif
|
||||
frame_tonality = (max_frame_tonality/(tw_sum));
|
||||
frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
|
||||
frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
|
||||
tonal->prev_tonality = frame_tonality;
|
||||
info->boost_amount[0] -= frame_tonality+.2;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue