Improved mode/channel/bandwidth control mechanism

Now has tuning parameters for mono/stereo and voice/music. Also switches
to stereo during swb and without reducing the bandwidth.
This commit is contained in:
Jean-Marc Valin 2011-09-02 14:47:26 -04:00
parent bafbd08db1
commit c681bd0480
2 changed files with 103 additions and 69 deletions

View file

@ -83,23 +83,43 @@ struct OpusEncoder {
int rangeFinal; int rangeFinal;
}; };
/* Transition tables for the voice and audio modes. First column is the /* Transition tables for the voice and music. First column is the
middle (memoriless) threshold. The second column is the hysteresis middle (memoriless) threshold. The second column is the hysteresis
(difference with the middle) */ (difference with the middle) */
static const int voice_bandwidth_thresholds[10] = { static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
11000, 1000, /* NB<->MB */ 11000, 1000, /* NB<->MB */
14000, 1000, /* MB<->WB */ 14000, 1000, /* MB<->WB */
21000, 2000, /* WB<->SWB */ 21000, 2000, /* WB<->SWB */
29000, 2000, /* SWB<->FB */ 29000, 2000, /* SWB<->FB */
}; };
static const int audio_bandwidth_thresholds[10] = { static const opus_int32 mono_music_bandwidth_thresholds[8] = {
30000, 0, /* MB not allowed */ 14000, 1000, /* MB not allowed */
20000, 2000, /* MB<->WB */ 18000, 2000, /* MB<->WB */
26000, 2000, /* WB<->SWB */ 24000, 2000, /* WB<->SWB */
33000, 2000, /* SWB<->FB */ 33000, 2000, /* SWB<->FB */
}; };
static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
11000, 1000, /* NB<->MB */
14000, 1000, /* MB<->WB */
21000, 2000, /* WB<->SWB */
32000, 2000, /* SWB<->FB */
};
static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
14000, 1000, /* MB not allowed */
18000, 2000, /* MB<->WB */
24000, 2000, /* WB<->SWB */
48000, 2000, /* SWB<->FB */
};
/* Threshold bit-rates for switching between mono and stereo */
static const opus_int32 stereo_voice_threshold = 26000;
static const opus_int32 stereo_music_threshold = 36000;
/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */
static const opus_int32 mode_thresholds[2][2] = {
/* voice */ /* music */
{ 48000, 24000}, /* mono */
{ 48000, 24000}, /* stereo */
};
int opus_encoder_get_size(int channels) int opus_encoder_get_size(int channels)
{ {
int silkEncSizeBytes, celtEncSizeBytes; int silkEncSizeBytes, celtEncSizeBytes;
@ -173,7 +193,7 @@ int opus_encoder_init(OpusEncoder* st, int Fs, int channels, int application)
st->application = application; st->application = application;
st->signal_type = OPUS_SIGNAL_AUTO; st->signal_type = OPUS_SIGNAL_AUTO;
st->user_bandwidth = OPUS_BANDWIDTH_AUTO; st->user_bandwidth = OPUS_BANDWIDTH_AUTO;
st->voice_ratio = 90; st->voice_ratio = -1;
st->encoder_buffer = st->Fs/100; st->encoder_buffer = st->Fs/100;
st->delay_compensation = st->Fs/400; st->delay_compensation = st->Fs/400;
@ -350,9 +370,10 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
VARDECL(opus_val16, pcm_buf); VARDECL(opus_val16, pcm_buf);
int nb_compr_bytes; int nb_compr_bytes;
int to_celt = 0; int to_celt = 0;
opus_int32 mono_rate;
opus_uint32 redundant_rng = 0; opus_uint32 redundant_rng = 0;
int cutoff_Hz, hp_freq_smth1; int cutoff_Hz, hp_freq_smth1;
int voice_est;
opus_int32 equiv_rate;
ALLOC_STACK; ALLOC_STACK;
st->rangeFinal = 0; st->rangeFinal = 0;
@ -370,41 +391,45 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
else else
st->bitrate_bps = st->user_bitrate_bps; st->bitrate_bps = st->user_bitrate_bps;
/* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
equiv_rate = st->bitrate_bps - 60*(st->Fs/frame_size - 50);
if (st->signal_type == OPUS_SIGNAL_VOICE)
voice_est = 127;
else if (st->signal_type == OPUS_SIGNAL_MUSIC)
voice_est = 0;
else if (st->voice_ratio >= 0)
voice_est = st->voice_ratio*327>>8;
else if (st->application == OPUS_APPLICATION_VOIP)
voice_est = 115;
else
voice_est = 64;
#ifdef FUZZING
/* Random mono/stereo decision */
if (st->channels == 2 && (rand()&0x1F)==0)
st->stream_channels = 3-st->stream_channels;
#else
/* Rate-dependent mono-stereo decision */ /* Rate-dependent mono-stereo decision */
if (st->force_mono) if (st->force_mono)
{ {
st->stream_channels = 1; st->stream_channels = 1;
} else if (st->mode == MODE_CELT_ONLY && st->channels == 2) } else if (st->channels == 2)
{ {
opus_int32 decision_rate; opus_int32 stereo_threshold;
decision_rate = st->bitrate_bps + st->voice_ratio*st->voice_ratio; stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
/* Add some hysteresis */ if (st->stream_channels == 2)
if (st->stream_channels == 2) stereo_threshold -= 4000;
decision_rate += 4000; else
else stereo_threshold += 4000;
decision_rate -= 4000; st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
if (decision_rate>48000)
st->stream_channels = 2;
else
st->stream_channels = 1;
} else { } else {
st->stream_channels = st->channels; st->stream_channels = st->channels;
} }
#ifdef FUZZING
if (st->channels == 2 && (rand()&0x1F)==0)
st->stream_channels = 3-st->stream_channels;
#endif #endif
/* Equivalent bit-rate for mono */
mono_rate = st->bitrate_bps;
if (st->stream_channels==2)
mono_rate = 2*mono_rate/3;
/* Compensate for smaller frame sizes assuming an equivalent overhead
of 60 bits/frame */
mono_rate -= 60*(st->Fs/frame_size - 50);
#ifdef FUZZING #ifdef FUZZING
/* Random mode switching */
if ((rand()&0xF)==0) if ((rand()&0xF)==0)
{ {
if ((rand()&0x1)==0) if ((rand()&0x1)==0)
@ -419,44 +444,26 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
} }
#else #else
/* Mode selection depending on application and signal type */ /* Mode selection depending on application and signal type */
if (st->application==OPUS_APPLICATION_VOIP)
{ {
opus_int32 threshold; int chan;
threshold = 20000; opus_int32 mode_voice, mode_music;
/* OPUS_APPLICATION_VOIP default to auto high-pass */ opus_int32 threshold;
/* Hysteresis */
if (st->prev_mode == MODE_CELT_ONLY)
threshold -= 4000;
else if (st->prev_mode>0)
threshold += 4000;
/* OPUS_APPLICATION_VOIP defaults to MODE_SILK_ONLY */ chan = (st->channels==2) && !st->force_mono;
if (st->signal_type == OPUS_SIGNAL_MUSIC && mono_rate > threshold) mode_voice = mode_thresholds[chan][0];
st->mode = MODE_CELT_ONLY; mode_music = mode_thresholds[chan][1];
else threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
st->mode = MODE_SILK_ONLY;
} else {/* OPUS_APPLICATION_AUDIO */
opus_int32 threshold;
/* SILK/CELT threshold is higher for voice than for music */
threshold = 36000;
/* OPUS_APPLICATION_AUDIO disables the high-pass */
if (st->signal_type == OPUS_SIGNAL_MUSIC)
threshold -= 20000;
else if (st->signal_type == OPUS_SIGNAL_VOICE)
threshold += 8000;
/* Hysteresis */ /* Hysteresis */
if (st->prev_mode == MODE_CELT_ONLY) if (st->prev_mode == MODE_CELT_ONLY)
threshold -= 4000; threshold -= 4000;
else if (st->prev_mode>0) else if (st->prev_mode>0)
threshold += 4000; threshold += 4000;
if (mono_rate>threshold) st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
st->mode = MODE_CELT_ONLY;
else
st->mode = MODE_SILK_ONLY;
} }
#endif #endif
/* Override the chosen mode to make sure we meet the requested frame size */ /* Override the chosen mode to make sure we meet the requested frame size */
if (st->mode == MODE_CELT_ONLY && frame_size > st->Fs/50) if (st->mode == MODE_CELT_ONLY && frame_size > st->Fs/50)
st->mode = MODE_SILK_ONLY; st->mode = MODE_SILK_ONLY;
@ -491,10 +498,24 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
/* Automatic (rate-dependent) bandwidth selection */ /* Automatic (rate-dependent) bandwidth selection */
if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
{ {
const int *bandwidth_thresholds; const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
opus_int32 bandwidth_thresholds[8];
int bandwidth = OPUS_BANDWIDTH_FULLBAND; int bandwidth = OPUS_BANDWIDTH_FULLBAND;
bandwidth_thresholds = st->mode == MODE_CELT_ONLY ? audio_bandwidth_thresholds : voice_bandwidth_thresholds; if (st->channels==2 && !st->force_mono)
{
voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
} else {
voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
}
/* Interpolate bandwidth thresholds depending on voice estimation */
for (i=0;i<8;i++)
{
bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
+ ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
}
do { do {
int threshold, hysteresis; int threshold, hysteresis;
threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
@ -506,7 +527,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
else else
threshold += hysteresis; threshold += hysteresis;
} }
if (mono_rate >= threshold) if (equiv_rate >= threshold)
break; break;
} while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
st->bandwidth = bandwidth; st->bandwidth = bandwidth;
@ -545,6 +566,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND) if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
st->mode = MODE_SILK_ONLY; st->mode = MODE_SILK_ONLY;
/* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, st->bandwidth); */
bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1; bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1;
data += 1; data += 1;
@ -1040,7 +1062,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
case OPUS_SET_VOICE_RATIO_REQUEST: case OPUS_SET_VOICE_RATIO_REQUEST:
{ {
opus_int32 value = va_arg(ap, opus_int32); opus_int32 value = va_arg(ap, opus_int32);
if (value>100 || value<0) if (value>100 || value<-1)
goto bad_arg; goto bad_arg;
st->voice_ratio = value; st->voice_ratio = value;
} }

View file

@ -116,6 +116,7 @@ int main(int argc, char *argv[])
int encode_only=0, decode_only=0; int encode_only=0, decode_only=0;
int max_frame_size = 960*6; int max_frame_size = 960*6;
int curr_read=0; int curr_read=0;
int sweep_bps = 0;
if (argc < 7 ) if (argc < 7 )
{ {
@ -223,6 +224,9 @@ int main(int argc, char *argv[])
} else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) { } else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) {
packet_loss_perc = atoi( argv[ args + 1 ] ); packet_loss_perc = atoi( argv[ args + 1 ] );
args += 2; args += 2;
} else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-sweep" ) == 0 ) {
sweep_bps = atoi( argv[ args + 1 ] );
args += 2;
} else { } else {
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] ); printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
print_usage( argv ); print_usage( argv );
@ -363,6 +367,14 @@ int main(int argc, char *argv[])
} }
len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes); len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
if (sweep_bps!=0)
{
bitrate_bps += sweep_bps;
/* safety */
if (bitrate_bps<1000)
bitrate_bps = 1000;
opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
}
opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle])); opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle]));
if (len[toggle] < 0) if (len[toggle] < 0)
{ {