Improved mode/channel/bandwidth control mechanism
Adds tuning parameters for mono/stereo and voice/music. Also switches to stereo during SWB, without reducing the bandwidth.
This commit is contained in:
parent
bafbd08db1
commit
c681bd0480
2 changed files with 103 additions and 69 deletions
|
@ -83,23 +83,43 @@ struct OpusEncoder {
|
||||||
int rangeFinal;
|
int rangeFinal;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Transition tables for the voice and audio modes. First column is the
|
/* Transition tables for voice and music. First column is the
|
||||||
middle (memoriless) threshold. The second column is the hysteresis
|
middle (memoryless) threshold. The second column is the hysteresis
|
||||||
(difference with the middle) */
|
(difference with the middle) */
|
||||||
static const int voice_bandwidth_thresholds[10] = {
|
static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
|
||||||
11000, 1000, /* NB<->MB */
|
11000, 1000, /* NB<->MB */
|
||||||
14000, 1000, /* MB<->WB */
|
14000, 1000, /* MB<->WB */
|
||||||
21000, 2000, /* WB<->SWB */
|
21000, 2000, /* WB<->SWB */
|
||||||
29000, 2000, /* SWB<->FB */
|
29000, 2000, /* SWB<->FB */
|
||||||
};
|
};
|
||||||
static const int audio_bandwidth_thresholds[10] = {
|
static const opus_int32 mono_music_bandwidth_thresholds[8] = {
|
||||||
30000, 0, /* MB not allowed */
|
14000, 1000, /* MB not allowed */
|
||||||
20000, 2000, /* MB<->WB */
|
18000, 2000, /* MB<->WB */
|
||||||
26000, 2000, /* WB<->SWB */
|
24000, 2000, /* WB<->SWB */
|
||||||
33000, 2000, /* SWB<->FB */
|
33000, 2000, /* SWB<->FB */
|
||||||
};
|
};
|
||||||
|
static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
|
||||||
|
11000, 1000, /* NB<->MB */
|
||||||
|
14000, 1000, /* MB<->WB */
|
||||||
|
21000, 2000, /* WB<->SWB */
|
||||||
|
32000, 2000, /* SWB<->FB */
|
||||||
|
};
|
||||||
|
static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
|
||||||
|
14000, 1000, /* MB not allowed */
|
||||||
|
18000, 2000, /* MB<->WB */
|
||||||
|
24000, 2000, /* WB<->SWB */
|
||||||
|
48000, 2000, /* SWB<->FB */
|
||||||
|
};
|
||||||
|
/* Threshold bit-rates for switching between mono and stereo */
|
||||||
|
static const opus_int32 stereo_voice_threshold = 26000;
|
||||||
|
static const opus_int32 stereo_music_threshold = 36000;
|
||||||
|
|
||||||
|
/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */
|
||||||
|
static const opus_int32 mode_thresholds[2][2] = {
|
||||||
|
/* voice */ /* music */
|
||||||
|
{ 48000, 24000}, /* mono */
|
||||||
|
{ 48000, 24000}, /* stereo */
|
||||||
|
};
|
||||||
int opus_encoder_get_size(int channels)
|
int opus_encoder_get_size(int channels)
|
||||||
{
|
{
|
||||||
int silkEncSizeBytes, celtEncSizeBytes;
|
int silkEncSizeBytes, celtEncSizeBytes;
|
||||||
|
@ -173,7 +193,7 @@ int opus_encoder_init(OpusEncoder* st, int Fs, int channels, int application)
|
||||||
st->application = application;
|
st->application = application;
|
||||||
st->signal_type = OPUS_SIGNAL_AUTO;
|
st->signal_type = OPUS_SIGNAL_AUTO;
|
||||||
st->user_bandwidth = OPUS_BANDWIDTH_AUTO;
|
st->user_bandwidth = OPUS_BANDWIDTH_AUTO;
|
||||||
st->voice_ratio = 90;
|
st->voice_ratio = -1;
|
||||||
st->encoder_buffer = st->Fs/100;
|
st->encoder_buffer = st->Fs/100;
|
||||||
|
|
||||||
st->delay_compensation = st->Fs/400;
|
st->delay_compensation = st->Fs/400;
|
||||||
|
@ -350,9 +370,10 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
VARDECL(opus_val16, pcm_buf);
|
VARDECL(opus_val16, pcm_buf);
|
||||||
int nb_compr_bytes;
|
int nb_compr_bytes;
|
||||||
int to_celt = 0;
|
int to_celt = 0;
|
||||||
opus_int32 mono_rate;
|
|
||||||
opus_uint32 redundant_rng = 0;
|
opus_uint32 redundant_rng = 0;
|
||||||
int cutoff_Hz, hp_freq_smth1;
|
int cutoff_Hz, hp_freq_smth1;
|
||||||
|
int voice_est;
|
||||||
|
opus_int32 equiv_rate;
|
||||||
ALLOC_STACK;
|
ALLOC_STACK;
|
||||||
|
|
||||||
st->rangeFinal = 0;
|
st->rangeFinal = 0;
|
||||||
|
@ -370,41 +391,45 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
else
|
else
|
||||||
st->bitrate_bps = st->user_bitrate_bps;
|
st->bitrate_bps = st->user_bitrate_bps;
|
||||||
|
|
||||||
|
/* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
|
||||||
|
equiv_rate = st->bitrate_bps - 60*(st->Fs/frame_size - 50);
|
||||||
|
|
||||||
|
if (st->signal_type == OPUS_SIGNAL_VOICE)
|
||||||
|
voice_est = 127;
|
||||||
|
else if (st->signal_type == OPUS_SIGNAL_MUSIC)
|
||||||
|
voice_est = 0;
|
||||||
|
else if (st->voice_ratio >= 0)
|
||||||
|
voice_est = st->voice_ratio*327>>8;
|
||||||
|
else if (st->application == OPUS_APPLICATION_VOIP)
|
||||||
|
voice_est = 115;
|
||||||
|
else
|
||||||
|
voice_est = 64;
|
||||||
|
|
||||||
|
#ifdef FUZZING
|
||||||
|
/* Random mono/stereo decision */
|
||||||
|
if (st->channels == 2 && (rand()&0x1F)==0)
|
||||||
|
st->stream_channels = 3-st->stream_channels;
|
||||||
|
#else
|
||||||
/* Rate-dependent mono-stereo decision */
|
/* Rate-dependent mono-stereo decision */
|
||||||
if (st->force_mono)
|
if (st->force_mono)
|
||||||
{
|
{
|
||||||
st->stream_channels = 1;
|
st->stream_channels = 1;
|
||||||
} else if (st->mode == MODE_CELT_ONLY && st->channels == 2)
|
} else if (st->channels == 2)
|
||||||
{
|
{
|
||||||
opus_int32 decision_rate;
|
opus_int32 stereo_threshold;
|
||||||
decision_rate = st->bitrate_bps + st->voice_ratio*st->voice_ratio;
|
stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
|
||||||
/* Add some hysteresis */
|
|
||||||
if (st->stream_channels == 2)
|
if (st->stream_channels == 2)
|
||||||
decision_rate += 4000;
|
stereo_threshold -= 4000;
|
||||||
else
|
else
|
||||||
decision_rate -= 4000;
|
stereo_threshold += 4000;
|
||||||
if (decision_rate>48000)
|
st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
|
||||||
st->stream_channels = 2;
|
|
||||||
else
|
|
||||||
st->stream_channels = 1;
|
|
||||||
} else {
|
} else {
|
||||||
st->stream_channels = st->channels;
|
st->stream_channels = st->channels;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef FUZZING
|
|
||||||
if (st->channels == 2 && (rand()&0x1F)==0)
|
|
||||||
st->stream_channels = 3-st->stream_channels;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Equivalent bit-rate for mono */
|
|
||||||
mono_rate = st->bitrate_bps;
|
|
||||||
if (st->stream_channels==2)
|
|
||||||
mono_rate = 2*mono_rate/3;
|
|
||||||
/* Compensate for smaller frame sizes assuming an equivalent overhead
|
|
||||||
of 60 bits/frame */
|
|
||||||
mono_rate -= 60*(st->Fs/frame_size - 50);
|
|
||||||
|
|
||||||
#ifdef FUZZING
|
#ifdef FUZZING
|
||||||
|
/* Random mode switching */
|
||||||
if ((rand()&0xF)==0)
|
if ((rand()&0xF)==0)
|
||||||
{
|
{
|
||||||
if ((rand()&0x1)==0)
|
if ((rand()&0x1)==0)
|
||||||
|
@ -419,31 +444,15 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
/* Mode selection depending on application and signal type */
|
/* Mode selection depending on application and signal type */
|
||||||
if (st->application==OPUS_APPLICATION_VOIP)
|
|
||||||
{
|
{
|
||||||
|
int chan;
|
||||||
|
opus_int32 mode_voice, mode_music;
|
||||||
opus_int32 threshold;
|
opus_int32 threshold;
|
||||||
threshold = 20000;
|
|
||||||
/* OPUS_APPLICATION_VOIP default to auto high-pass */
|
|
||||||
/* Hysteresis */
|
|
||||||
if (st->prev_mode == MODE_CELT_ONLY)
|
|
||||||
threshold -= 4000;
|
|
||||||
else if (st->prev_mode>0)
|
|
||||||
threshold += 4000;
|
|
||||||
|
|
||||||
/* OPUS_APPLICATION_VOIP defaults to MODE_SILK_ONLY */
|
chan = (st->channels==2) && !st->force_mono;
|
||||||
if (st->signal_type == OPUS_SIGNAL_MUSIC && mono_rate > threshold)
|
mode_voice = mode_thresholds[chan][0];
|
||||||
st->mode = MODE_CELT_ONLY;
|
mode_music = mode_thresholds[chan][1];
|
||||||
else
|
threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
|
||||||
st->mode = MODE_SILK_ONLY;
|
|
||||||
} else {/* OPUS_APPLICATION_AUDIO */
|
|
||||||
opus_int32 threshold;
|
|
||||||
/* SILK/CELT threshold is higher for voice than for music */
|
|
||||||
threshold = 36000;
|
|
||||||
/* OPUS_APPLICATION_AUDIO disables the high-pass */
|
|
||||||
if (st->signal_type == OPUS_SIGNAL_MUSIC)
|
|
||||||
threshold -= 20000;
|
|
||||||
else if (st->signal_type == OPUS_SIGNAL_VOICE)
|
|
||||||
threshold += 8000;
|
|
||||||
|
|
||||||
/* Hysteresis */
|
/* Hysteresis */
|
||||||
if (st->prev_mode == MODE_CELT_ONLY)
|
if (st->prev_mode == MODE_CELT_ONLY)
|
||||||
|
@ -451,12 +460,10 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
else if (st->prev_mode>0)
|
else if (st->prev_mode>0)
|
||||||
threshold += 4000;
|
threshold += 4000;
|
||||||
|
|
||||||
if (mono_rate>threshold)
|
st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
|
||||||
st->mode = MODE_CELT_ONLY;
|
|
||||||
else
|
|
||||||
st->mode = MODE_SILK_ONLY;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Override the chosen mode to make sure we meet the requested frame size */
|
/* Override the chosen mode to make sure we meet the requested frame size */
|
||||||
if (st->mode == MODE_CELT_ONLY && frame_size > st->Fs/50)
|
if (st->mode == MODE_CELT_ONLY && frame_size > st->Fs/50)
|
||||||
st->mode = MODE_SILK_ONLY;
|
st->mode = MODE_SILK_ONLY;
|
||||||
|
@ -491,10 +498,24 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
/* Automatic (rate-dependent) bandwidth selection */
|
/* Automatic (rate-dependent) bandwidth selection */
|
||||||
if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
|
if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
|
||||||
{
|
{
|
||||||
const int *bandwidth_thresholds;
|
const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
|
||||||
|
opus_int32 bandwidth_thresholds[8];
|
||||||
int bandwidth = OPUS_BANDWIDTH_FULLBAND;
|
int bandwidth = OPUS_BANDWIDTH_FULLBAND;
|
||||||
|
|
||||||
bandwidth_thresholds = st->mode == MODE_CELT_ONLY ? audio_bandwidth_thresholds : voice_bandwidth_thresholds;
|
if (st->channels==2 && !st->force_mono)
|
||||||
|
{
|
||||||
|
voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
|
||||||
|
music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
|
||||||
|
} else {
|
||||||
|
voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
|
||||||
|
music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
|
||||||
|
}
|
||||||
|
/* Interpolate bandwidth thresholds depending on voice estimation */
|
||||||
|
for (i=0;i<8;i++)
|
||||||
|
{
|
||||||
|
bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
|
||||||
|
+ ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
|
||||||
|
}
|
||||||
do {
|
do {
|
||||||
int threshold, hysteresis;
|
int threshold, hysteresis;
|
||||||
threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
|
threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
|
||||||
|
@ -506,7 +527,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
else
|
else
|
||||||
threshold += hysteresis;
|
threshold += hysteresis;
|
||||||
}
|
}
|
||||||
if (mono_rate >= threshold)
|
if (equiv_rate >= threshold)
|
||||||
break;
|
break;
|
||||||
} while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
|
} while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
|
||||||
st->bandwidth = bandwidth;
|
st->bandwidth = bandwidth;
|
||||||
|
@ -545,6 +566,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||||
if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
|
if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
|
||||||
st->mode = MODE_SILK_ONLY;
|
st->mode = MODE_SILK_ONLY;
|
||||||
|
|
||||||
|
/* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, st->bandwidth); */
|
||||||
bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1;
|
bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1;
|
||||||
|
|
||||||
data += 1;
|
data += 1;
|
||||||
|
@ -1040,7 +1062,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
|
||||||
case OPUS_SET_VOICE_RATIO_REQUEST:
|
case OPUS_SET_VOICE_RATIO_REQUEST:
|
||||||
{
|
{
|
||||||
opus_int32 value = va_arg(ap, opus_int32);
|
opus_int32 value = va_arg(ap, opus_int32);
|
||||||
if (value>100 || value<0)
|
if (value>100 || value<-1)
|
||||||
goto bad_arg;
|
goto bad_arg;
|
||||||
st->voice_ratio = value;
|
st->voice_ratio = value;
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,6 +116,7 @@ int main(int argc, char *argv[])
|
||||||
int encode_only=0, decode_only=0;
|
int encode_only=0, decode_only=0;
|
||||||
int max_frame_size = 960*6;
|
int max_frame_size = 960*6;
|
||||||
int curr_read=0;
|
int curr_read=0;
|
||||||
|
int sweep_bps = 0;
|
||||||
|
|
||||||
if (argc < 7 )
|
if (argc < 7 )
|
||||||
{
|
{
|
||||||
|
@ -223,6 +224,9 @@ int main(int argc, char *argv[])
|
||||||
} else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) {
|
} else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) {
|
||||||
packet_loss_perc = atoi( argv[ args + 1 ] );
|
packet_loss_perc = atoi( argv[ args + 1 ] );
|
||||||
args += 2;
|
args += 2;
|
||||||
|
} else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-sweep" ) == 0 ) {
|
||||||
|
sweep_bps = atoi( argv[ args + 1 ] );
|
||||||
|
args += 2;
|
||||||
} else {
|
} else {
|
||||||
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
|
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
|
||||||
print_usage( argv );
|
print_usage( argv );
|
||||||
|
@ -363,6 +367,14 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
|
len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
|
||||||
|
if (sweep_bps!=0)
|
||||||
|
{
|
||||||
|
bitrate_bps += sweep_bps;
|
||||||
|
/* safety */
|
||||||
|
if (bitrate_bps<1000)
|
||||||
|
bitrate_bps = 1000;
|
||||||
|
opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
|
||||||
|
}
|
||||||
opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle]));
|
opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle]));
|
||||||
if (len[toggle] < 0)
|
if (len[toggle] < 0)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue