From 1b72386a7c44857fe5b35fbfbce6b11c367f0e10 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Thu, 25 Apr 2013 21:34:04 -0400 Subject: [PATCH 1/7] Initial surround code with new API Conflicts: src/opus_multistream_encoder.c --- celt/celt.h | 2 + include/opus_multistream.h | 28 +++++ src/opus_multistream_encoder.c | 207 ++++++++++++++++++++++++++++----- 3 files changed, 211 insertions(+), 26 deletions(-) diff --git a/celt/celt.h b/celt/celt.h index 614698f0..a1c2c480 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -107,6 +107,8 @@ typedef struct { #define CELT_SET_ANALYSIS_REQUEST 10022 #define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) +#define OPUS_SET_LFE_REQUEST 10022 +#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) /* Encoder stuff */ diff --git a/include/opus_multistream.h b/include/opus_multistream.h index 658067f7..ae599793 100644 --- a/include/opus_multistream.h +++ b/include/opus_multistream.h @@ -205,6 +205,12 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size int coupled_streams ); +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size( + int channels, + int mapping_family +); + + /** Allocates and initializes a multistream encoder state. * Call opus_multistream_encoder_destroy() to release * this object when finished. @@ -258,6 +264,17 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_crea int *error ) OPUS_ARG_NONNULL(5); +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create( + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application, + int *error +) OPUS_ARG_NONNULL(5); + /** Initialize a previously allocated multistream encoder state. * The memory pointed to by \a st must be at least the size returned by * opus_multistream_encoder_get_size(). @@ -316,6 +333,17 @@ OPUS_EXPORT int opus_multistream_encoder_init( int application ) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); +OPUS_EXPORT int opus_multistream_surround_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); + /** Encodes a multistream Opus frame. * @param st OpusMSEncoder*: Multistream encoder state. * @param[in] pcm const opus_int16*: The input signal as interleaved diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 08dff363..b3e0ccc4 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -41,6 +41,7 @@ struct OpusMSEncoder { TonalityAnalysisState analysis; ChannelLayout layout; + int lfe_stream; int variable_duration; opus_int32 bitrate_bps; opus_val32 subframe_mem[3]; @@ -81,16 +82,38 @@ opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_stre + (nb_streams-nb_coupled_streams) * align(mono_size); } +opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family) +{ + int nb_streams; + int nb_coupled_streams; + + if (channels==1 && mapping_family<=1) + { + nb_streams = 1; + nb_coupled_streams=0; + } else if (channels==2 && mapping_family<=1) + { + nb_streams = 1; + nb_coupled_streams=1; + } else if (channels==6 && mapping_family==1) + { + nb_streams = 4; + nb_coupled_streams=2; + } else + return 0; + return opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); +} -int opus_multistream_encoder_init( +static int opus_multistream_encoder_init_impl( OpusMSEncoder *st, opus_int32 Fs, int channels, int streams, int coupled_streams, const unsigned char *mapping, - int application + int application, + int surround ) { int coupled_size; @@ -107,7 +130,8 @@ int opus_multistream_encoder_init( st->layout.nb_coupled_streams = coupled_streams; st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; OPUS_CLEAR(&st->analysis,1); - + if (!surround) + st->lfe_stream = -1; st->bitrate_bps = OPUS_AUTO; st->variable_duration = OPUS_FRAMESIZE_ARG; for (i=0;ilayout.nb_channels;i++) @@ -121,18 +145,75 @@ int opus_multistream_encoder_init( for (i=0;ilayout.nb_coupled_streams;i++) { ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application); + if (i==st->lfe_stream) + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); if(ret!=OPUS_OK)return ret; ptr += align(coupled_size); } for (;ilayout.nb_streams;i++) { ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application); + if (i==st->lfe_stream) + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); if(ret!=OPUS_OK)return ret; ptr += align(mono_size); } return OPUS_OK; } +int opus_multistream_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int application +) +{ + return opus_multistream_encoder_init_impl(st, Fs, channels, streams, coupled_streams, mapping, application, 0); +} + +int opus_multistream_surround_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application +) +{ + st->lfe_stream = -1; + if (channels==1 && mapping_family<=1) + { + *streams=1; + *coupled_streams=0; + mapping[0]=0; + } else if (channels==2 && mapping_family<=1) + { + *streams=1; + *coupled_streams=1; + mapping[0]=0; + mapping[1]=1; + } else if (channels==6 && mapping_family==1) + { + *streams=4; + *coupled_streams=2; + mapping[0]=0; + mapping[1]=4; + mapping[2]=1; + mapping[3]=2; + mapping[4]=3; + mapping[5]=5; + st->lfe_stream = 3; + } else + return OPUS_BAD_ARG; + opus_multistream_encoder_init_impl(st, Fs, channels, *streams, *coupled_streams, mapping, application, 1); + return OPUS_OK; +} + OpusMSEncoder *opus_multistream_encoder_create( opus_int32 Fs, int channels, @@ -170,6 +251,43 @@ OpusMSEncoder *opus_multistream_encoder_create( return st; } +OpusMSEncoder *opus_multistream_surround_encoder_create( + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application, + int *error +) +{ + int ret; + OpusMSEncoder *st; + if ((channels>255) || (channels<1)) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family)); + if (st==NULL) + { + if (error) + *error = OPUS_ALLOC_FAIL; + return NULL; + } + ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application); + if (ret != OPUS_OK) + { + opus_free(st); + st = NULL; + } + if (error) + *error = ret; + return st; +} + typedef void (*opus_copy_channel_in_func)( opus_val16 *dst, int dst_stride, @@ -179,6 +297,62 @@ typedef void (*opus_copy_channel_in_func)( int frame_size ); +static void surround_rate_allocation( + OpusMSEncoder *st, + opus_int32 *rate, + int frame_size + ) +{ + int i; + opus_int32 channel_rate; + opus_int32 Fs; + char *ptr; + int coupled_ratio; /* Q8 */ + int lfe_ratio; /* Q8 */ + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); + + /* Should depend on the bitrate, for now we assume coupled streams get 60% more bits than mono */ + coupled_ratio = 410; + /* Should depend on the bitrate, for now we assume LFE gets 1/12 the bits of mono */ + lfe_ratio = 32; + + /* Compute bitrate allocation between streams */ + if (st->bitrate_bps==OPUS_AUTO) + { + channel_rate = Fs+60*Fs/frame_size; + } else if (st->bitrate_bps==OPUS_BITRATE_MAX) + { + channel_rate = 300000; + } else { + int total = ((st->layout.nb_streams-st->layout.nb_coupled_streams-(st->lfe_stream!=-1))<<8) /* mono */ + + coupled_ratio*st->layout.nb_coupled_streams /* stereo */ + + (st->lfe_stream!=-1)*lfe_ratio; + channel_rate = 256*st->bitrate_bps/total; + } +#ifndef FIXED_POINT + if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) + { + opus_int32 bonus; + bonus = 60*(Fs/frame_size-50); + channel_rate += bonus; + } +#endif + + for (i=0;ilayout.nb_streams;i++) + { + if (ilayout.nb_coupled_streams) + rate[i] = channel_rate*coupled_ratio>>8; + else if (i!=st->lfe_stream) + rate[i] = channel_rate; + else + rate[i] = channel_rate*lfe_ratio>>8; + } + + +} + /* Max size in case the encoder decides to return three frames */ #define MS_FRAME_TMP (3*1275+7) static int opus_multistream_encode_native @@ -205,12 +379,10 @@ static int opus_multistream_encode_native VARDECL(opus_val16, buf); unsigned char tmp_data[MS_FRAME_TMP]; OpusRepacketizer rp; - int orig_frame_size; - int coded_channels; - opus_int32 channel_rate; opus_int32 complexity; AnalysisInfo analysis_info; const CELTMode *celt_mode; + opus_int32 bitrates[256]; ALLOC_STACK; ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -223,7 +395,6 @@ static int opus_multistream_encode_native RESTORE_STACK; return OPUS_BAD_ARG; } - orig_frame_size = IMIN(frame_size,Fs/50); #ifndef FIXED_POINT analysis_info.valid = 0; if (complexity >= 7 && Fs==48000) @@ -262,24 +433,8 @@ static int opus_multistream_encode_native } /* Compute bitrate allocation between streams (this could be a lot better) */ - coded_channels = st->layout.nb_streams + st->layout.nb_coupled_streams; - if (st->bitrate_bps==OPUS_AUTO) - { - channel_rate = Fs+60*Fs/orig_frame_size; - } else if (st->bitrate_bps==OPUS_BITRATE_MAX) - { - channel_rate = 300000; - } else { - channel_rate = st->bitrate_bps/coded_channels; - } -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus; - bonus = 60*(Fs/frame_size-50); - channel_rate += bonus; - } -#endif + surround_rate_allocation(st, bitrates, frame_size); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); for (s=0;slayout.nb_streams;s++) { @@ -289,7 +444,7 @@ static int opus_multistream_encode_native ptr += align(coupled_size); else ptr += align(mono_size); - opus_encoder_ctl(enc, OPUS_SET_BITRATE(channel_rate * (s < st->layout.nb_coupled_streams ? 2 : 1))); + opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); } ptr = (char*)st + align(sizeof(OpusMSEncoder)); From b08c4ca3f54c964f1108170d0e4b7df2200b6a82 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 26 Apr 2013 16:32:10 -0400 Subject: [PATCH 2/7] Surround: Better LFE handling Forces CELT-only mode for LFE (despite the rate) and "locks" most of the CELT analysis: - No transient or TF - Band boost on first band - Only first two bands get PVQ bits - Forced energy decay after the first two bands --- celt/celt.h | 2 +- celt/celt_encoder.c | 29 ++++++++++++++++++++++------- celt/quant_bands.c | 14 +++++++++----- celt/quant_bands.h | 2 +- src/opus_encoder.c | 13 +++++++++++++ src/opus_multistream_encoder.c | 4 ++-- 6 files changed, 48 insertions(+), 16 deletions(-) diff --git a/celt/celt.h b/celt/celt.h index a1c2c480..a8f7cb03 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -107,7 +107,7 @@ typedef struct { #define CELT_SET_ANALYSIS_REQUEST 10022 #define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) -#define OPUS_SET_LFE_REQUEST 10022 +#define OPUS_SET_LFE_REQUEST 10024 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) /* Encoder stuff */ diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 7347cb31..a88e5922 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -74,6 +74,7 @@ struct OpusCustomEncoder { int loss_rate; int lsb_depth; int variable_duration; + int lfe; /* Everything beyond this point gets cleared on a reset */ #define ENCODER_RESET_START rng @@ -869,7 +870,7 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X, static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_) + int effectiveBytes, opus_int32 *tot_boost_, int lfe) { int i, c; opus_int32 tot_boost=0; @@ -897,7 +898,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); } while (++c 50 && LM>=1) + if (effectiveBytes > 50 && LM>=1 && !lfe) { int last=0; c=0;do @@ -1356,7 +1357,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, isTransient = 0; shortBlocks = 0; - if (st->complexity >= 1) + if (st->complexity >= 1 && !st->lfe) { isTransient = transient_analysis(in, N+st->overlap, CC, &tf_estimate, &tf_chan); @@ -1429,7 +1430,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(tf_res, nbEBands, int); /* Disable variable tf resolution for hybrid and at very low bitrate */ - if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2) + if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe) { int lambda; if (effectiveBytes<40) @@ -1455,7 +1456,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE, oldBandE, total_bits, error, enc, C, LM, nbAvailableBytes, st->force_intra, - &st->delayedIntra, st->complexity >= 4, st->loss_rate); + &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); @@ -1494,7 +1495,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost); + eBands, LM, effectiveBytes, &tot_boost, st->lfe); + /* For LFE, everything interesting is in the first band */ + if (st->lfe) + offsets[0] = IMIN(8, effectiveBytes/3); ALLOC(cap, nbEBands, int); init_caps(mode,cap,LM,C); @@ -1560,7 +1564,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; if (tell+(6<lfe) + alloc_trim = 5; + else + alloc_trim = alloc_trim_analysis(mode, X, bandLogE, st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); @@ -1738,6 +1745,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (st->analysis.valid) signalBandwidth = st->analysis.bandwidth; #endif + if (st->lfe) + signalBandwidth = 1; codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); @@ -2127,6 +2136,12 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) *value=st->rng; } break; + case OPUS_SET_LFE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->lfe = value; + } + break; default: goto bad_request; } diff --git a/celt/quant_bands.c b/celt/quant_bands.c index 514f03c4..48196bde 100644 --- a/celt/quant_bands.c +++ b/celt/quant_bands.c @@ -157,7 +157,7 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, const opus_val16 *eBands, opus_val16 *oldEBands, opus_int32 budget, opus_int32 tell, const unsigned char *prob_model, opus_val16 *error, ec_enc *enc, - int C, int LM, int intra, opus_val16 max_decay) + int C, int LM, int intra, opus_val16 max_decay, int lfe) { int i, c; int badness = 0; @@ -222,6 +222,8 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, if (bits_left < 16) qi = IMAX(-1, qi); } + if (lfe && i>=2) + qi = IMIN(qi, 0); if (budget-tell >= 15) { int pi; @@ -253,13 +255,13 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); } while (++c < C); } - return badness; + return lfe ? 0 : badness; } void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes, - int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate) + int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe) { int intra; opus_val16 max_decay; @@ -289,6 +291,8 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, max_decay = MIN32(max_decay, .125f*nbAvailableBytes); #endif } + if (lfe) + max_decay=3; enc_start_state = *enc; ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); @@ -298,7 +302,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, if (two_pass || intra) { badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget, - tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay); + tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe); } if (!intra) @@ -325,7 +329,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, *enc = enc_start_state; badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget, - tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay); + tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe); if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra))) { diff --git a/celt/quant_bands.h b/celt/quant_bands.h index b3187fad..0490bca4 100644 --- a/celt/quant_bands.h +++ b/celt/quant_bands.h @@ -51,7 +51,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra, - int two_pass, int loss_rate); + int two_pass, int loss_rate, int lfe); void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C); diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 88bf5aff..235f5573 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -78,6 +78,7 @@ struct OpusEncoder { opus_int32 user_bitrate_bps; int lsb_depth; int encoder_buffer; + int lfe; #define OPUS_ENCODER_RESET_START stream_channels int stream_channels; @@ -1234,6 +1235,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* CELT mode doesn't support mediumband, use wideband instead */ if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + if (st->lfe) + { + st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; + st->mode = MODE_CELT_ONLY; + } /* Can't support higher than wideband for >20 ms frames */ if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) @@ -2203,6 +2209,13 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) st->user_forced_mode = value; } break; + case OPUS_SET_LFE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->lfe = value; + celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); + } + break; case CELT_GET_MODE_REQUEST: { diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index b3e0ccc4..9dcbb71c 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -329,7 +329,7 @@ static void surround_rate_allocation( int total = ((st->layout.nb_streams-st->layout.nb_coupled_streams-(st->lfe_stream!=-1))<<8) /* mono */ + coupled_ratio*st->layout.nb_coupled_streams /* stereo */ + (st->lfe_stream!=-1)*lfe_ratio; - channel_rate = 256*st->bitrate_bps/total; + channel_rate = 256*(st->bitrate_bps-2000)/total; } #ifndef FIXED_POINT if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) @@ -347,7 +347,7 @@ static void surround_rate_allocation( else if (i!=st->lfe_stream) rate[i] = channel_rate; else - rate[i] = channel_rate*lfe_ratio>>8; + rate[i] = 2000+(channel_rate*lfe_ratio>>8); } From 172f66a3ac321d1c39c4341944983d3de723d199 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sat, 27 Apr 2013 02:29:52 -0400 Subject: [PATCH 3/7] More forced decisions in CELT LFE encoding --- celt/celt_encoder.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index a88e5922..6ac2457d 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -1328,7 +1328,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { int enabled; int qg; - enabled = nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf + enabled = (st->lfe || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration); prefilter_tapset = st->tapset_decision; @@ -1391,6 +1391,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, tf_chan = 0; compute_band_energies(mode, freq, bandE, effEnd, C, M); + if (st->lfe) + { + for (i=2;iend;i++) + bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); + } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); /*for (i=0;i<21;i++) printf("%f ", bandLogE[i]); @@ -1404,7 +1409,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /* Last chance to catch any transient we might have missed in the time-domain analysis */ - if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5) + if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) { if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C)) { @@ -1462,7 +1467,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (ec_tell(enc)+4<=total_bits) { - if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0) + if (st->lfe) + { + st->tapset_decision = 0; + st->spread_decision = SPREAD_NORMAL; + } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0) { if (st->complexity == 0) st->spread_decision = SPREAD_NONE; From 7a8b1399d24e2da23350d1ce219bdd3fcef26183 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 29 Apr 2013 18:32:27 -0400 Subject: [PATCH 4/7] Adds support for all Vorbis mappings --- src/opus_multistream_encoder.c | 97 +++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 31 deletions(-) diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 9dcbb71c..d94aa705 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -38,6 +38,24 @@ #include "os_support.h" #include "analysis.h" +typedef struct { + int nb_streams; + int nb_coupled_streams; + unsigned char mapping[8]; +} VorbisLayout; + +/* Index is nb_channel-1*/ +static const VorbisLayout vorbis_mappings[8] = { + {1, 0, {0}}, /* 1: mono */ + {1, 1, {0, 1}}, /* 2: stereo */ + {2, 1, {0, 2, 1}}, /* 3: 1-d surround */ + {2, 2, {0, 1, 2, 3}}, /* 4: quadraphonic surround */ + {3, 2, {0, 4, 1, 2, 3}}, /* 5: 5-channel surround */ + {4, 2, {0, 4, 1, 2, 3, 5}}, /* 6: 5.1 surround */ + {4, 3, {0, 4, 1, 2, 3, 5, 6}}, /* 7: 6.1 surround */ + {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ +}; + struct OpusMSEncoder { TonalityAnalysisState analysis; ChannelLayout layout; @@ -87,18 +105,26 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ int nb_streams; int nb_coupled_streams; - if (channels==1 && mapping_family<=1) + if (mapping_family==0) { - nb_streams = 1; + if (channels==1) + { + nb_streams=1; + nb_coupled_streams=0; + } else if (channels==2) + { + nb_streams=1; + nb_coupled_streams=1; + } else + return 0; + } else if (mapping_family==1 && channels<=8 && channels>=1) + { + nb_streams=vorbis_mappings[channels-1].nb_streams; + nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; + } else if (mapping_family==255) + { + nb_streams=channels; nb_coupled_streams=0; - } else if (channels==2 && mapping_family<=1) - { - nb_streams = 1; - nb_coupled_streams=1; - } else if (channels==6 && mapping_family==1) - { - nb_streams = 4; - nb_coupled_streams=2; } else return 0; return opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); @@ -186,30 +212,39 @@ int opus_multistream_surround_encoder_init( ) { st->lfe_stream = -1; - if (channels==1 && mapping_family<=1) + if (mapping_family==0) { - *streams=1; + if (channels==1) + { + *streams=1; + *coupled_streams=0; + mapping[0]=0; + } else if (channels==2) + { + *streams=1; + *coupled_streams=1; + mapping[0]=0; + mapping[1]=1; + } else + return OPUS_UNIMPLEMENTED; + } else if (mapping_family==1 && channels<=8 && channels>=1) + { + int i; + *streams=vorbis_mappings[channels-1].nb_streams; + *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; + for (i=0;i=6) + st->lfe_stream = *streams-1; + } else if (mapping_family==255) + { + int i; + *streams=channels; *coupled_streams=0; - mapping[0]=0; - } else if (channels==2 && mapping_family<=1) - { - *streams=1; - *coupled_streams=1; - mapping[0]=0; - mapping[1]=1; - } else if (channels==6 && mapping_family==1) - { - *streams=4; - *coupled_streams=2; - mapping[0]=0; - mapping[1]=4; - mapping[2]=1; - mapping[3]=2; - mapping[4]=3; - mapping[5]=5; - st->lfe_stream = 3; + for(i=0;i Date: Sat, 4 May 2013 23:54:20 -0400 Subject: [PATCH 5/7] Implements basic surround masking The idea is that the rate of each stream is adjusted based on its contribution to the total energy of a stereo downmix. --- celt/celt.h | 6 + celt/celt_encoder.c | 52 ++++++++- include/opus_defines.h | 1 + src/opus_encoder.c | 16 ++- src/opus_multistream_encoder.c | 200 +++++++++++++++++++++++++++++++-- 5 files changed, 264 insertions(+), 11 deletions(-) diff --git a/celt/celt.h b/celt/celt.h index a8f7cb03..ea8c2f95 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -110,6 +110,12 @@ typedef struct { #define OPUS_SET_LFE_REQUEST 10024 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) +#define OPUS_SET_ENERGY_SAVE_REQUEST 10026 +#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x) + +#define OPUS_SET_ENERGY_MASK_REQUEST 10028 +#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) + /* Encoder stuff */ int celt_encoder_get_size(int channels); diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 6ac2457d..2030ad08 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -109,6 +109,8 @@ struct OpusCustomEncoder { opus_val16 overlap_max; opus_val16 stereo_saving; int intensity; + opus_val16 *energy_save; + opus_val16 *energy_mask; #ifdef RESYNTH /* +MAX_PERIOD/2 to make space for overlap */ @@ -1165,6 +1167,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int secondMdct; int signalBandwidth; int transient_got_disabled=0; + opus_val16 surround_masking=0; ALLOC_STACK; mode = st->mode; @@ -1397,6 +1400,27 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + if (st->energy_save) + { + opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; +#ifdef FIXED_POINT + /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */ + offset -= QCONST16(3.0f, DB_SHIFT); +#endif + for(i=0;ienergy_save[i]=bandLogE[i]-offset; + st->energy_save=NULL; + } + if (st->energy_mask&&!st->lfe) + { + opus_val32 mask_avg=0; + opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; + for (c=0;cend;i++) + mask_avg += bandLogE[nbEBands*c+i]-offset-st->energy_mask[nbEBands*c+i]; + surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.0f, DB_SHIFT); + surround_masking = MIN16(MAX16(surround_masking,-QCONST16(1.5f, DB_SHIFT)), 0); + } /*for (i=0;i<21;i++) printf("%f ", bandLogE[i]); printf("\n");*/ @@ -1625,7 +1649,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins); /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/ target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target), - SHR16(MULT16_16(st->stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<energy_mask&&!st->lfe) + { + opus_int32 surround_target = target + SHR32(MULT16_16(surround_masking,coded_bins<end, st->intensity, surround_target, target, st->bitrate);*/ + target = IMAX(target/4, surround_target); + } + { opus_int32 floor_depth; int bins; @@ -1660,7 +1691,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /*printf("%f %d\n", maxDepth, floor_depth);*/ } - if (st->constrained_vbr || st->bitrate<64000) + if ((!st->energy_mask||st->lfe) && (st->constrained_vbr || st->bitrate<64000)) { opus_val16 rate_factor; #ifdef FIXED_POINT @@ -1759,7 +1790,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); - st->lastCodedBands = codedBands; + if (st->lastCodedBands) + st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); + else + st->lastCodedBands = codedBands; quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); @@ -2151,6 +2185,18 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) st->lfe = value; } break; + case OPUS_SET_ENERGY_SAVE_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_save=value; + } + break; + case OPUS_SET_ENERGY_MASK_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_mask = value; + } + break; default: goto bad_request; } diff --git a/include/opus_defines.h b/include/opus_defines.h index 203144a7..00918b89 100644 --- a/include/opus_defines.h +++ b/include/opus_defines.h @@ -158,6 +158,7 @@ extern "C" { #define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) #define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr))) #define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr))) +#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr))) /** @endcond */ /** @defgroup opus_ctlvalues Pre-defined values for CTL interface diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 235f5573..e2a6347e 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -94,6 +94,7 @@ struct OpusEncoder { int silk_bw_switch; /* Sampling rate (at the API level) */ int first; + int energy_masking; StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef FIXED_POINT @@ -1602,7 +1603,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->prev_HB_gain = HB_gain; if (st->mode != MODE_HYBRID || st->stream_channels==1) st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),IMAX(0,st->bitrate_bps-32000)); - if( st->channels == 2 ) { + if( !st->energy_masking && st->channels == 2 ) { /* Apply stereo width reduction (at low bitrates) */ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { opus_val16 g1, g2; @@ -2216,6 +2217,19 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); } break; + case OPUS_SET_ENERGY_SAVE_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value)); + } + break; + case OPUS_SET_ENERGY_MASK_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_masking = (value!=NULL); + celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); + } + break; case CELT_GET_MODE_REQUEST: { diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index d94aa705..163e73c2 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -61,6 +61,7 @@ struct OpusMSEncoder { ChannelLayout layout; int lfe_stream; int variable_duration; + int surround; opus_int32 bitrate_bps; opus_val32 subframe_mem[3]; /* Encoder states go here */ @@ -104,6 +105,7 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ { int nb_streams; int nb_coupled_streams; + opus_int32 size; if (mapping_family==0) { @@ -127,7 +129,10 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ nb_coupled_streams=0; } else return 0; - return opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); + size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); + if (channels>2) + size += align(opus_encoder_get_size(2)); + return size; } @@ -171,9 +176,9 @@ static int opus_multistream_encoder_init_impl( for (i=0;ilayout.nb_coupled_streams;i++) { ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application); + if(ret!=OPUS_OK)return ret; if (i==st->lfe_stream) opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); - if(ret!=OPUS_OK)return ret; ptr += align(coupled_size); } for (;ilayout.nb_streams;i++) @@ -184,6 +189,14 @@ static int opus_multistream_encoder_init_impl( if(ret!=OPUS_OK)return ret; ptr += align(mono_size); } + if (surround && st->layout.nb_channels>2) + { + OpusEncoder *downmix_enc; + downmix_enc = (OpusEncoder*)ptr; + ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO); + if(ret!=OPUS_OK)return ret; + } + st->surround = surround; return OPUS_OK; } @@ -332,6 +345,13 @@ typedef void (*opus_copy_channel_in_func)( int frame_size ); +typedef void (*opus_surround_downmix_funct)( + opus_val16 *dst, + const void *src, + int channels, + int frame_size +); + static void surround_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, @@ -398,7 +418,8 @@ static int opus_multistream_encode_native int frame_size, unsigned char *data, opus_int32 max_data_bytes, - int lsb_depth + int lsb_depth, + opus_surround_downmix_funct surround_downmix #ifndef FIXED_POINT , downmix_func downmix , const void *pcm_analysis @@ -418,6 +439,8 @@ static int opus_multistream_encode_native AnalysisInfo analysis_info; const CELTMode *celt_mode; opus_int32 bitrates[256]; + opus_val16 bandLogE[42]; + opus_val16 bandLogE_mono[21]; ALLOC_STACK; ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -461,6 +484,36 @@ static int opus_multistream_encode_native coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); + if (st->surround && st->layout.nb_channels>2) + { + int i; + unsigned char dummy[512]; + /* Temporary kludge -- remove */ + OpusEncoder *downmix_enc; + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;slayout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + downmix_enc = (OpusEncoder*)ptr; + surround_downmix(buf, pcm, st->layout.nb_channels, frame_size); + opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE)); + opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); + opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2)); + opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth +#ifndef FIXED_POINT + , &analysis_info +#endif + ); + for(i=0;i<21;i++) + bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]); + } + if (max_data_bytes < 4*st->layout.nb_streams-1) { RESTORE_STACK; @@ -480,6 +533,13 @@ static int opus_multistream_encode_native else ptr += align(mono_size); opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); + if (st->surround) + { + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + if (s < st->layout.nb_coupled_streams) + opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); + } } ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -503,11 +563,17 @@ static int opus_multistream_encode_native (*copy_channel_in)(buf+1, 2, pcm, st->layout.nb_channels, right, frame_size); ptr += align(coupled_size); + /* FIXME: This isn't correct for the coupled center channels in + 6.1 surround configuration */ + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); } else { int chan = get_mono_channel(&st->layout, s, -1); (*copy_channel_in)(buf, 1, pcm, st->layout.nb_channels, chan, frame_size); ptr += align(mono_size); + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono)); } /* number of bytes left (+Toc) */ curr_max = max_data_bytes - tot_size; @@ -557,6 +623,85 @@ static void opus_copy_channel_in_float( dst[i*dst_stride] = float_src[i*src_stride+src_channel]; #endif } + +static void channel_pos(int channels, int pos[8]) +{ + /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ + if (channels==4) + { + pos[0]=1; + pos[1]=3; + pos[2]=1; + pos[3]=3; + } else if (channels==3||channels==5||channels==6) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=0; + } else if (channels==7) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=2; + pos[6]=0; + } else if (channels==8) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=1; + pos[6]=3; + pos[7]=0; + } +} + +static void opus_surround_downmix_float( + opus_val16 *dst, + const void *src, + int channels, + int frame_size +) +{ + const float *float_src; + opus_int32 i; + int pos[8] = {0}; + int c; + float_src = (const float *)src; + + channel_pos(channels, pos); + for (i=0;i<2*frame_size;i++) + dst[i]=0; + + for (c=0;clayout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset); } int opus_multistream_encode( @@ -632,7 +818,7 @@ int opus_multistream_encode( { int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset); } #endif From d2c484b5e76f0d02b804b81e30672efa771081da Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 6 May 2013 16:02:31 -0400 Subject: [PATCH 6/7] Adds spreading to the surround masking computation This avoids a single low-energy band from changing the masking results. --- celt/celt_encoder.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 2030ad08..66de1ba2 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -1411,13 +1411,23 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, st->energy_save[i]=bandLogE[i]-offset; st->energy_save=NULL; } + /* This computes how much masking takes place between surround channels */ if (st->energy_mask&&!st->lfe) { opus_val32 mask_avg=0; opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; for (c=0;cend;i++) - mask_avg += bandLogE[nbEBands*c+i]-offset-st->energy_mask[nbEBands*c+i]; + { + /* We use a simple follower to approximate the masking spreading function. */ + followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset); + followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]); + mask_avg += followE-followMask; + } + } surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.0f, DB_SHIFT); surround_masking = MIN16(MAX16(surround_masking,-QCONST16(1.5f, DB_SHIFT)), 0); } From d66bdc73431481c924134c807e1b84f7391e0f69 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 6 May 2013 16:03:39 -0400 Subject: [PATCH 7/7] Revisit surround rate allocation Stereo now gets twice the "marginal allocation" compared to mono, but has a 20 kb/s offset. This should be more realistic across a wide range of bitrates. --- src/opus_multistream_encoder.c | 36 ++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 163e73c2..89844cbc 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -362,15 +362,22 @@ static void surround_rate_allocation( opus_int32 channel_rate; opus_int32 Fs; char *ptr; + int stream_offset; + int lfe_offset; int coupled_ratio; /* Q8 */ int lfe_ratio; /* Q8 */ ptr = (char*)st + align(sizeof(OpusMSEncoder)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); - /* Should depend on the bitrate, for now we assume coupled streams get 60% more bits than mono */ - coupled_ratio = 410; - /* Should depend on the bitrate, for now we assume LFE gets 1/12 the bits of mono */ + /* We start by giving each stream (coupled or uncoupled) the same bitrate. + This models the main saving of coupled channels over uncoupled. */ + stream_offset = 20000; + /* The LFE stream is an exception to the above and gets fewer bits. */ + lfe_offset = 3500; + /* Coupled streams get twice the mono rate after the first 20 kb/s. */ + coupled_ratio = 512; + /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */ lfe_ratio = 32; /* Compute bitrate allocation between streams */ @@ -381,10 +388,17 @@ static void surround_rate_allocation( { channel_rate = 300000; } else { - int total = ((st->layout.nb_streams-st->layout.nb_coupled_streams-(st->lfe_stream!=-1))<<8) /* mono */ - + coupled_ratio*st->layout.nb_coupled_streams /* stereo */ - + (st->lfe_stream!=-1)*lfe_ratio; - channel_rate = 256*(st->bitrate_bps-2000)/total; + int nb_lfe; + int nb_uncoupled; + int nb_coupled; + int total; + nb_lfe = (st->lfe_stream!=-1); + nb_coupled = st->layout.nb_coupled_streams; + nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe; + total = (nb_uncoupled<<8) /* mono */ + + coupled_ratio*nb_coupled /* stereo */ + + nb_lfe*lfe_ratio; + channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total; } #ifndef FIXED_POINT if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) @@ -398,14 +412,12 @@ static void surround_rate_allocation( for (i=0;ilayout.nb_streams;i++) { if (ilayout.nb_coupled_streams) - rate[i] = channel_rate*coupled_ratio>>8; + rate[i] = stream_offset+(channel_rate*coupled_ratio>>8); else if (i!=st->lfe_stream) - rate[i] = channel_rate; + rate[i] = stream_offset+channel_rate; else - rate[i] = 2000+(channel_rate*lfe_ratio>>8); + rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); } - - } /* Max size in case the encoder decides to return three frames */