From 132ed59464dccaae8f1e2f13f168763e99f85d17 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 4 Nov 2016 11:41:13 -0400 Subject: [PATCH] Removes OPUS_FRAMESIZE_VARIABLE That experiment never actually worked --- celt/celt.h | 3 - celt/celt_encoder.c | 16 +- include/opus_defines.h | 2 - src/analysis.h | 1 - src/opus_demo.c | 45 +++-- src/opus_encoder.c | 298 +-------------------------------- src/opus_multistream_encoder.c | 27 +-- src/opus_private.h | 8 - 8 files changed, 30 insertions(+), 370 deletions(-) diff --git a/celt/celt.h b/celt/celt.h index 1ae67062..863a0644 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -74,9 +74,6 @@ typedef struct { /* Encoder/decoder Requests */ -/* Expose this option again when variable framesize actually works */ -#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ - #define CELT_SET_PREDICTION_REQUEST 10002 /** Controls the use of interframe prediction. diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index de3053aa..77de1b6e 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -73,7 +73,6 @@ struct OpusCustomEncoder { int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ int loss_rate; int lsb_depth; - int variable_duration; int lfe; int disable_inv; int arch; @@ -1220,7 +1219,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity, int constrained_vbr, opus_val16 stereo_saving, int tot_boost, opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth, - int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking, + int lfe, int has_surround_mask, opus_val16 surround_masking, opus_val16 temporal_vbr) { /* The target rate in 8th bits per frame */ @@ -1264,8 +1263,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). 
*/ target += tot_boost-(16<lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf - && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); + && st->complexity >= 5; prefilter_tapset = st->tapset_decision; pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); @@ -1990,7 +1988,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, st->lastCodedBands, C, st->intensity, st->constrained_vbr, st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, - st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, + st->lfe, st->energy_mask!=NULL, surround_masking, temporal_vbr); } else { target = base_target; @@ -2409,12 +2407,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) *value=st->lsb_depth; } break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->variable_duration = value; - } - break; case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); diff --git a/include/opus_defines.h b/include/opus_defines.h index 38a81432..33c5acdb 100644 --- a/include/opus_defines.h +++ b/include/opus_defines.h @@ -573,7 +573,6 @@ extern "C" { *
<dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
 * <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
 * <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
- * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* * @hideinitializer */ #define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x) @@ -591,7 +590,6 @@ extern "C" { *
<dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
 * <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
 * <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
- * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* * @hideinitializer */ #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) diff --git a/src/analysis.h b/src/analysis.h index 9eae56a5..86bd6340 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -62,7 +62,6 @@ typedef struct { int last_music; int last_transition; int count; - float subframe_mem[3]; int analysis_offset; /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). pspeech[0] is the probability that all frames in the window are speech. */ diff --git a/src/opus_demo.c b/src/opus_demo.c index e5998a12..df9e70de 100644 --- a/src/opus_demo.c +++ b/src/opus_demo.c @@ -415,10 +415,6 @@ int main(int argc, char *argv[]) check_encoder_option(decode_only, "-cvbr"); cvbr = 1; args++; - } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) { - check_encoder_option(decode_only, "-variable-duration"); - variable_duration = OPUS_FRAMESIZE_VARIABLE; - args++; } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) { check_encoder_option(decode_only, "-delayed-decision"); delayed_decision = 1; @@ -606,28 +602,25 @@ int main(int argc, char *argv[]) } if(delayed_decision) { - if (variable_duration!=OPUS_FRAMESIZE_VARIABLE) - { - if (frame_size==sampling_rate/400) - variable_duration = OPUS_FRAMESIZE_2_5_MS; - else if (frame_size==sampling_rate/200) - variable_duration = OPUS_FRAMESIZE_5_MS; - else if (frame_size==sampling_rate/100) - variable_duration = OPUS_FRAMESIZE_10_MS; - else if (frame_size==sampling_rate/50) - variable_duration = OPUS_FRAMESIZE_20_MS; - else if (frame_size==sampling_rate/25) - variable_duration = OPUS_FRAMESIZE_40_MS; - else if (frame_size==3*sampling_rate/50) - variable_duration = OPUS_FRAMESIZE_60_MS; - else if (frame_size==4*sampling_rate/50) - variable_duration = OPUS_FRAMESIZE_80_MS; - else if (frame_size==5*sampling_rate/50) - variable_duration = OPUS_FRAMESIZE_100_MS; - else - variable_duration = OPUS_FRAMESIZE_120_MS; - opus_encoder_ctl(enc, 
OPUS_SET_EXPERT_FRAME_DURATION(variable_duration)); - } + if (frame_size==sampling_rate/400) + variable_duration = OPUS_FRAMESIZE_2_5_MS; + else if (frame_size==sampling_rate/200) + variable_duration = OPUS_FRAMESIZE_5_MS; + else if (frame_size==sampling_rate/100) + variable_duration = OPUS_FRAMESIZE_10_MS; + else if (frame_size==sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_20_MS; + else if (frame_size==sampling_rate/25) + variable_duration = OPUS_FRAMESIZE_40_MS; + else if (frame_size==3*sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_60_MS; + else if (frame_size==4*sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_80_MS; + else if (frame_size==5*sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_100_MS; + else + variable_duration = OPUS_FRAMESIZE_120_MS; + opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration)); frame_size = 2*48000; } while (!stop) diff --git a/src/opus_encoder.c b/src/opus_encoder.c index b5f739c9..4c84efec 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -571,215 +571,6 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m return st->user_bitrate_bps; } -#ifndef DISABLE_FLOAT_API -/* Don't use more than 60 ms for the frame size analysis */ -#define MAX_DYNAMIC_FRAMESIZE 24 -/* Estimates how much the bitrate will be boosted based on the sub-frame energy */ -static float transient_boost(const float *E, const float *E_1, int LM, int maxM) -{ - int i; - int M; - float sumE=0, sumE_1=0; - float metric; - - M = IMIN(maxM, (1<10 ? 
1 : 0;*/ - /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ - return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); -} - -/* Viterbi decoding trying to find the best frame size combination using look-ahead - - State numbering: - 0: unused - 1: 2.5 ms - 2: 5 ms (#1) - 3: 5 ms (#2) - 4: 10 ms (#1) - 5: 10 ms (#2) - 6: 10 ms (#3) - 7: 10 ms (#4) - 8: 20 ms (#1) - 9: 20 ms (#2) - 10: 20 ms (#3) - 11: 20 ms (#4) - 12: 20 ms (#5) - 13: 20 ms (#6) - 14: 20 ms (#7) - 15: 20 ms (#8) -*/ -static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) -{ - int i; - float cost[MAX_DYNAMIC_FRAMESIZE][16]; - int states[MAX_DYNAMIC_FRAMESIZE][16]; - float best_cost; - int best_state; - float factor; - /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ - if (rate<80) - factor=0; - else if (rate>160) - factor=1; - else - factor = (rate-80.f)/80.f; - /* Makes variable framesize less aggressive at lower bitrates, but I can't - find any valid theoretical justification for this (other than it seems - to help) */ - for (i=0;i<16;i++) - { - /* Impossible state */ - states[0][i] = -1; - cost[0][i] = 1e10; - } - for (i=0;i<4;i++) - { - cost[0][1<=0;i--) - { - /*printf("%d ", best_state);*/ - best_state = states[i][best_state]; - } - /*printf("%d\n", best_state);*/ - return best_state; -} - -static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs, - int bitrate, opus_val16 tonality, float *mem, int buffering, - downmix_func downmix) -{ - int N; - int i; - float e[MAX_DYNAMIC_FRAMESIZE+4]; - float e_1[MAX_DYNAMIC_FRAMESIZE+3]; - opus_val32 memx; - int bestLM=0; - int subframe; - int pos; - int offset; - VARDECL(opus_val32, sub); - - subframe = Fs/400; - ALLOC(sub, subframe, opus_val32); - e[0]=mem[0]; - e_1[0]=1.f/(EPSILON+mem[0]); - if (buffering) - { - /* Consider the CELT delay when not in restricted-lowdelay */ - /* We assume the buffering is between 2.5 and 5 ms */ - offset = 2*subframe - buffering; - 
celt_assert(offset>=0 && offset <= subframe); - len -= offset; - e[1]=mem[1]; - e_1[1]=1.f/(EPSILON+mem[1]); - e[2]=mem[2]; - e_1[2]=1.f/(EPSILON+mem[2]); - pos = 3; - } else { - pos=1; - offset=0; - } - N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); - /* Just silencing a warning, it's really initialized later */ - memx = 0; - for (i=0;i= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_120_MS) { if (variable_duration <= OPUS_FRAMESIZE_40_MS) @@ -882,39 +671,6 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_ return new_size; } -opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, - int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, downmix_func downmix -#ifndef DISABLE_FLOAT_API - , float *subframe_mem -#endif - ) -{ -#ifndef DISABLE_FLOAT_API - if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) - { - int LM = 3; - LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, - 0, subframe_mem, delay_compensation, downmix); - while ((Fs/400<frame_size) - LM--; - frame_size = (Fs/400<use_vbr) { - opus_int32 bonus=0; -#ifndef DISABLE_FLOAT_API - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) - { - bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); - if (analysis_info.valid) - bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); - } -#endif celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); + celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps)); } } } @@ -2420,17 +2167,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra { int i, ret; int frame_size; - int delay_compensation; VARDECL(opus_int16, in); ALLOC_STACK; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - 
delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_float, st->analysis.subframe_mem); + frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); ALLOC(in, frame_size*st->channels, opus_int16); @@ -2447,18 +2187,7 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram unsigned char *data, opus_int32 out_data_bytes) { int frame_size; - int delay_compensation; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_int -#ifndef DISABLE_FLOAT_API - , st->analysis.subframe_mem -#endif - ); + frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); } @@ -2469,17 +2198,10 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram { int i, ret; int frame_size; - int delay_compensation; VARDECL(float, in); ALLOC_STACK; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_int, st->analysis.subframe_mem); + frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); ALLOC(in, frame_size*st->channels, float); @@ -2494,14 +2216,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra unsigned char *data, opus_int32 out_data_bytes) { int frame_size; - int 
delay_compensation; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_float, st->analysis.subframe_mem); + frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); } @@ -2865,8 +2580,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_80_MS && - value != OPUS_FRAMESIZE_100_MS && value != OPUS_FRAMESIZE_120_MS && - value != OPUS_FRAMESIZE_VARIABLE) + value != OPUS_FRAMESIZE_100_MS && value != OPUS_FRAMESIZE_120_MS) { goto bad_arg; } diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index f0b9ae42..ed93c898 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -87,7 +87,6 @@ struct OpusMSEncoder { int variable_duration; MappingType mapping_type; opus_int32 bitrate_bps; - float subframe_mem[3]; /* Encoder states go here */ /* then opus_val32 window_mem[channels*120]; */ /* then opus_val32 preemph_mem[channels]; */ @@ -461,7 +460,6 @@ static int opus_multistream_encoder_init_impl( st->layout.nb_channels = channels; st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; - st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; if (mapping_type != MAPPING_TYPE_SURROUND) st->lfe_stream = -1; st->bitrate_bps = OPUS_AUTO; @@ -729,14 +727,6 @@ static void surround_rate_allocation( + coupled_ratio*nb_coupled /* stereo */ + nb_lfe*lfe_ratio; channel_rate = 256*(opus_int64)(bitrate - 
lfe_offset*nb_lfe - stream_offset*(nb_coupled+nb_uncoupled) - channel_offset*nb_normal)/total; -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus; - bonus = 60*(Fs/frame_size-50); - channel_rate += bonus; - } -#endif for (i=0;ilayout.nb_streams;i++) { @@ -788,14 +778,6 @@ static void ambisonics_rate_allocation( total_rate * rate_ratio_den / (rate_ratio_den*num_channels + rate_ratio_num - rate_ratio_den); -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus = 60*(Fs/frame_size-50); - non_mono_rate += bonus; - } -#endif - rate[0] = total_rate - (num_channels - 1) * non_mono_rate; for (i=1;ilayout.nb_streams;i++) { @@ -887,13 +869,7 @@ static int opus_multistream_encode_native opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); delay_compensation -= Fs/400; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->layout.nb_channels, Fs, st->bitrate_bps, - delay_compensation, downmix -#ifndef DISABLE_FLOAT_API - , st->subframe_mem -#endif - ); + frame_size = frame_size_select(analysis_frame_size, st->variable_duration, Fs); } if (400*frame_size < Fs) @@ -1346,7 +1322,6 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) 
case OPUS_RESET_STATE: { int s; - st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0; if (st->mapping_type == MAPPING_TYPE_SURROUND) { OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels); diff --git a/src/opus_private.h b/src/opus_private.h index 3b62eed0..a731cc55 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -92,14 +92,6 @@ int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); -opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, - int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, downmix_func downmix -#ifndef DISABLE_FLOAT_API - , float *subframe_mem -#endif - ); - opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,