From 0962cbe2ae535f8432fec37461ca006f113f200d Mon Sep 17 00:00:00 2001 From: Felicia Lim Date: Thu, 27 Oct 2016 17:03:36 -0700 Subject: [PATCH] Support encoding 80/100/120 ms frame lengths Signed-off-by: Jean-Marc Valin --- include/opus_defines.h | 9 +++ src/opus_demo.c | 18 ++++- src/opus_encoder.c | 129 ++++++++++++++++++++------------- src/opus_multistream_encoder.c | 12 +-- tests/test_opus_api.c | 9 +++ tests/test_opus_encode.c | 18 ++++- 6 files changed, 136 insertions(+), 59 deletions(-) diff --git a/include/opus_defines.h b/include/opus_defines.h index 140c5629..38a81432 100644 --- a/include/opus_defines.h +++ b/include/opus_defines.h @@ -209,6 +209,9 @@ extern "C" { #define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */ #define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */ #define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */ +#define OPUS_FRAMESIZE_80_MS 5007 /**< Use 80 ms frames */ +#define OPUS_FRAMESIZE_100_MS 5008 /**< Use 100 ms frames */ +#define OPUS_FRAMESIZE_120_MS 5009 /**< Use 120 ms frames */ /**@}*/ @@ -567,6 +570,9 @@ extern "C" { *
OPUS_FRAMESIZE_20_MS
Use 20 ms frames.
*
OPUS_FRAMESIZE_40_MS
Use 40 ms frames.
*
OPUS_FRAMESIZE_60_MS
Use 60 ms frames.
+ *
OPUS_FRAMESIZE_80_MS
Use 80 ms frames.
+ *
OPUS_FRAMESIZE_100_MS
Use 100 ms frames.
+ *
OPUS_FRAMESIZE_120_MS
Use 120 ms frames.
*
OPUS_FRAMESIZE_VARIABLE
Optimize the frame size dynamically.
* * @hideinitializer */ @@ -582,6 +588,9 @@ extern "C" { *
OPUS_FRAMESIZE_20_MS
Use 20 ms frames.
*
OPUS_FRAMESIZE_40_MS
Use 40 ms frames.
*
OPUS_FRAMESIZE_60_MS
Use 60 ms frames.
+ *
OPUS_FRAMESIZE_80_MS
Use 80 ms frames.
+ *
OPUS_FRAMESIZE_100_MS
Use 100 ms frames.
+ *
OPUS_FRAMESIZE_120_MS
Use 120 ms frames.
*
OPUS_FRAMESIZE_VARIABLE
Optimize the frame size dynamically.
* * @hideinitializer */ diff --git a/src/opus_demo.c b/src/opus_demo.c index c8135c2e..e5998a12 100644 --- a/src/opus_demo.c +++ b/src/opus_demo.c @@ -57,7 +57,7 @@ void print_usage( char* argv[] ) fprintf(stderr, "-variable-duration : enable frames of variable duration (experimental, experts only); default: disabled\n" ); fprintf(stderr, "-delayed-decision : use look-ahead for speech/music detection (experts only); default: disabled\n" ); fprintf(stderr, "-bandwidth : audio bandwidth (from narrowband to fullband); default: sampling rate\n" ); - fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" ); + fprintf(stderr, "-framesize <2.5|5|10|20|40|60|80|100|120> : frame size in ms; default: 20 \n" ); fprintf(stderr, "-max_payload : maximum payload size in bytes, default: 1024\n" ); fprintf(stderr, "-complexity : complexity, 0 (lowest) ... 10 (highest); default: 10\n" ); fprintf(stderr, "-inbandfec : enable SILK inband FEC\n" ); @@ -383,9 +383,15 @@ int main(int argc, char *argv[]) frame_size = sampling_rate/25; else if (strcmp(argv[ args + 1 ], "60")==0) frame_size = 3*sampling_rate/50; + else if (strcmp(argv[ args + 1 ], "80")==0) + frame_size = 4*sampling_rate/50; + else if (strcmp(argv[ args + 1 ], "100")==0) + frame_size = 5*sampling_rate/50; + else if (strcmp(argv[ args + 1 ], "120")==0) + frame_size = 6*sampling_rate/50; else { fprintf(stderr, "Unsupported frame size: %s ms. 
" - "Supported are 2.5, 5, 10, 20, 40, 60.\n", + "Supported are 2.5, 5, 10, 20, 40, 60, 80, 100, 120.\n", argv[ args + 1 ]); return EXIT_FAILURE; } @@ -612,8 +618,14 @@ int main(int argc, char *argv[]) variable_duration = OPUS_FRAMESIZE_20_MS; else if (frame_size==sampling_rate/25) variable_duration = OPUS_FRAMESIZE_40_MS; - else + else if (frame_size==3*sampling_rate/50) variable_duration = OPUS_FRAMESIZE_60_MS; + else if (frame_size==4*sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_80_MS; + else if (frame_size==5*sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_100_MS; + else + variable_duration = OPUS_FRAMESIZE_120_MS; opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration)); } frame_size = 2*48000; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index c5d5be1d..f6fa9e1f 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -115,6 +115,7 @@ struct OpusEncoder { int nb_no_activity_frames; opus_val32 peak_signal_energy; #endif + int nonfinal_frame; /* current frame is not the final in a packet */ opus_uint32 rangeFinal; }; @@ -863,14 +864,20 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_ new_size = frame_size; else if (variable_duration == OPUS_FRAMESIZE_VARIABLE) new_size = Fs/50; - else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) - new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); + else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_120_MS) + { + if (variable_duration <= OPUS_FRAMESIZE_40_MS) + new_size = (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS); + else + new_size = (variable_duration-OPUS_FRAMESIZE_2_5_MS-2)*Fs/50; + } else return -1; if (new_size>frame_size) return -1; - if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && - 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) + if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && 
+ 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs && + 50*new_size!=4*Fs && 50*new_size!=5*Fs && 50*new_size!=6*Fs) return -1; return new_size; } @@ -1212,15 +1219,27 @@ static opus_int32 encode_multiframe_packet(OpusEncoder *st, VARDECL(unsigned char, tmp_data); int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; VARDECL(OpusRepacketizer, rp); + int max_header_bytes; opus_int32 bytes_per_frame; opus_int32 cbr_bytes; opus_int32 repacketize_len; int tmp_len; ALLOC_STACK; - bytes_per_frame = IMIN(1276, (out_data_bytes-3)/nb_frames); - ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); + /* Worst cases: + * 2 frames: Code 2 with different compressed sizes + * >2 frames: Code 3 VBR */ + max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2); + if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX) + repacketize_len = out_data_bytes; + else { + cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames)); + repacketize_len = IMIN(cbr_bytes, out_data_bytes); + } + bytes_per_frame = IMIN(1276, 1+(repacketize_len-max_header_bytes)/nb_frames); + + ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); ALLOC(rp, 1, OpusRepacketizer); opus_repacketizer_init(rp); @@ -1231,8 +1250,8 @@ static opus_int32 encode_multiframe_packet(OpusEncoder *st, st->user_forced_mode = st->mode; st->user_bandwidth = st->bandwidth; st->force_channels = st->stream_channels; - bak_to_mono = st->silk_mode.toMono; + bak_to_mono = st->silk_mode.toMono; if (bak_to_mono) st->force_channels = 1; else @@ -1241,6 +1260,7 @@ static opus_int32 encode_multiframe_packet(OpusEncoder *st, for (i=0;isilk_mode.toMono = 0; + st->nonfinal_frame = i<(nb_frames-1); /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ if (to_celt && i==nb_frames-1) @@ -1265,14 +1285,7 @@ static opus_int32 encode_multiframe_packet(OpusEncoder *st, } } - if (st->use_vbr) - repacketize_len = out_data_bytes; - else { - /* Multiply by 3 to avoid inexact division */ - 
cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames)); - repacketize_len = IMIN(cbr_bytes, out_data_bytes); - } - + /* If encoding multiframes recursively, the true number of frames is rp->nb_frames. */ ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); if (ret<0) @@ -1338,7 +1351,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->rangeFinal = 0; if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && - 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) + 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs && 50*frame_size != 4*st->Fs && + 50*frame_size != 5*st->Fs && 50*frame_size != 6*st->Fs) || (400*frame_size < st->Fs) || max_data_bytes<=0 ) @@ -1426,10 +1440,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ { int cbrBytes; /* Multiply by 3 to make sure the division is exact. */ - int frame_rate3 = 3*st->Fs/frame_size; + int frame_rate6 = 6*st->Fs/frame_size; /* We need to make sure that "int" values always fit in 16 bits. */ - cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes); - st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3; + cbrBytes = IMIN( (6*st->bitrate_bps/8 + frame_rate6/2)/frame_rate6, max_data_bytes); + st->bitrate_bps = cbrBytes*(opus_int32)frame_rate6*8/6; /* Make sure we provide at least one byte to avoid failing. */ max_data_bytes = IMAX(1, cbrBytes); } @@ -1571,6 +1585,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->silk_mode.useDTX && voice_est > 100) st->mode = MODE_SILK_ONLY; #endif + + /* If max_data_bytes represents less than 6 kb/s, switch to CELT-only mode */ + if (max_data_bytes < (frame_rate > 50 ? 
9000 : 6000)*frame_size / (st->Fs * 8)) + st->mode = MODE_CELT_ONLY; } else { st->mode = st->user_forced_mode; } @@ -1580,19 +1598,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->mode = MODE_CELT_ONLY; if (st->lfe) st->mode = MODE_CELT_ONLY; - /* If max_data_bytes represents less than 6 kb/s, switch to CELT-only mode */ - if (max_data_bytes < (frame_rate > 50 ? 9000 : 6000)*frame_size / (st->Fs * 8)) - st->mode = MODE_CELT_ONLY; - - if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 - && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) - { - /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ - st->silk_mode.toMono = 1; - st->stream_channels = 2; - } else { - st->silk_mode.toMono = 0; - } if (st->prev_mode > 0 && ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || @@ -1613,6 +1618,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } } + /* When encoding multiframes, we can ask for a switch to CELT only in the last frame. This switch + * is processed above as the requested mode shouldn't interrupt stereo->mono transition. */ + if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 + && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) + { + /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ + st->silk_mode.toMono = 1; + st->stream_channels = 2; + } else { + st->silk_mode.toMono = 0; + } + /* Update equivalent rate with mode decision. 
*/ equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size, st->use_vbr, st->mode, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); @@ -1740,15 +1757,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->lfe) st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; - /* Can't support higher than wideband for >20 ms frames */ - if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) + curr_bandwidth = st->bandwidth; + + /* Chooses the appropriate mode for speech + *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ + if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) + st->mode = MODE_HYBRID; + if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) + st->mode = MODE_SILK_ONLY; + + /* Can't support frames longer than 60 ms, or longer than 20 ms when in Hybrid or CELT-only modes */ + if ((frame_size > st->Fs/50 && (st->mode != MODE_SILK_ONLY)) || frame_size > 3*st->Fs/50) { int enc_frame_size; int nb_frames; - /* CELT can only support up to 20 ms */ - enc_frame_size = st->Fs/50; - nb_frames = frame_size > st->Fs/25 ? 
3 : 2; + if (st->mode == MODE_SILK_ONLY) + { + if (frame_size == 2*st->Fs/25) /* 80 ms -> 2x 40 ms */ + enc_frame_size = st->Fs/25; + else if (frame_size == 3*st->Fs/25) /* 120 ms -> 2x 60 ms */ + enc_frame_size = 3*st->Fs/50; + else /* 100 ms -> 5x 20 ms */ + enc_frame_size = st->Fs/50; + } + else + enc_frame_size = st->Fs/50; + + nb_frames = frame_size/enc_frame_size; #ifndef DISABLE_FLOAT_API if (analysis_read_pos_bak!= -1) @@ -1764,14 +1800,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ RESTORE_STACK; return ret; } - curr_bandwidth = st->bandwidth; - /* Chooses the appropriate mode for speech - *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ - if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_HYBRID; - if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_SILK_ONLY; /* If we decided to go with CELT, make sure redundancy is off, no matter what we decided earlier. */ if (st->mode == MODE_CELT_ONLY) @@ -2017,7 +2046,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ silk_assert( st->silk_mode.internalSampleRate == 16000 ); } - st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; + st->silk_mode.opusCanSwitch = st->silk_mode.switchReady && !st->nonfinal_frame; /* FIXME: How do we allocate the redundancy for CBR? */ if (st->silk_mode.opusCanSwitch) { @@ -2801,10 +2830,12 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) 
case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && - value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && - value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && - value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE) + if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && + value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && + value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && + value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_80_MS && + value != OPUS_FRAMESIZE_100_MS && value != OPUS_FRAMESIZE_120_MS && + value != OPUS_FRAMESIZE_VARIABLE) { goto bad_arg; } diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index ff1393a2..6ec0fc5a 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -835,8 +835,8 @@ static opus_int32 rate_allocation( return rate_sum; } -/* Max size in case the encoder decides to return three frames */ -#define MS_FRAME_TMP (3*1275+7) +/* Max size in case the encoder decides to return six frames (6 x 20 ms = 120 ms) */ +#define MS_FRAME_TMP (6*1275+12) static int opus_multistream_encode_native ( OpusMSEncoder *st, @@ -903,9 +903,11 @@ static int opus_multistream_encode_native } /* Validate frame_size before using it to allocate stack space. This mirrors the checks in opus_encode[_float](). 
*/ - if (400*frame_size != Fs && 200*frame_size != Fs && - 100*frame_size != Fs && 50*frame_size != Fs && - 25*frame_size != Fs && 50*frame_size != 3*Fs) + if (400*frame_size != Fs && 200*frame_size != Fs && + 100*frame_size != Fs && 50*frame_size != Fs && + 25*frame_size != Fs && 50*frame_size != 3*Fs && + 50*frame_size != 4*Fs && 50*frame_size != 5*Fs && + 50*frame_size != 6*Fs) { RESTORE_STACK; return OPUS_BAD_ARG; diff --git a/tests/test_opus_api.c b/tests/test_opus_api.c index 489052d2..1d009505 100644 --- a/tests/test_opus_api.c +++ b/tests/test_opus_api.c @@ -1383,6 +1383,15 @@ opus_int32 test_enc_api(void) err=opus_encoder_ctl(enc,OPUS_SET_EXPERT_FRAME_DURATION(OPUS_FRAMESIZE_60_MS)); if(err!=OPUS_OK)test_failed(); cfgs++; + err=opus_encoder_ctl(enc,OPUS_SET_EXPERT_FRAME_DURATION(OPUS_FRAMESIZE_80_MS)); + if(err!=OPUS_OK)test_failed(); + cfgs++; + err=opus_encoder_ctl(enc,OPUS_SET_EXPERT_FRAME_DURATION(OPUS_FRAMESIZE_100_MS)); + if(err!=OPUS_OK)test_failed(); + cfgs++; + err=opus_encoder_ctl(enc,OPUS_SET_EXPERT_FRAME_DURATION(OPUS_FRAMESIZE_120_MS)); + if(err!=OPUS_OK)test_failed(); + cfgs++; CHECK_SETGET(OPUS_SET_EXPERT_FRAME_DURATION(i),OPUS_GET_EXPERT_FRAME_DURATION(&i),0,-1, OPUS_FRAMESIZE_60_MS,OPUS_FRAMESIZE_ARG, " OPUS_SET_EXPERT_FRAME_DURATION ............... 
OK.\n", diff --git a/tests/test_opus_encode.c b/tests/test_opus_encode.c index c49a2adf..a7439254 100644 --- a/tests/test_opus_encode.c +++ b/tests/test_opus_encode.c @@ -128,6 +128,12 @@ int get_frame_size_enum(int frame_size, int sampling_rate) frame_size_enum = OPUS_FRAMESIZE_40_MS; else if(frame_size==3*sampling_rate/50) frame_size_enum = OPUS_FRAMESIZE_60_MS; + else if(frame_size==4*sampling_rate/50) + frame_size_enum = OPUS_FRAMESIZE_80_MS; + else if(frame_size==5*sampling_rate/50) + frame_size_enum = OPUS_FRAMESIZE_100_MS; + else if(frame_size==6*sampling_rate/50) + frame_size_enum = OPUS_FRAMESIZE_120_MS; else test_failed(); @@ -189,14 +195,16 @@ void fuzz_encoder_settings(const int num_encoders, const int num_setting_changes int use_vbr[3] = {0, 1, 1}; int vbr_constraints[3] = {0, 1, 1}; int complexities[11] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - int max_bandwidths[6] = {OPUS_BANDWIDTH_NARROWBAND, OPUS_BANDWIDTH_MEDIUMBAND, OPUS_BANDWIDTH_WIDEBAND, OPUS_BANDWIDTH_SUPERWIDEBAND, OPUS_BANDWIDTH_FULLBAND, OPUS_BANDWIDTH_FULLBAND}; + int max_bandwidths[6] = {OPUS_BANDWIDTH_NARROWBAND, OPUS_BANDWIDTH_MEDIUMBAND, + OPUS_BANDWIDTH_WIDEBAND, OPUS_BANDWIDTH_SUPERWIDEBAND, + OPUS_BANDWIDTH_FULLBAND, OPUS_BANDWIDTH_FULLBAND}; int signals[4] = {OPUS_AUTO, OPUS_AUTO, OPUS_SIGNAL_VOICE, OPUS_SIGNAL_MUSIC}; int inband_fecs[3] = {0, 0, 1}; int packet_loss_perc[4] = {0, 1, 2, 5}; int lsb_depths[2] = {8, 24}; int prediction_disabled[3] = {0, 0, 1}; int use_dtx[2] = {0, 1}; - int frame_sizes_ms_x2[6] = {5, 10, 20, 40, 80, 120}; /* x2 to avoid 2.5 ms */ + int frame_sizes_ms_x2[9] = {5, 10, 20, 40, 80, 120, 160, 200, 240}; /* x2 to avoid 2.5 ms */ char debug_info[512]; for (i=0; i60 ms. + * Currently, SILK may internally adjust the bandwidth leading to mismatching + * bandwidths within a packet. 
*/ + if (frame_size_ms_x2 > 120) + dtx = 0; + sprintf(debug_info, "fuzz_encoder_settings: %d kHz, %d ch, application: %d, " "%d bps, force ch: %d, vbr: %d, vbr constraint: %d, complexity: %d, "