From c94e4bb103e6a989bcd2677c9178ee6ef461912c Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sun, 8 Dec 2013 03:31:50 -0500 Subject: [PATCH] Optimizes encoder NaN detection and clipping by only running them when needed NaN detection should now be able to catch values that would create NaNs further down. --- celt/celt_encoder.c | 24 ++++++++---------------- src/opus_encoder.c | 29 ++++++++++++++++++++++------- src/opus_multistream_encoder.c | 13 +++++++------ src/opus_private.h | 3 ++- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index ab28c8cd..917f9195 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -471,9 +471,8 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES coef0 = coef[0]; m = *mem; -#ifdef FIXED_POINT - /* Fast path for fixed-point in the normal 48kHz case */ - if (coef[1] == 0 && upsample == 1) + /* Fast path for the normal 48kHz case and no clipping */ + if (coef[1] == 0 && upsample == 1 && !clip) { for (i=0;inbits_total+=tell-ec_tell(enc); } c=0; do { + int need_clip=0; +#ifndef FIXED_POINT + need_clip = st->clip && sample_max>65536.f; +#endif celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, - mode->preemph, st->preemph_memE+c, st->clip); + mode->preemph, st->preemph_memE+c, need_clip); } while (++cuser_forced_mode = MODE_CELT_ONLY; tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, - NULL, 0, c1, c2, analysis_channels, downmix); + NULL, 0, c1, c2, analysis_channels, downmix, float_api); if (tmp_len<0) { RESTORE_STACK; @@ -1444,7 +1445,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } else { dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } - +#ifndef FIXED_POINT + if (float_api) + { + opus_val32 sum=0; + for (i=0;ichannels;i++) + sum += pcm_buf[total_buffer*st->channels+i]*pcm_buf[total_buffer*st->channels+i]; + /* This should filter out both NaNs and ridiculous signals that could + cause NaNs further down. */ + if (!(sum < 1e9)) + OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels); + } +#endif /* SILK processing */ @@ -1955,7 +1967,8 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra for (i=0;ichannels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); RESTORE_STACK; return ret; } @@ -1977,7 +1990,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram , st->analysis.subframe_mem #endif ); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); } #else @@ -2002,7 +2016,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram for (i=0;ichannels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); RESTORE_STACK; return ret; } @@ -2019,7 +2034,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, downmix_float, st->analysis.subframe_mem); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); } #endif diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 49e27913..d6db70f6 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -674,7 +674,8 @@ static int opus_multistream_encode_native unsigned char *data, opus_int32 max_data_bytes, int lsb_depth, - downmix_func downmix + downmix_func downmix, + int float_api ) { opus_int32 Fs; @@ -849,7 +850,7 @@ static int opus_multistream_encode_native if (!vbr && s == st->layout.nb_streams-1) opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size))); len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, - pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix); + pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api); if (len<0) { RESTORE_STACK; @@ -922,7 +923,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #ifndef DISABLE_FLOAT_API @@ -935,7 +936,7 @@ int opus_multistream_encode_float( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16, downmix_float); + pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1); } #endif @@ -951,7 +952,7 @@ int opus_multistream_encode_float ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float); + pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1); } int opus_multistream_encode( @@ -963,7 +964,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #endif diff --git a/src/opus_private.h b/src/opus_private.h index 83225f2b..2ab484bd 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -104,7 +104,8 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, + int analysis_channels, downmix_func downmix, int float_api); int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,