diff --git a/silk/control.h b/silk/control.h index 912455b2..ac1b288e 100644 --- a/silk/control.h +++ b/silk/control.h @@ -117,6 +117,9 @@ typedef struct { /* I: Number of samples per packet in milliseconds; 10/20/40/60 */ opus_int payloadSize_ms; + + /* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */ + opus_int prevPitchLag; } silk_DecControlStruct; #ifdef __cplusplus diff --git a/silk/control_codec.c b/silk/control_codec.c index 64b10915..796c3ef6 100644 --- a/silk/control_codec.c +++ b/silk/control_codec.c @@ -153,6 +153,7 @@ opus_int silk_setup_resamplers( } else { /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */ opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ]; + silk_resampler_state_struct temp_resampler_state; #ifdef FIXED_POINT opus_int16 *x_bufFIX = psEnc->x_buf; #else @@ -165,32 +166,21 @@ opus_int silk_setup_resamplers( silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp ); #endif - if( silk_SMULBB( fs_kHz, 1000 ) < psEnc->sCmn.API_fs_Hz && psEnc->sCmn.fs_kHz != 0 ) { - /* Resample buffered data in x_buf to API_fs_Hz */ + /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */ + ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz ); - silk_resampler_state_struct temp_resampler_state; + /* Temporary resampling of x_buf data to API_fs_Hz */ + ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp ); - /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */ - ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz ); + /* Calculate number of samples that has been temporarily upsampled */ + nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) ); - /* Temporary resampling of x_buf data to 
API_fs_Hz */ - ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp ); + /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ + ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ) ); - /* Calculate number of samples that has been temporarily upsampled */ - nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) ); + /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */ + ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp ); - /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ - ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ) ); - - } else { - /* Copy data */ - silk_memcpy( x_buf_API_fs_Hz, x_bufFIX, nSamples_temp * sizeof( opus_int16 ) ); - } - - if( 1000 * fs_kHz != psEnc->sCmn.API_fs_Hz ) { - /* Correct resampler state (unless resampling by a factor 1) by resampling buffered data from API_fs_Hz to fs_kHz */ - ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp ); - } #ifndef FIXED_POINT silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz ); #endif @@ -251,14 +241,9 @@ opus_int silk_setup_fs( silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); if( psEnc->sCmn.fs_kHz != fs_kHz ) { /* reset part of the state */ -#ifdef FIXED_POINT - silk_memset( &psEnc->sShape, 0, sizeof( silk_shape_state_FIX ) ); - silk_memset( &psEnc->sPrefilt, 0, sizeof( silk_prefilter_state_FIX ) ); -#else - silk_memset( &psEnc->sShape, 0, sizeof( silk_shape_state_FLP ) ); - silk_memset( &psEnc->sPrefilt, 0, sizeof( silk_prefilter_state_FLP ) ); -#endif - silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( silk_nsq_state ) ); + silk_memset( 
&psEnc->sShape, 0, sizeof( psEnc->sShape ) ); + silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) ); + silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) ); silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) ); psEnc->sCmn.inputBufIx = 0; @@ -272,6 +257,7 @@ opus_int silk_setup_fs( psEnc->sShape.LastGainIndex = 10; psEnc->sCmn.sNSQ.lagPrev = 100; psEnc->sCmn.sNSQ.prev_inv_gain_Q16 = 65536; + psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; psEnc->sCmn.fs_kHz = fs_kHz; if( psEnc->sCmn.fs_kHz == 8 ) { diff --git a/silk/create_init_destroy.c b/silk/create_init_destroy.c index 1e32c941..ead74280 100644 --- a/silk/create_init_destroy.c +++ b/silk/create_init_destroy.c @@ -42,7 +42,7 @@ opus_int silk_init_decoder( /* Clear the entire encoder state, except anything copied */ silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); - /* Used to deactivate e.g. LSF interpolation and fluctuation reduction */ + /* Used to deactivate LSF interpolation */ psDec->first_frame_after_reset = 1; psDec->prev_inv_gain_Q16 = 65536; diff --git a/silk/dec_API.c b/silk/dec_API.c index a66eeaf4..d98f74c6 100644 --- a/silk/dec_API.c +++ b/silk/dec_API.c @@ -31,14 +31,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "API.h" #include "main.h" -static const int dec_delay_matrix[3][5] = { -/*SILK API 8 12 16 24 48 */ -/* 8 */ {3, 0, 2, 0, 0}, -/*12 */ {0, 8, 5, 7, 5}, -/*16 */ {0, 0, 8, 5, 5} -}; - - /************************/ /* Decoder Super Struct */ /************************/ @@ -47,6 +39,7 @@ typedef struct { stereo_dec_state sStereo; opus_int nChannelsAPI; opus_int nChannelsInternal; + opus_int prev_decode_only_middle; } silk_decoder; /*********************/ @@ -88,7 +81,7 @@ opus_int silk_Decode( opus_int32 *nSamplesOut /* O: Number of samples decoded */ ) { - opus_int i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR; + opus_int i, n, delay, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int32 nSamplesOutDec, LBRR_symbol; opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ]; opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ]; @@ -96,9 +89,6 @@ opus_int silk_Decode( opus_int16 *resample_out_ptr; silk_decoder *psDec = ( silk_decoder * )decState; silk_decoder_state *channel_state = psDec->channel_state; - int delay; - - delay = channel_state[ 0 ].delay; /**********************************/ /* Test if first frame in payload */ @@ -109,16 +99,9 @@ opus_int silk_Decode( } } - /* Save previous sample frequency */ - prev_fs_kHz = channel_state[ 0 ].fs_kHz; - /* If Mono -> Stereo transition in bitstream: init state of second channel */ if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { ret += silk_init_decoder( &channel_state[ 1 ] ); - if( psDec->nChannelsAPI == 2 ) { - silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); - silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16)); - } } for( n = 0; n < decControl->nChannelsInternal; n++ ) { @@ -149,24 +132,17 @@ opus_int silk_Decode( silk_assert( 0 ); return SILK_DEC_INVALID_SAMPLING_FREQUENCY; } - 
silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec ); + ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); } } - /* Initialize resampler when switching internal or external sampling frequency */ - if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) { - channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)]; - silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY); - ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate ); - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { - silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); - channel_state[ 1 ].delay = channel_state[ 0 ].delay; - } - } - channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate; + delay = channel_state[ 0 ].delay; + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); + silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); + silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, sizeof(channel_state[ 0 ].delayBuf)); } psDec->nChannelsAPI = decControl->nChannelsAPI; psDec->nChannelsInternal = decControl->nChannelsInternal; @@ -237,11 +213,20 @@ opus_int silk_Decode( } } else { for( n = 0; n < 2; n++ ) { - MS_pred_Q13[n] = psDec->sStereo.pred_prev_Q13[n]; + MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; } } } + /* Reset side channel decoder prediction memory for first frame 
with side coding */ + if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { + silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); + silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); + psDec->channel_state[ 1 ].lagPrev = 100; + psDec->channel_state[ 1 ].LastGainIndex = 10; + psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; + } + /* Call decoder for one frame */ for( n = 0; n < decControl->nChannelsInternal; n++ ) { if( n == 0 || decode_only_middle == 0 ) { @@ -253,10 +238,10 @@ opus_int silk_Decode( if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { /* Convert Mid/Side to Left/Right */ - silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); + silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][ delay ], &samplesOut1_tmp[ 1 ][ delay ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); } else { /* Buffering */ - silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); + silk_memcpy( &samplesOut1_tmp[ 0 ][ delay ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) ); } @@ -272,10 +257,10 @@ opus_int silk_Decode( for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { - silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], delay*sizeof(opus_int16)); + silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY - delay ], delay * sizeof(opus_int16)); /* Resample decoded signal to API_sampleRate */ ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec 
); - silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16)); + silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY * sizeof(opus_int16)); /* Interleave if stereo output and stereo stream */ if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { @@ -292,6 +277,16 @@ opus_int silk_Decode( } } + /* Export pitch lag, measured at 48 kHz sampling rate */ + if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { + int mult_tab[ 3 ] = { 6, 4, 3 }; + decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; + } else { + decControl->prevPitchLag = 0; + } + + psDec->prev_decode_only_middle = decode_only_middle; + return ret; } diff --git a/silk/decoder_set_fs.c b/silk/decoder_set_fs.c index c2430f8c..f80470f3 100644 --- a/silk/decoder_set_fs.c +++ b/silk/decoder_set_fs.c @@ -31,24 +31,59 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "main.h" +static const int dec_delay_matrix[3][5] = { +/*SILK API 8 12 16 24 48 */ +/* 8 */ {3, 0, 2, 0, 0}, +/*12 */ {0, 8, 5, 7, 5}, +/*16 */ {0, 0, 8, 5, 5} +}; + /* Set decoder sampling rate */ -void silk_decoder_set_fs( +opus_int silk_decoder_set_fs( silk_decoder_state *psDec, /* I/O Decoder state pointer */ - opus_int fs_kHz /* I Sampling frequency (kHz) */ + opus_int fs_kHz, /* I Sampling frequency (kHz) */ + opus_int fs_API_Hz /* I API Sampling frequency (Hz) */ ) { - opus_int frame_length; + opus_int frame_length, ret = 0; silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 ); + /* New (sub)frame length */ psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz ); frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length ); + + /* Initialize resampler when switching internal or external sampling frequency */ + if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) { + /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */ + opus_int16 temp_buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ]; + silk_resampler_state_struct temp_resampler_state; + + /* New delay value */ + psDec->delay = dec_delay_matrix[ rateID( silk_SMULBB( fs_kHz, 1000 ) ) ][ rateID( fs_API_Hz ) ]; + silk_assert( psDec->delay <= MAX_DECODER_DELAY ); + + if( psDec->fs_kHz != fs_kHz && psDec->fs_kHz > 0 ) { + /* Initialize resampler for temporary resampling of outBuf data to the new internal sampling rate */ + ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psDec->fs_kHz, 1000 ), silk_SMULBB( fs_kHz, 1000 ) ); + + /* Temporary resampling of outBuf data to the new internal sampling rate */ + silk_memcpy( temp_buf, psDec->outBuf, psDec->frame_length * sizeof( opus_int16 ) ); + ret += silk_resampler( &temp_resampler_state, psDec->outBuf, temp_buf, psDec->frame_length ); + } + + /* Initialize the resampler for dec_API.c preparing 
resampling from fs_kHz to API_fs_Hz */ + ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz ); + + /* Correct resampler state by resampling buffered data from fs_kHz to API_fs_Hz */ + ret += silk_resampler( &psDec->resampler_state, temp_buf, psDec->outBuf, frame_length ); + + psDec->fs_API_hz = fs_API_Hz; + } + if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) { - psDec->fs_kHz = fs_kHz; - psDec->frame_length = frame_length; - psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); - if( psDec->fs_kHz == 8 ) { + if( fs_kHz == 8 ) { if( psDec->nb_subfr == MAX_NB_SUBFR ) { psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; } else { @@ -61,40 +96,38 @@ void silk_decoder_set_fs( psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; } } - if( psDec->fs_kHz == 8 || psDec->fs_kHz == 12 ) { - psDec->LPC_order = MIN_LPC_ORDER; - psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB; - } else { - psDec->LPC_order = MAX_LPC_ORDER; - psDec->psNLSF_CB = &silk_NLSF_CB_WB; + if( psDec->fs_kHz != fs_kHz ) { + psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); + if( fs_kHz == 8 || fs_kHz == 12 ) { + psDec->LPC_order = MIN_LPC_ORDER; + psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB; + } else { + psDec->LPC_order = MAX_LPC_ORDER; + psDec->psNLSF_CB = &silk_NLSF_CB_WB; + } + if( fs_kHz == 16 ) { + psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; + } else if( fs_kHz == 12 ) { + psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; + } else if( fs_kHz == 8 ) { + psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; + } else { + /* unsupported sampling rate */ + silk_assert( 0 ); + } + psDec->first_frame_after_reset = 1; + psDec->lagPrev = 100; + psDec->LastGainIndex = 10; + psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY; } - if( psDec->fs_kHz != fs_kHz) - { - /* Reset part of the decoder state */ - silk_memset( psDec->sLPC_Q14_buf, 0, sizeof( psDec->sLPC_Q14_buf ) ); - silk_memset( psDec->outBuf, 0, 
MAX_FRAME_LENGTH * sizeof( opus_int16 ) ); - silk_memset( psDec->prevNLSF_Q15, 0, sizeof( psDec->prevNLSF_Q15 ) ); - } - psDec->lagPrev = 100; - psDec->LastGainIndex = 10; - psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY; - if( psDec->fs_kHz != fs_kHz) - psDec->first_frame_after_reset = 1; - - if( fs_kHz == 16 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; - } else if( fs_kHz == 12 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; - } else if( fs_kHz == 8 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; - } else { - /* unsupported sampling rate */ - silk_assert( 0 ); - } + psDec->fs_kHz = fs_kHz; + psDec->frame_length = frame_length; } /* Check that settings are valid */ silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH ); + + return ret; } diff --git a/silk/define.h b/silk/define.h index c7cbdcf8..f56ca2d7 100644 --- a/silk/define.h +++ b/silk/define.h @@ -214,12 +214,12 @@ extern "C" #define NLSF_QUANT_DEL_DEC_STATES ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 ) /* Transition filtering for mode switching */ -# define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/ -# define TRANSITION_NB 3 /* Hardcoded in tables */ -# define TRANSITION_NA 2 /* Hardcoded in tables */ -# define TRANSITION_INT_NUM 5 /* Hardcoded in tables */ -# define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) /* todo: needs to be made flexible for 10 ms frames*/ -# define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) ) +#define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/ +#define TRANSITION_NB 3 /* Hardcoded in tables */ +#define TRANSITION_NA 2 /* Hardcoded in tables */ +#define TRANSITION_INT_NUM 5 /* Hardcoded in tables */ +#define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) +#define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) ) /* BWE factors to apply 
after packet loss */ #define BWE_AFTER_LOSS_Q16 63570 diff --git a/silk/enc_API.c b/silk/enc_API.c index fb27437e..339dafc4 100644 --- a/silk/enc_API.c +++ b/silk/enc_API.c @@ -237,13 +237,13 @@ opus_int silk_Encode( for( n = 0; n < nSamplesFromInput; n++ ) { buf[ n+delay ] = samplesIn[ 2 * n ]; } - silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); + silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16)); /* Making sure to start both resamplers from the same state when switching from mono to stereo */ if(psEnc->nPrevChannelsInternal == 1 && id==0) { silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16)); } - silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); + silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); @@ -252,24 +252,24 @@ opus_int silk_Encode( nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n+delay ] = samplesIn[ 2 * n + 1 ]; + buf[ n + delay ] = samplesIn[ 2 * n + 1 ]; } - silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); + silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * 
sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); + silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { /* Combine left and right channels before resampling */ for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 ); + buf[ n + delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 ); } if(psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded==0) { for ( n = 0; n<MAX_ENCODER_DELAY; n++ ) psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ]+(opus_int32)psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ n ], 1); } - silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); + silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); /* On the first mono frame, average the results for the two resampler states */ @@ -281,17 +281,16 @@ opus_int silk_Encode( silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); } - } - silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, 
MAX_ENCODER_DELAY*sizeof(opus_int16)); + silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; } else { silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); - silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16)); - silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); + silk_memcpy(buf + delay, samplesIn, nSamplesFromInput*sizeof(opus_int16)); + silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); + silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; } @@ -387,6 +386,22 @@ opus_int silk_Encode( silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); } + /* Reset side channel encoder memory for first frame with side coding */ + if( encControl->nChannelsInternal == 2 && psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 && psEnc->prev_decode_only_middle == 1 ) { + silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); + silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 
0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) ); + psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; + psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; + psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; + psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; + psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536; + } + psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ]; + /* Encode */ for( n = 0; n < encControl->nChannelsInternal; n++ ) { if( encControl->nChannelsInternal == 1 ) { @@ -450,6 +465,7 @@ opus_int silk_Encode( break; } } + psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; diff --git a/silk/fixed/structs_FIX.h b/silk/fixed/structs_FIX.h index 35253521..47e845d1 100644 --- a/silk/fixed/structs_FIX.h +++ b/silk/fixed/structs_FIX.h @@ -123,6 +123,7 @@ typedef struct { opus_int nPrevChannelsInternal; opus_int timeSinceSwitchAllowed_ms; opus_int allowBandwidthSwitch; + opus_int prev_decode_only_middle; } silk_encoder; diff --git a/silk/float/structs_FLP.h b/silk/float/structs_FLP.h index eab77ec5..99264c97 100644 --- a/silk/float/structs_FLP.h +++ b/silk/float/structs_FLP.h @@ -121,6 +121,7 @@ typedef struct { opus_int nPrevChannelsInternal; opus_int timeSinceSwitchAllowed_ms; opus_int allowBandwidthSwitch; + opus_int prev_decode_only_middle; } silk_encoder; #ifdef __cplusplus diff --git a/silk/init_encoder.c b/silk/init_encoder.c index cebf8ccd..52befadf 100644 --- a/silk/init_encoder.c +++ b/silk/init_encoder.c @@ -49,7 +49,7 @@ opus_int silk_init_encoder( psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 ); 
psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15; - /* Used to deactivate LSF interpolation, fluctuation reduction, pitch prediction */ + /* Used to deactivate LSF interpolation, pitch prediction */ psEnc->sCmn.first_frame_after_reset = 1; /* Initialize Silk VAD */ diff --git a/silk/main.h b/silk/main.h index 981c7cab..85423ab6 100644 --- a/silk/main.h +++ b/silk/main.h @@ -363,9 +363,10 @@ opus_int silk_init_decoder( ); /* Set decoder sampling rate */ -void silk_decoder_set_fs( +opus_int silk_decoder_set_fs( silk_decoder_state *psDec, /* I/O Decoder state pointer */ - opus_int fs_kHz /* I Sampling frequency (kHz) */ + opus_int fs_kHz, /* I Sampling frequency (kHz) */ + opus_int fs_API_Hz /* I API Sampling frequency (Hz) */ ); /****************/ diff --git a/silk/stereo_LR_to_MS.c b/silk/stereo_LR_to_MS.c index 7fbb3ae7..bbcd30bf 100644 --- a/silk/stereo_LR_to_MS.c +++ b/silk/stereo_LR_to_MS.c @@ -87,7 +87,7 @@ void silk_stereo_LR_to_MS( smooth_coef_Q16 = is10msFrame ? 
SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) : SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 ); - smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8 , prev_speech_act_Q8 ), smooth_coef_Q16 ); + smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 ); pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 ); pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 ); @@ -168,8 +168,20 @@ void silk_stereo_LR_to_MS( width_Q14 = state->smth_width_Q14; } - if (*mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1) - { + /* Make sure to keep on encoding until the tapered output has been transmitted */ + if( *mid_only_flag == 1 ) { + state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz; + if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) { + *mid_only_flag = 0; + } else { + /* Limit to avoid wrapping around */ + state->silent_side_len = 10000; + } + } else { + state->silent_side_len = 0; + } + + if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) { mid_side_rates_bps[ 1 ] = 1; mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]); } diff --git a/silk/stereo_find_predictor.c b/silk/stereo_find_predictor.c index 73c307fe..49ef46d7 100644 --- a/silk/stereo_find_predictor.c +++ b/silk/stereo_find_predictor.c @@ -42,26 +42,32 @@ opus_int32 silk_stereo_find_predictor( /* O Returns predict ) { opus_int scale, scale1, scale2; - opus_int32 nrgx, nrgy, corr, pred_Q13; + opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10; /* Find predictor */ silk_sum_sqr_shift( &nrgx, &scale1, x, length ); silk_sum_sqr_shift( &nrgy, &scale2, y, length ); - scale = silk_max( scale1, scale2 ); + scale = silk_max_int( scale1, scale2 ) + 1; scale = scale + ( scale & 1 ); /* make even */ nrgy = silk_RSHIFT32( nrgy, scale - scale2 ); 
nrgx = silk_RSHIFT32( nrgx, scale - scale1 ); - nrgx = silk_max( nrgx, 1 ); + nrgx = silk_max_int( nrgx, 1 ); corr = silk_inner_prod_aligned_scale( x, y, scale, length ); pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 ); - pred_Q13 = silk_SAT16( pred_Q13 ); + pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 ); + pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 ); + + /* Faster update for signals with large prediction parameters */ + smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) ); /* Smoothed mid and residual norms */ silk_assert( smooth_coef_Q16 < 32768 ); scale = silk_RSHIFT( scale, 1 ); mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ], smooth_coef_Q16 ); - nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 ); + /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */ + nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 ); + nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 ); mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ], smooth_coef_Q16 ); diff --git a/silk/structs.h b/silk/structs.h index c3de0241..d11aa281 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -101,6 +101,7 @@ typedef struct { opus_int32 mid_side_amp_Q0[ 4 ]; opus_int16 smth_width_Q14; opus_int16 width_prev_Q14; + opus_int16 silent_side_len; opus_int8 predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ]; opus_int8 mid_only_flags[ MAX_FRAMES_PER_PACKET ]; } stereo_enc_state; @@ -172,7 +173,7 @@ typedef struct { opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */ opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */ opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */ - opus_int first_frame_after_reset; /* Flag for deactivating NLSF interp. and fluc. 
reduction after resets */ + opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */ opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */ opus_int warping_Q16; /* Warping parameter for warped noise shaping */ opus_int useCBR; /* Flag to enable constant bitrate */ @@ -263,14 +264,14 @@ typedef struct { opus_int lagPrev; /* Previous Lag */ opus_int8 LastGainIndex; /* Previous gain index */ opus_int fs_kHz; /* Sampling frequency in kHz */ - opus_int32 prev_API_sampleRate; /* Previous API sample frequency (Hz) */ + opus_int32 fs_API_hz; /* API sample frequency (Hz) */ opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ opus_int frame_length; /* Frame length (samples) */ opus_int subfr_length; /* Subframe length (samples) */ opus_int ltp_mem_length; /* Length of LTP memory */ opus_int LPC_order; /* LPC order */ opus_int16 prevNLSF_Q15[ MAX_LPC_ORDER ]; /* Used to interpolate LSFs */ - opus_int first_frame_after_reset; /* Flag for deactivating NLSF interp. and fluc. reduction after resets */ + opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation */ const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 6eb34920..ce2598d3 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -802,9 +802,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) if (st->prev_mode == MODE_CELT_ONLY) celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); else - *value = ((silk_decoder_state*)silk_dec)->indices.signalType == TYPE_VOICED - ? ((silk_decoder_state*)silk_dec)->lagPrev*48/((silk_decoder_state*)silk_dec)->fs_kHz - : 0; + *value = st->DecControl.prevPitchLag; } break; default: