SILK fixes following last codec WG meeting

decoder: - fixed incorrect scaling of filter states for the smallest quantization step sizes - NLSF2A now limits the prediction gain of LPC filters encoder: - increased damping of LTP coefficients in LTP analysis - increased white noise fraction in noise shaping LPC analysis - introduced maximum total prediction gain. Used by Burg's method to exit early if prediction gain is exceeded. This improves packet loss robustness and numerical robustness in Burg's method - Prefiltered signal is now in int32 Q10 domain, from int16 Q0 - Increased max number of iterations in CBR gain control loop from 5 to 6 - Removed useless code from LTP scaling control - Optimization: smarter LPC loop unrolling - Switched default win32 compile mode to be floating-point resampler: - made resampler have constant delay of 0.75 ms; removed delay compensation from silk code. - removed obsolete table entries (~850 Bytes) - increased downsampling filter order from 16 to 18/24/36 (depending on frequency ratio) - reoptimized filter coefficients
2025-05-29 06:39:15 +00:00 · 2011-12-13 14:47:31 -05:00 · 2011-12-13 14:47:31 -05:00 · bf75c8ec4d
commit bf75c8ec4d
parent 6619a73637
71 changed files with 961 additions and 1005 deletions
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@ -140,10 +140,10 @@ opus_int silk_Encode(                                   /* O    Returns error co
    opus_int   n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
    opus_int   nSamplesToBuffer, nBlocksOf10ms, nSamplesFromInput = 0;
    opus_int   speech_act_thr_for_switch_Q8;
-    opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol;
+    opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
    silk_encoder *psEnc = ( silk_encoder * )encState;
-    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ + MAX_ENCODER_DELAY];
-    opus_int transition, delay, curr_block, tot_blocks;
+    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];
+    opus_int transition, curr_block, tot_blocks;

    psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;

@ -233,7 +233,6 @@ opus_int silk_Encode(                                   /* O    Returns error co
    }
    silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

-    delay = psEnc->state_Fxx[ 0 ].sCmn.delay;
    /* Input buffering/resampling and encoding */
    while( 1 ) {
        nSamplesToBuffer  = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
@ -243,15 +242,12 @@ opus_int silk_Encode(                                   /* O    Returns error co
        if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
            opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
            for( n = 0; n < nSamplesFromInput; n++ ) {
-                buf[ n+delay ] = samplesIn[ 2 * n ];
+                buf[ n ] = samplesIn[ 2 * n ];
            }
-            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
-            if(psEnc->nPrevChannelsInternal == 1 && id==0) {
+            if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
-               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));
            }
-            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@ -260,25 +256,18 @@ opus_int silk_Encode(                                   /* O    Returns error co
            nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
            nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
            for( n = 0; n < nSamplesFromInput; n++ ) {
-                buf[ n + delay ] = samplesIn[ 2 * n + 1 ];
+                buf[ n ] = samplesIn[ 2 * n + 1 ];
            }
-            silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
            ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
                &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
-            silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

            psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
        } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
            /* Combine left and right channels before resampling */
            for( n = 0; n < nSamplesFromInput; n++ ) {
-                buf[ n + delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );
+                sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
+                buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum,  1 );
            }
-            if(psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded==0) {
-               for( n = 0; n<MAX_ENCODER_DELAY; n++ ) {
-                  psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ]+(opus_int32)psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ n ], 1);
-               }
-            }
-            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
            /* On the first mono frame, average the results for the two resampler states  */
@ -291,15 +280,12 @@ opus_int silk_Encode(                                   /* O    Returns error co
                                  + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
               }
            }
-            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
        } else {
            silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
-            silk_memcpy(buf + delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
-            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
+            silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
-            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
        }