SILK fixes following last codec WG meeting

decoder:
- fixed incorrect scaling of filter states for the smallest quantization
  step sizes
- NLSF2A now limits the prediction gain of LPC filters

encoder:
- increased damping of LTP coefficients in LTP analysis
- increased white noise fraction in noise shaping LPC analysis
- introduced maximum total prediction gain.  Used by Burg's method to
  exit early if prediction gain is exceeded.  This improves packet
  loss robustness and numerical robustness in Burg's method
- Prefiltered signal is now in int32 Q10 domain, from int16 Q0
- Increased max number of iterations in CBR gain control loop from 5 to 6
- Removed useless code from LTP scaling control
- Optimization: smarter LPC loop unrolling
- Switched default win32 compile mode to be floating-point

resampler:
- made resampler have constant delay of 0.75 ms; removed delay
  compensation from silk code.
- removed obsolete table entries (~850 Bytes)
- increased downsampling filter order from 16 to 18/24/36 (depending on
  frequency ratio)
- reoptimized filter coefficients
This commit is contained in:
Koen Vos 2011-12-13 14:47:31 -05:00 committed by Jean-Marc Valin
parent 6619a73637
commit bf75c8ec4d
71 changed files with 961 additions and 1005 deletions

View file

@ -140,10 +140,10 @@ opus_int silk_Encode( /* O Returns error co
opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
opus_int nSamplesToBuffer, nBlocksOf10ms, nSamplesFromInput = 0;
opus_int speech_act_thr_for_switch_Q8;
opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol;
opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
silk_encoder *psEnc = ( silk_encoder * )encState;
opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ + MAX_ENCODER_DELAY];
opus_int transition, delay, curr_block, tot_blocks;
opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];
opus_int transition, curr_block, tot_blocks;
psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
@ -233,7 +233,6 @@ opus_int silk_Encode( /* O Returns error co
}
silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
delay = psEnc->state_Fxx[ 0 ].sCmn.delay;
/* Input buffering/resampling and encoding */
while( 1 ) {
nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
@ -243,15 +242,12 @@ opus_int silk_Encode( /* O Returns error co
if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n+delay ] = samplesIn[ 2 * n ];
buf[ n ] = samplesIn[ 2 * n ];
}
silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
/* Making sure to start both resamplers from the same state when switching from mono to stereo */
if(psEnc->nPrevChannelsInternal == 1 && id==0) {
if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));
}
silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@ -260,25 +256,18 @@ opus_int silk_Encode( /* O Returns error co
nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n + delay ] = samplesIn[ 2 * n + 1 ];
buf[ n ] = samplesIn[ 2 * n + 1 ];
}
silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
} else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
/* Combine left and right channels before resampling */
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n + delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 );
sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
}
if(psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded==0) {
for( n = 0; n<MAX_ENCODER_DELAY; n++ ) {
psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ]+(opus_int32)psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ n ], 1);
}
}
silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
/* On the first mono frame, average the results for the two resampler states */
@ -291,15 +280,12 @@ opus_int silk_Encode( /* O Returns error co
+ psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
}
}
silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
} else {
silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
silk_memcpy(buf + delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
}