mirror of
https://github.com/xiph/opus.git
synced 2025-05-29 06:39:15 +00:00
Improvements to the pitch search
Normalizes the cost function by (x+y) instead of sqrt(x*y)
This commit is contained in:
parent
1bcf028af8
commit
9cbbcb53ae
6 changed files with 144 additions and 248 deletions
|
@ -168,12 +168,6 @@ opus_int32 silk_log2lin(
|
|||
const opus_int32 inLog_Q7 /* I input on log scale */
|
||||
);
|
||||
|
||||
/* Function that returns the maximum absolute value of the input vector */
|
||||
opus_int16 silk_int16_array_maxabs( /* O Maximum absolute value, max: 2^15-1 */
|
||||
const opus_int16 *vec, /* I Input vector [len] */
|
||||
const opus_int32 len /* I Length of input vector */
|
||||
);
|
||||
|
||||
/* Compute number of bits to right shift the sum of squares of a vector */
|
||||
/* of int16s to make it fit in an int32 */
|
||||
void silk_sum_sqr_shift(
|
||||
|
@ -252,7 +246,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
|
||||
opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
|
||||
const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
|
||||
const opus_int search_thres2_Q15, /* I Final threshold for lag candidates 0 - 1 */
|
||||
const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
|
||||
const opus_int Fs_kHz, /* I Sample frequency (kHz) */
|
||||
const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
|
||||
const opus_int nb_subfr /* I number of 5 ms subframes */
|
||||
|
|
|
@ -41,7 +41,7 @@ void silk_find_pitch_lags_FIX(
|
|||
)
|
||||
{
|
||||
opus_int buf_len, i, scale;
|
||||
opus_int32 thrhld_Q15, res_nrg;
|
||||
opus_int32 thrhld_Q13, res_nrg;
|
||||
const opus_int16 *x_buf, *x_buf_ptr;
|
||||
opus_int16 Wsig[ FIND_PITCH_LPC_WIN_MAX ], *Wsig_ptr;
|
||||
opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
|
||||
|
@ -110,19 +110,19 @@ void silk_find_pitch_lags_FIX(
|
|||
|
||||
if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
|
||||
/* Threshold for pitch estimator */
|
||||
thrhld_Q15 = SILK_FIX_CONST( 0.6, 15 );
|
||||
thrhld_Q15 = silk_SMLABB( thrhld_Q15, SILK_FIX_CONST( -0.004, 15 ), psEnc->sCmn.pitchEstimationLPCOrder );
|
||||
thrhld_Q15 = silk_SMLABB( thrhld_Q15, SILK_FIX_CONST( -0.1, 7 ), psEnc->sCmn.speech_activity_Q8 );
|
||||
thrhld_Q15 = silk_SMLABB( thrhld_Q15, SILK_FIX_CONST( -0.15, 15 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );
|
||||
thrhld_Q15 = silk_SMLAWB( thrhld_Q15, SILK_FIX_CONST( -0.1, 16 ), psEnc->sCmn.input_tilt_Q15 );
|
||||
thrhld_Q15 = silk_SAT16( thrhld_Q15 );
|
||||
thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 );
|
||||
thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder );
|
||||
thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 21 ), psEnc->sCmn.speech_activity_Q8 );
|
||||
thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15, 13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );
|
||||
thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 14 ), psEnc->sCmn.input_tilt_Q15 );
|
||||
thrhld_Q13 = silk_SAT16( thrhld_Q13 );
|
||||
|
||||
/*****************************************/
|
||||
/* Call pitch estimator */
|
||||
/*****************************************/
|
||||
if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
|
||||
&psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
|
||||
(opus_int16)thrhld_Q15, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
|
||||
(opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
|
||||
{
|
||||
psEnc->sCmn.indices.signalType = TYPE_VOICED;
|
||||
} else {
|
||||
|
|
|
@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/************************************************************/
|
||||
/* Internally used functions */
|
||||
/************************************************************/
|
||||
void silk_P_Ana_calc_corr_st3(
|
||||
opus_int32 cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM correlation array */
|
||||
static void silk_P_Ana_calc_corr_st3(
|
||||
opus_int32 cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */
|
||||
const opus_int16 frame[], /* I vector to correlate */
|
||||
opus_int start_lag, /* I lag offset to search around */
|
||||
opus_int sf_length, /* I length of a 5 ms subframe */
|
||||
|
@ -50,8 +50,8 @@ void silk_P_Ana_calc_corr_st3(
|
|||
opus_int complexity /* I Complexity setting */
|
||||
);
|
||||
|
||||
void silk_P_Ana_calc_energy_st3(
|
||||
opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM energy array */
|
||||
static void silk_P_Ana_calc_energy_st3(
|
||||
opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM energy array */
|
||||
const opus_int16 frame[], /* I vector to calc energy in */
|
||||
opus_int start_lag, /* I lag offset to search around */
|
||||
opus_int sf_length, /* I length of one 5 ms subframe */
|
||||
|
@ -59,12 +59,6 @@ void silk_P_Ana_calc_energy_st3(
|
|||
opus_int complexity /* I Complexity setting */
|
||||
);
|
||||
|
||||
opus_int32 silk_P_Ana_find_scaling(
|
||||
const opus_int16 *frame,
|
||||
const opus_int frame_length,
|
||||
const opus_int sum_sqr_len
|
||||
);
|
||||
|
||||
/*************************************************************/
|
||||
/* FIXED POINT CORE PITCH ANALYSIS FUNCTION */
|
||||
/*************************************************************/
|
||||
|
@ -76,7 +70,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
|
||||
opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
|
||||
const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
|
||||
const opus_int search_thres2_Q15, /* I Final threshold for lag candidates 0 - 1 */
|
||||
const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
|
||||
const opus_int Fs_kHz, /* I Sample frequency (kHz) */
|
||||
const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
|
||||
const opus_int nb_subfr /* I number of 5 ms subframes */
|
||||
|
@ -93,18 +87,18 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
|
||||
opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
|
||||
opus_int16 d_comp[ ( PE_MAX_LAG >> 1 ) + 5 ];
|
||||
opus_int32 sum, threshold, temp32, lag_counter;
|
||||
opus_int32 sum, threshold, lag_counter;
|
||||
opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
|
||||
opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
|
||||
opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
|
||||
opus_int32 crosscorr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
|
||||
opus_int frame_length, frame_length_8kHz, frame_length_4kHz, max_sum_sq_length;
|
||||
opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
|
||||
opus_int sf_length, sf_length_8kHz, sf_length_4kHz;
|
||||
opus_int min_lag, min_lag_8kHz, min_lag_4kHz;
|
||||
opus_int max_lag, max_lag_8kHz, max_lag_4kHz;
|
||||
opus_int32 contour_bias_Q20, diff, lz, lshift;
|
||||
opus_int32 contour_bias_Q15, diff;
|
||||
opus_int nb_cbk_search, cbk_size;
|
||||
opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q15, corr_thres_Q15;
|
||||
opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
|
||||
const opus_int8 *Lag_CB_ptr;
|
||||
/* Check for valid sampling frequency */
|
||||
silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
|
||||
|
@ -114,7 +108,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
|
||||
|
||||
silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
|
||||
silk_assert( search_thres2_Q15 >= 0 && search_thres2_Q15 <= (1<<15) );
|
||||
silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
|
||||
|
||||
/* Set up frame lengths max / min lag for the sampling frequency */
|
||||
frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
|
||||
|
@ -130,8 +124,6 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
max_lag_4kHz = PE_MAX_LAG_MS * 4;
|
||||
max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1;
|
||||
|
||||
silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );
|
||||
|
||||
/* Resample from input sampled at Fs_kHz to 8 kHz */
|
||||
if( Fs_kHz == 16 ) {
|
||||
silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
|
||||
|
@ -159,9 +151,9 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
*******************************************************************************/
|
||||
|
||||
/* Inner product is calculated with different lengths, so scale for the worst case */
|
||||
max_sum_sq_length = silk_max_32( sf_length_8kHz, silk_LSHIFT( sf_length_4kHz, 2 ) );
|
||||
shift = silk_P_Ana_find_scaling( frame_4kHz, frame_length_4kHz, max_sum_sq_length );
|
||||
silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
|
||||
if( shift > 0 ) {
|
||||
shift = silk_RSHIFT( shift, 1 );
|
||||
for( i = 0; i < frame_length_4kHz; i++ ) {
|
||||
frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
|
||||
}
|
||||
|
@ -170,6 +162,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
/******************************************************************************
|
||||
* FIRST STAGE, operating in 4 kHz
|
||||
******************************************************************************/
|
||||
silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );
|
||||
target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
|
||||
for( k = 0; k < nb_subfr >> 1; k++ ) {
|
||||
/* Check that we are within range of the array */
|
||||
|
@ -183,12 +176,12 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
|
||||
|
||||
/* Calculate first vector products before loop */
|
||||
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
|
||||
normalizer = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz );
|
||||
normalizer = silk_ADD_SAT32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );
|
||||
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
|
||||
normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );
|
||||
normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz ) );
|
||||
normalizer = silk_ADD32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );
|
||||
|
||||
temp32 = silk_DIV32( cross_corr, silk_SQRT_APPROX( normalizer ) + 1 );
|
||||
C[ k ][ min_lag_4kHz ] = (opus_int16)silk_SAT16( temp32 ); /* Q0 */
|
||||
C[ k ][ min_lag_4kHz ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
|
||||
|
||||
/* From now on normalizer is computed recursively */
|
||||
for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
|
||||
|
@ -201,12 +194,11 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
|
||||
|
||||
/* Add contribution of new sample and remove contribution from oldest sample */
|
||||
normalizer +=
|
||||
normalizer = silk_ADD32( normalizer,
|
||||
silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
|
||||
silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] );
|
||||
silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] ) );
|
||||
|
||||
temp32 = silk_DIV32( cross_corr, silk_SQRT_APPROX( normalizer ) + 1 );
|
||||
C[ k ][ d ] = (opus_int16)silk_SAT16( temp32 ); /* Q0 */
|
||||
C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
|
||||
}
|
||||
/* Update target pointer */
|
||||
target_ptr += sf_length_8kHz;
|
||||
|
@ -215,20 +207,16 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
/* Combine two subframes into single correlation measure and apply short-lag bias */
|
||||
if( nb_subfr == PE_MAX_NB_SUBFR ) {
|
||||
for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
|
||||
sum = (opus_int32)C[ 0 ][ i ] + (opus_int32)C[ 1 ][ i ]; /* Q0 */
|
||||
silk_assert( silk_RSHIFT( sum, 1 ) == silk_SAT16( silk_RSHIFT( sum, 1 ) ) );
|
||||
sum = silk_RSHIFT( sum, 1 ); /* Q-1 */
|
||||
silk_assert( silk_LSHIFT( (opus_int32)-i, 4 ) == silk_SAT16( silk_LSHIFT( (opus_int32)-i, 4 ) ) );
|
||||
sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q-1 */
|
||||
silk_assert( sum == silk_SAT16( sum ) );
|
||||
C[ 0 ][ i ] = (opus_int16)sum; /* Q-1 */
|
||||
sum = (opus_int32)C[ 0 ][ i ] + (opus_int32) C[ 1 ][ i ]; /* Q14 */
|
||||
sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
|
||||
C[ 0 ][ i ] = (opus_int16)sum; /* Q14 */
|
||||
}
|
||||
} else {
|
||||
/* Only short-lag bias */
|
||||
for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
|
||||
sum = (opus_int32)C[ 0 ][ i ];
|
||||
sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q-1 */
|
||||
C[ 0 ][ i ] = (opus_int16)sum; /* Q-1 */
|
||||
sum = silk_LSHIFT( (opus_int32)C[ 0 ][ i ], 1 ); /* Q14 */
|
||||
sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
|
||||
C[ 0 ][ i ] = (opus_int16)sum; /* Q14 */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -238,14 +226,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
silk_insertion_sort_decreasing_int16( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
|
||||
|
||||
/* Escape if correlation is very low already here */
|
||||
target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];
|
||||
energy = silk_inner_prod_aligned( target_ptr, target_ptr, silk_LSHIFT( sf_length_4kHz, 2 ) );
|
||||
energy = silk_ADD_SAT32( energy, 1000 ); /* Q0 */
|
||||
Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ]; /* Q-1 */
|
||||
threshold = silk_SMULBB( Cmax, Cmax ); /* Q-2 */
|
||||
|
||||
/* Compare in Q-2 domain */
|
||||
if( silk_RSHIFT( energy, 4 + 2 ) > threshold ) {
|
||||
Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ]; /* Q14 */
|
||||
if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
|
||||
silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
|
||||
*LTPCorr_Q15 = 0;
|
||||
*lagIndex = 0;
|
||||
|
@ -306,8 +288,9 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
** Scale signal down to avoid correlation measures from overflowing
|
||||
*******************************************************************************/
|
||||
/* find scaling as max scaling for each subframe */
|
||||
shift = silk_P_Ana_find_scaling( frame_8kHz, frame_length_8kHz, sf_length_8kHz );
|
||||
silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
|
||||
if( shift > 0 ) {
|
||||
shift = silk_RSHIFT( shift, 1 );
|
||||
for( i = 0; i < frame_length_8kHz; i++ ) {
|
||||
frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
|
||||
}
|
||||
|
@ -325,7 +308,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
silk_assert( target_ptr >= frame_8kHz );
|
||||
silk_assert( target_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );
|
||||
|
||||
energy_target = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );
|
||||
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz ), 1 );
|
||||
for( j = 0; j < length_d_comp; j++ ) {
|
||||
d = d_comp[ j ];
|
||||
basis_ptr = target_ptr - d;
|
||||
|
@ -334,20 +317,10 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
silk_assert( basis_ptr >= frame_8kHz );
|
||||
silk_assert( basis_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );
|
||||
|
||||
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
|
||||
energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz );
|
||||
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
|
||||
if( cross_corr > 0 ) {
|
||||
energy = silk_max( energy_target, energy_basis ); /* Find max to make sure first division < 1.0 */
|
||||
lz = silk_CLZ32( cross_corr );
|
||||
lshift = silk_LIMIT_32( lz - 1, 0, 15 );
|
||||
temp32 = silk_DIV32( silk_LSHIFT( cross_corr, lshift ), silk_RSHIFT( energy, 15 - lshift ) + 1 ); /* Q15 */
|
||||
silk_assert( temp32 == silk_SAT16( temp32 ) );
|
||||
temp32 = silk_SMULWB( cross_corr, temp32 ); /* Q(-1), cc * ( cc / max(b, t) ) */
|
||||
temp32 = silk_ADD_SAT32( temp32, temp32 ); /* Q(0) */
|
||||
lz = silk_CLZ32( temp32 );
|
||||
lshift = silk_LIMIT_32( lz - 1, 0, 15 );
|
||||
energy = silk_min( energy_target, energy_basis );
|
||||
C[ k ][ d ] = silk_DIV32( silk_LSHIFT( temp32, lshift ), silk_RSHIFT( energy, 15 - lshift ) + 1 ); /* Q15*/
|
||||
energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz );
|
||||
C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, silk_ADD32( energy_target, energy_basis ), 13 + 1 ); /* Q13 */
|
||||
} else {
|
||||
C[ k ][ d ] = 0;
|
||||
}
|
||||
|
@ -374,7 +347,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
} else {
|
||||
prevLag_log2_Q7 = 0;
|
||||
}
|
||||
silk_assert( search_thres2_Q15 == silk_SAT16( search_thres2_Q15 ) );
|
||||
silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
|
||||
/* Set up stage 2 codebook based on number of subframes */
|
||||
if( nb_subfr == PE_MAX_NB_SUBFR ) {
|
||||
cbk_size = PE_NB_CBKS_STAGE2_EXT;
|
||||
|
@ -385,12 +358,10 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
} else {
|
||||
nb_cbk_search = PE_NB_CBKS_STAGE2;
|
||||
}
|
||||
corr_thres_Q15 = silk_RSHIFT( silk_SMULBB( search_thres2_Q15, search_thres2_Q15 ), 13 );
|
||||
} else {
|
||||
cbk_size = PE_NB_CBKS_STAGE2_10MS;
|
||||
Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
|
||||
nb_cbk_search = PE_NB_CBKS_STAGE2_10MS;
|
||||
corr_thres_Q15 = silk_RSHIFT( silk_SMULBB( search_thres2_Q15, search_thres2_Q15 ), 14 );
|
||||
}
|
||||
|
||||
for( k = 0; k < length_d_srch; k++ ) {
|
||||
|
@ -399,7 +370,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
CC[ j ] = 0;
|
||||
for( i = 0; i < nb_subfr; i++ ) {
|
||||
/* Try all codebooks */
|
||||
CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )];
|
||||
CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ) ];
|
||||
}
|
||||
}
|
||||
/* Find best codebook */
|
||||
|
@ -413,24 +384,24 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
}
|
||||
|
||||
/* Bias towards shorter lags */
|
||||
lag_log2_Q7 = silk_lin2log( (opus_int32)d ); /* Q7 */
|
||||
lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
|
||||
silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
|
||||
silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ) ) );
|
||||
CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ), lag_log2_Q7 ), 7 ); /* Q15 */
|
||||
silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
|
||||
CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
|
||||
|
||||
/* Bias towards previous lag */
|
||||
silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ) ) );
|
||||
silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
|
||||
if( prevLag > 0 ) {
|
||||
delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
|
||||
silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
|
||||
delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
|
||||
prev_lag_bias_Q15 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ), *LTPCorr_Q15 ), 15 ); /* Q15 */
|
||||
prev_lag_bias_Q15 = silk_DIV32( silk_MUL( prev_lag_bias_Q15, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + ( 1 << 6 ) );
|
||||
CCmax_new_b -= prev_lag_bias_Q15; /* Q15 */
|
||||
prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
|
||||
prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
|
||||
CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
|
||||
}
|
||||
|
||||
if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
|
||||
CCmax_new > corr_thres_Q15 && /* Correlation needs to be high enough to be voiced */
|
||||
CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */
|
||||
silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz /* Lag must be in range */
|
||||
) {
|
||||
CCmax_b = CCmax_new_b;
|
||||
|
@ -449,15 +420,20 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Output normalized correlation */
|
||||
*LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
|
||||
silk_assert( *LTPCorr_Q15 >= 0 );
|
||||
|
||||
if( Fs_kHz > 8 ) {
|
||||
/***************************************************************************/
|
||||
/* Scale input signal down to avoid correlation measures from overflowing  */
|
||||
/***************************************************************************/
|
||||
/* find scaling as max scaling for each subframe */
|
||||
shift = silk_P_Ana_find_scaling( frame, frame_length, sf_length );
|
||||
silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
|
||||
if( shift > 0 ) {
|
||||
/* Move signal to scratch mem because the input signal should be unchanged */
|
||||
/* Reuse the 32 bit scratch mem vector, use a 16 bit pointer from now */
|
||||
shift = silk_RSHIFT( shift, 1 );
|
||||
input_frame_ptr = (opus_int16*)scratch_mem;
|
||||
for( i = 0; i < frame_length; i++ ) {
|
||||
input_frame_ptr[ i ] = silk_RSHIFT( frame[ i ], shift );
|
||||
|
@ -483,9 +459,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
start_lag = silk_max_int( lag - 2, min_lag );
|
||||
end_lag = silk_min_int( lag + 2, max_lag );
|
||||
lag_new = lag; /* to avoid undefined lag */
|
||||
CBimax = 0; /* to avoid undefined lag */
|
||||
silk_assert( silk_LSHIFT( CCmax, 13 ) >= 0 );
|
||||
*LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
|
||||
CBimax = 0; /* to avoid undefined lag */
|
||||
|
||||
CCmax = silk_int32_MIN;
|
||||
/* pitch lags according to second stage */
|
||||
|
@ -498,7 +472,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
|
||||
lag_counter = 0;
|
||||
silk_assert( lag == silk_SAT16( lag ) );
|
||||
contour_bias_Q20 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 20 ), lag );
|
||||
contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
|
||||
|
||||
/* Set up codebook parameters according to complexity setting and frame length */
|
||||
if( nb_subfr == PE_MAX_NB_SUBFR ) {
|
||||
|
@ -510,41 +484,29 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
cbk_size = PE_NB_CBKS_STAGE3_10MS;
|
||||
Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
|
||||
}
|
||||
|
||||
target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
|
||||
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
|
||||
for( d = start_lag; d <= end_lag; d++ ) {
|
||||
for( j = 0; j < nb_cbk_search; j++ ) {
|
||||
cross_corr = 0;
|
||||
energy = 0;
|
||||
energy = energy_target;
|
||||
for( k = 0; k < nb_subfr; k++ ) {
|
||||
silk_assert( PE_MAX_NB_SUBFR == 4 );
|
||||
energy += silk_RSHIFT( energies_st3[ k ][ j ][ lag_counter ], 2 ); /* use mean, to avoid overflow */
|
||||
cross_corr = silk_ADD32( cross_corr, crosscorr_st3[ k ][ j ][ lag_counter ] );
|
||||
energy = silk_ADD32( energy, energies_st3[ k ][ j ][ lag_counter ] );
|
||||
silk_assert( energy >= 0 );
|
||||
cross_corr += silk_RSHIFT( crosscorr_st3[ k ][ j ][ lag_counter ], 2 ); /* use mean, to avoid overflow */
|
||||
}
|
||||
if( cross_corr > 0 ) {
|
||||
/* Divide cross_corr / energy and get result in Q15 */
|
||||
lz = silk_CLZ32( cross_corr );
|
||||
/* Divide with result in Q13, cross_corr could be larger than energy */
|
||||
lshift = silk_LIMIT_32( lz - 1, 0, 13 );
|
||||
CCmax_new = silk_DIV32( silk_LSHIFT( cross_corr, lshift ), silk_RSHIFT( energy, 13 - lshift ) + 1 );
|
||||
CCmax_new = silk_SAT16( CCmax_new );
|
||||
CCmax_new = silk_SMULWB( cross_corr, CCmax_new );
|
||||
/* Saturate */
|
||||
if( CCmax_new > silk_RSHIFT( silk_int32_MAX, 3 ) ) {
|
||||
CCmax_new = silk_int32_MAX;
|
||||
} else {
|
||||
CCmax_new = silk_LSHIFT( CCmax_new, 3 );
|
||||
}
|
||||
CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */
|
||||
/* Reduce depending on flatness of contour */
|
||||
diff = silk_int16_MAX - silk_RSHIFT( silk_MUL( contour_bias_Q20, j ), 5 ); /* Q20 -> Q15 */
|
||||
diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */
|
||||
silk_assert( diff == silk_SAT16( diff ) );
|
||||
CCmax_new = silk_LSHIFT( silk_SMULWB( CCmax_new, diff ), 1 );
|
||||
CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */
|
||||
} else {
|
||||
CCmax_new = 0;
|
||||
}
|
||||
|
||||
if( CCmax_new > CCmax &&
|
||||
( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag
|
||||
) {
|
||||
if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
|
||||
CCmax = CCmax_new;
|
||||
lag_new = d;
|
||||
CBimax = j;
|
||||
|
@ -560,12 +522,10 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
*lagIndex = (opus_int16)( lag_new - min_lag);
|
||||
*contourIndex = (opus_int8)CBimax;
|
||||
} else { /* Fs_kHz == 8 */
|
||||
/* Save Lags and correlation */
|
||||
CCmax = silk_max( CCmax, 0 );
|
||||
*LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
|
||||
/* Save Lags */
|
||||
for( k = 0; k < nb_subfr; k++ ) {
|
||||
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
|
||||
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz );
|
||||
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
|
||||
}
|
||||
*lagIndex = (opus_int16)( lag - min_lag_8kHz );
|
||||
*contourIndex = (opus_int8)CBimax;
|
||||
|
@ -575,12 +535,21 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
/* Calculates the correlations used in stage 3 search. In order to cover */
|
||||
/* the whole lag codebook for all the searched offset lags (lag +- 2), */
|
||||
/*************************************************************************/
|
||||
void silk_P_Ana_calc_corr_st3(
|
||||
opus_int32 cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM correlation array */
|
||||
/***********************************************************************
|
||||
/* Calculates the correlations used in stage 3 search. In order to cover
|
||||
/* the whole lag codebook for all the searched offset lags (lag +- 2),
|
||||
/* the following correlations are needed in each sub frame:
|
||||
/*
|
||||
/* sf1: lag range [-8,...,7] total 16 correlations
|
||||
/* sf2: lag range [-4,...,4] total 9 correlations
|
||||
/* sf3: lag range [-3,...,4] total 8 correlations
|
||||
/* sf4: lag range [-6,...,8] total 15 correlations
|
||||
/*
|
||||
/* In total 48 correlations. The direct implementation computed in worst
|
||||
/* case 4*12*5 = 240 correlations, but more likely around 120.
|
||||
/***********************************************************************/
|
||||
static void silk_P_Ana_calc_corr_st3(
|
||||
opus_int32 cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */
|
||||
const opus_int16 frame[], /* I vector to correlate */
|
||||
opus_int start_lag, /* I lag offset to search around */
|
||||
opus_int sf_length, /* I length of a 5 ms subframe */
|
||||
|
@ -620,7 +589,7 @@ void silk_P_Ana_calc_corr_st3(
|
|||
lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
|
||||
for( j = lag_low; j <= lag_high; j++ ) {
|
||||
basis_ptr = target_ptr - ( start_lag + j );
|
||||
cross_corr = silk_inner_prod_aligned( (opus_int16*)target_ptr, (opus_int16*)basis_ptr, sf_length );
|
||||
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length );
|
||||
silk_assert( lag_counter < SCRATCH_SIZE );
|
||||
scratch_mem[ lag_counter ] = cross_corr;
|
||||
lag_counter++;
|
||||
|
@ -645,13 +614,13 @@ void silk_P_Ana_calc_corr_st3(
|
|||
/* Calculate the energies for first two subframes. The energies are */
|
||||
/* calculated recursively. */
|
||||
/********************************************************************/
|
||||
void silk_P_Ana_calc_energy_st3(
|
||||
opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM energy array */
|
||||
const opus_int16 frame[], /* I vector to calc energy in */
|
||||
opus_int start_lag, /* I lag offset to search around */
|
||||
opus_int sf_length, /* I length of one 5 ms subframe */
|
||||
opus_int nb_subfr, /* I number of subframes */
|
||||
opus_int complexity /* I Complexity setting */
|
||||
static void silk_P_Ana_calc_energy_st3(
|
||||
opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM energy array */
|
||||
const opus_int16 frame[], /* I vector to calc energy in */
|
||||
opus_int start_lag, /* I lag offset to search around */
|
||||
opus_int sf_length, /* I length of one 5 ms subframe */
|
||||
opus_int nb_subfr, /* I number of subframes */
|
||||
opus_int complexity /* I Complexity setting */
|
||||
)
|
||||
{
|
||||
const opus_int16 *target_ptr, *basis_ptr;
|
||||
|
@ -716,30 +685,3 @@ void silk_P_Ana_calc_energy_st3(
|
|||
target_ptr += sf_length;
|
||||
}
|
||||
}
|
||||
|
||||
opus_int32 silk_P_Ana_find_scaling(
|
||||
const opus_int16 *frame,
|
||||
const opus_int frame_length,
|
||||
const opus_int sum_sqr_len
|
||||
)
|
||||
{
|
||||
opus_int32 nbits, x_max;
|
||||
|
||||
x_max = silk_int16_array_maxabs( frame, frame_length );
|
||||
|
||||
if( x_max < silk_int16_MAX ) {
|
||||
/* Number of bits needed for the sum of the squares */
|
||||
nbits = 32 - silk_CLZ32( silk_SMULBB( x_max, x_max ) );
|
||||
} else {
|
||||
/* Here we don't know if x_max should have been silk_int16_MAX + 1, so we expect the worst case */
|
||||
nbits = 30;
|
||||
}
|
||||
nbits += 17 - silk_CLZ16( sum_sqr_len );
|
||||
|
||||
/* Without a guarantee of saturation, we need to keep the 31st bit free */
|
||||
if( nbits < 31 ) {
|
||||
return 0;
|
||||
} else {
|
||||
return( nbits - 30 );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,34 +94,3 @@ opus_int64 silk_inner_prod16_aligned_64(
|
|||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* Function that returns the maximum absolute value of the input vector */
|
||||
opus_int16 silk_int16_array_maxabs( /* O Maximum absolute value, max: 2^15-1 */
|
||||
const opus_int16 *vec, /* I Input vector [len] */
|
||||
const opus_int32 len /* I Length of input vector */
|
||||
)
|
||||
{
|
||||
opus_int32 max = 0, i, lvl = 0, ind;
|
||||
if( len == 0 ) return 0;
|
||||
|
||||
ind = len - 1;
|
||||
max = silk_SMULBB( vec[ ind ], vec[ ind ] );
|
||||
for( i = len - 2; i >= 0; i-- ) {
|
||||
lvl = silk_SMULBB( vec[ i ], vec[ i ] );
|
||||
if( lvl > max ) {
|
||||
max = lvl;
|
||||
ind = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do not return 32768, as it will not fit in an int16 so may lead to problems later on */
|
||||
if( max >= 1073676289 ) { /* (2^15-1)^2 = 1073676289 */
|
||||
return( silk_int16_MAX );
|
||||
} else {
|
||||
if( vec[ ind ] < 0 ) {
|
||||
return( -vec[ ind ] );
|
||||
} else {
|
||||
return( vec[ ind ] );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,6 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "pitch_est_defines.h"
|
||||
|
||||
#define SCRATCH_SIZE 22
|
||||
#define eps 1.192092896e-07f
|
||||
|
||||
/************************************************************/
|
||||
/* Internally used functions */
|
||||
|
@ -129,8 +128,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
max_lag_4kHz = PE_MAX_LAG_MS * 4;
|
||||
max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1;
|
||||
|
||||
silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
|
||||
|
||||
/* Resample from input sampled at Fs_kHz to 8 kHz */
|
||||
if( Fs_kHz == 16 ) {
|
||||
/* Resample to 16 -> 8 khz */
|
||||
|
@ -164,6 +161,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
/******************************************************************************
|
||||
* FIRST STAGE, operating in 4 khz
|
||||
******************************************************************************/
|
||||
silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
|
||||
target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
|
||||
for( k = 0; k < nb_subfr >> 1; k++ ) {
|
||||
/* Check that we are within range of the array */
|
||||
|
@ -178,12 +176,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
|
||||
/* Calculate first vector products before loop */
|
||||
cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );
|
||||
normalizer = silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f;
|
||||
normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +
|
||||
silk_energy_FLP( basis_ptr, sf_length_8kHz ) +
|
||||
sf_length_8kHz * 4000.0f;
|
||||
|
||||
C[ 0 ][ min_lag_4kHz ] += (silk_float)(cross_corr / sqrt(normalizer));
|
||||
C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
|
||||
|
||||
/* From now on normalizer is computed recursively */
|
||||
for(d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++) {
|
||||
for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
|
||||
basis_ptr--;
|
||||
|
||||
/* Check that we are within range of the array */
|
||||
|
@ -196,7 +196,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
normalizer +=
|
||||
basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -
|
||||
basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];
|
||||
C[ 0 ][ d ] += (silk_float)(cross_corr / sqrt( normalizer ));
|
||||
C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );
|
||||
}
|
||||
/* Update target pointer */
|
||||
target_ptr += sf_length_8kHz;
|
||||
|
@ -214,13 +214,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
|
||||
/* Escape if correlation is very low already here */
|
||||
Cmax = C[ 0 ][ min_lag_4kHz ];
|
||||
target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];
|
||||
energy = 1000.0f;
|
||||
for( i = 0; i < silk_LSHIFT( sf_length_4kHz, 2 ); i++ ) {
|
||||
energy += target_ptr[i] * (double)target_ptr[i];
|
||||
}
|
||||
threshold = Cmax * Cmax;
|
||||
if( energy / 16.0f > threshold ) {
|
||||
if( Cmax < 0.2f ) {
|
||||
silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
|
||||
*LTPCorr = 0.0f;
|
||||
*lagIndex = 0;
|
||||
|
@ -287,14 +281,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
|
||||
}
|
||||
for( k = 0; k < nb_subfr; k++ ) {
|
||||
energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz );
|
||||
energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;
|
||||
for( j = 0; j < length_d_comp; j++ ) {
|
||||
d = d_comp[ j ];
|
||||
basis_ptr = target_ptr - d;
|
||||
cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
|
||||
energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
|
||||
if( cross_corr > 0.0f ) {
|
||||
C[ k ][ d ] = (silk_float)(cross_corr * cross_corr / (energy * energy_tmp + eps));
|
||||
energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
|
||||
C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
|
||||
} else {
|
||||
C[ k ][ d ] = 0.0f;
|
||||
}
|
||||
|
@ -317,7 +311,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
} else if( Fs_kHz == 16 ) {
|
||||
prevLag = silk_RSHIFT( prevLag, 1 );
|
||||
}
|
||||
prevLag_log2 = silk_log2((silk_float)prevLag);
|
||||
prevLag_log2 = silk_log2( (silk_float)prevLag );
|
||||
} else {
|
||||
prevLag_log2 = 0;
|
||||
}
|
||||
|
@ -356,23 +350,20 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
CBimax_new = i;
|
||||
}
|
||||
}
|
||||
CCmax_new = silk_max_float(CCmax_new, 0.0f); /* To avoid taking square root of negative number later */
|
||||
CCmax_new_b = CCmax_new;
|
||||
|
||||
/* Bias towards shorter lags */
|
||||
lag_log2 = silk_log2((silk_float)d);
|
||||
CCmax_new_b -= PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
|
||||
lag_log2 = silk_log2( (silk_float)d );
|
||||
CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
|
||||
|
||||
/* Bias towards previous lag */
|
||||
if( prevLag > 0 ) {
|
||||
delta_lag_log2_sqr = lag_log2 - prevLag_log2;
|
||||
delta_lag_log2_sqr *= delta_lag_log2_sqr;
|
||||
CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / (delta_lag_log2_sqr + 0.5f);
|
||||
CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );
|
||||
}
|
||||
|
||||
if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
|
||||
CCmax_new > nb_subfr * search_thres2 * search_thres2 && /* Correlation needs to be high enough to be voiced */
|
||||
silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz /* Lag must be in range */
|
||||
if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
|
||||
CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */
|
||||
) {
|
||||
CCmax_b = CCmax_new_b;
|
||||
CCmax = CCmax_new;
|
||||
|
@ -390,6 +381,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Output normalized correlation */
|
||||
*LTPCorr = (silk_float)( CCmax / nb_subfr );
|
||||
silk_assert( *LTPCorr >= 0.0f );
|
||||
|
||||
if( Fs_kHz > 8 ) {
|
||||
/* Search in original signal */
|
||||
|
||||
|
@ -406,8 +401,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
end_lag = silk_min_int( lag + 2, max_lag );
|
||||
lag_new = lag; /* to avoid undefined lag */
|
||||
CBimax = 0; /* to avoid undefined lag */
|
||||
silk_assert( CCmax >= 0.0f );
|
||||
*LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
|
||||
|
||||
CCmax = -1000.0f;
|
||||
|
||||
|
@ -430,25 +423,25 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
|
||||
}
|
||||
|
||||
target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
|
||||
energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;
|
||||
for( d = start_lag; d <= end_lag; d++ ) {
|
||||
for( j = 0; j < nb_cbk_search; j++ ) {
|
||||
cross_corr = 0.0;
|
||||
energy = eps;
|
||||
energy = energy_tmp;
|
||||
for( k = 0; k < nb_subfr; k++ ) {
|
||||
energy += energies_st3[ k ][ j ][ lag_counter ];
|
||||
cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];
|
||||
energy += energies_st3[ k ][ j ][ lag_counter ];
|
||||
}
|
||||
if( cross_corr > 0.0 ) {
|
||||
CCmax_new = (silk_float)(cross_corr * cross_corr / energy);
|
||||
CCmax_new = (silk_float)( 2 * cross_corr / energy );
|
||||
/* Reduce depending on flatness of contour */
|
||||
CCmax_new *= 1.0f - contour_bias * j;
|
||||
} else {
|
||||
CCmax_new = 0.0f;
|
||||
}
|
||||
|
||||
if( CCmax_new > CCmax &&
|
||||
( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag
|
||||
) {
|
||||
if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
|
||||
CCmax = CCmax_new;
|
||||
lag_new = d;
|
||||
CBimax = j;
|
||||
|
@ -464,12 +457,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
*lagIndex = (opus_int16)( lag_new - min_lag );
|
||||
*contourIndex = (opus_int8)CBimax;
|
||||
} else { /* Fs_kHz == 8 */
|
||||
/* Save Lags and correlation */
|
||||
silk_assert( CCmax >= 0.0f );
|
||||
*LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
|
||||
/* Save Lags */
|
||||
for( k = 0; k < nb_subfr; k++ ) {
|
||||
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
|
||||
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz );
|
||||
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
|
||||
}
|
||||
*lagIndex = (opus_int16)( lag - min_lag_8kHz );
|
||||
*contourIndex = (opus_int8)CBimax;
|
||||
|
@ -479,6 +470,19 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
/* Calculates the correlations used in stage 3 search. In order to cover
|
||||
/* the whole lag codebook for all the searched offset lags (lag +- 2),
|
||||
/* the following correlations are needed in each sub frame:
|
||||
/*
|
||||
/* sf1: lag range [-8,...,7] total 16 correlations
|
||||
/* sf2: lag range [-4,...,4] total 9 correlations
|
||||
/* sf3: lag range [-3,....4] total 8 correlations
|
||||
/* sf4: lag range [-6,....8] total 15 correlations
|
||||
/*
|
||||
/* In total 48 correlations. The direct implementation computed in worst
|
||||
/* case 4*12*5 = 240 correlations, but more likely around 120.
|
||||
/***********************************************************************/
|
||||
static void silk_P_Ana_calc_corr_st3(
|
||||
silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
|
||||
const silk_float frame[], /* I vector to correlate */
|
||||
|
@ -487,19 +491,6 @@ static void silk_P_Ana_calc_corr_st3(
|
|||
opus_int nb_subfr, /* I number of subframes */
|
||||
opus_int complexity /* I Complexity setting */
|
||||
)
|
||||
/***********************************************************************
|
||||
Calculates the correlations used in stage 3 search. In order to cover
|
||||
the whole lag codebook for all the searched offset lags (lag +- 2),
|
||||
the following correlations are needed in each sub frame:
|
||||
|
||||
sf1: lag range [-8,...,7] total 16 correlations
|
||||
sf2: lag range [-4,...,4] total 9 correlations
|
||||
sf3: lag range [-3,....4] total 8 correlations
|
||||
sf4: lag range [-6,....8] total 15 correlations
|
||||
|
||||
In total 48 correlations. The direct implementation computed in worst case
|
||||
4*12*5 = 240 correlations, but more likely around 120.
|
||||
**********************************************************************/
|
||||
{
|
||||
const silk_float *target_ptr, *basis_ptr;
|
||||
opus_int i, j, k, lag_counter, lag_low, lag_high;
|
||||
|
@ -552,6 +543,10 @@ static void silk_P_Ana_calc_corr_st3(
|
|||
}
|
||||
}
|
||||
|
||||
/********************************************************************/
|
||||
/* Calculate the energies for first two subframes. The energies are */
|
||||
/* calculated recursively. */
|
||||
/********************************************************************/
|
||||
static void silk_P_Ana_calc_energy_st3(
|
||||
silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
|
||||
const silk_float frame[], /* I vector to correlate */
|
||||
|
@ -560,10 +555,6 @@ static void silk_P_Ana_calc_energy_st3(
|
|||
opus_int nb_subfr, /* I number of subframes */
|
||||
opus_int complexity /* I Complexity setting */
|
||||
)
|
||||
/****************************************************************
|
||||
Calculate the energies for first two subframes. The energies are
|
||||
calculated recursively.
|
||||
****************************************************************/
|
||||
{
|
||||
const silk_float *target_ptr, *basis_ptr;
|
||||
double energy;
|
||||
|
|
|
@ -155,7 +155,7 @@ void silk_NSQ_wrapper_FLP(
|
|||
|
||||
/* Convert input to fix */
|
||||
for( i = 0; i < psEnc->sCmn.frame_length; i++ ) {
|
||||
x_Q3[ i ] = silk_float2int( 8.0 * x[ i ] );
|
||||
x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] );
|
||||
}
|
||||
|
||||
/* Call NSQ */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue