mirror of
https://github.com/xiph/opus.git
synced 2025-05-14 15:38:32 +00:00
Optimize silk_biquad_alt_stride2() for ARM NEON
The optimization is bit exact with C function. Change-Id: Ifb8f04b19f2d576e79ce5dcfa7e0fc374d71d6c8 Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
This commit is contained in:
parent
60eb7d88b4
commit
43db56225b
8 changed files with 251 additions and 5 deletions
|
@ -48,6 +48,7 @@ extern "C"
|
|||
#endif
|
||||
|
||||
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
|
||||
#include "arm/biquad_alt_arm.h"
|
||||
#include "arm/LPC_inv_pred_gain_arm.h"
|
||||
#endif
|
||||
|
||||
|
@ -109,7 +110,7 @@ void silk_biquad_alt_stride1(
|
|||
const opus_int32 len /* I signal length (must be even) */
|
||||
);
|
||||
|
||||
void silk_biquad_alt_stride2(
|
||||
void silk_biquad_alt_stride2_c(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
|
@ -158,6 +159,10 @@ void silk_ana_filt_bank_1(
|
|||
const opus_int32 N /* I Number of input samples */
|
||||
);
|
||||
|
||||
#if !defined(OVERRIDE_silk_biquad_alt_stride2)
|
||||
#define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len))
|
||||
#endif
|
||||
|
||||
#if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
|
||||
#define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order))
|
||||
#endif
|
||||
|
|
|
@ -37,6 +37,20 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
|
||||
!defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
|
||||
void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
) = {
|
||||
silk_biquad_alt_stride2_c, /* ARMv4 */
|
||||
silk_biquad_alt_stride2_c, /* EDSP */
|
||||
silk_biquad_alt_stride2_c, /* Media */
|
||||
MAY_HAVE_NEON(silk_biquad_alt_stride2), /* Neon */
|
||||
};
|
||||
|
||||
opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O Returns inverse prediction gain in energy domain, Q30 */
|
||||
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
|
||||
const opus_int order /* I Prediction order */
|
||||
|
|
68
silk/arm/biquad_alt_arm.h
Normal file
68
silk/arm/biquad_alt_arm.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_BIQUAD_ALT_ARM_H
|
||||
# define SILK_BIQUAD_ALT_ARM_H
|
||||
|
||||
# include "celt/arm/armcpu.h"
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
void silk_biquad_alt_stride2_neon(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
);
|
||||
|
||||
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
|
||||
# define OVERRIDE_silk_biquad_alt_stride2 (1)
|
||||
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), PRESUME_NEON(silk_biquad_alt_stride2)(in, B_Q28, A_Q28, S, out, len))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# if !defined(OVERRIDE_silk_biquad_alt_stride2)
|
||||
/*Is run-time CPU detection enabled on this platform?*/
|
||||
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
extern void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK+1])(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
);
|
||||
# define OVERRIDE_silk_biquad_alt_stride2 (1)
|
||||
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((*SILK_BIQUAD_ALT_STRIDE2_IMPL[(arch)&OPUS_ARCHMASK])(in, B_Q28, A_Q28, S, out, len))
|
||||
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
# define OVERRIDE_silk_biquad_alt_stride2 (1)
|
||||
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_neon(in, B_Q28, A_Q28, S, out, len))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#endif /* end SILK_BIQUAD_ALT_ARM_H */
|
156
silk/arm/biquad_alt_neon_intr.c
Normal file
156
silk/arm/biquad_alt_neon_intr.c
Normal file
|
@ -0,0 +1,156 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
# include <string.h>
|
||||
# include "stack_alloc.h"
|
||||
#endif
|
||||
#include "SigProc_FIX.h"
|
||||
|
||||
static inline void silk_biquad_alt_stride2_kernel( const int32x4_t A_L_s32x4, const int32x4_t A_U_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const int32x4_t in_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2 )
|
||||
{
|
||||
int32x4_t t_s32x4, out32_Q14_s32x4;
|
||||
|
||||
*out32_Q14_s32x2 = vadd_s32( vget_low_s32( *S_s32x4 ), t_s32x2 ); /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ) */
|
||||
*S_s32x4 = vcombine_s32( vget_high_s32( *S_s32x4 ), vdup_n_s32( 0 ) ); /* S{0,1} = S{2,3}; S{2,3} = 0; */
|
||||
*out32_Q14_s32x2 = vshl_n_s32( *out32_Q14_s32x2, 2 ); /* out32_Q14_{0,1} = silk_LSHIFT( silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ), 2 ); */
|
||||
out32_Q14_s32x4 = vcombine_s32( *out32_Q14_s32x2, *out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} */
|
||||
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_L_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_L_Q28 ) */
|
||||
*S_s32x4 = vrsraq_n_s32( *S_s32x4, t_s32x4, 14 ); /* S{0,1} = S{2,3} + silk_RSHIFT_ROUND(); S{2,3} = silk_RSHIFT_ROUND(); */
|
||||
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_U_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ) */
|
||||
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S{0,1,2,3}, out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ); */
|
||||
t_s32x4 = vqdmulhq_s32( in_s32x4, B_Q28_s32x4 ); /* silk_SMULWB( B_Q28[ {1,1,2,2} ], in{0,1,0,1} ) */
|
||||
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S0, B_Q28[ {1,1,2,2} ], in{0,1,0,1} ); */
|
||||
}
|
||||
|
||||
void silk_biquad_alt_stride2_neon(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
)
|
||||
{
|
||||
/* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
|
||||
opus_int k = 0;
|
||||
const int32x2_t offset_s32x2 = vdup_n_s32( (1<<14) - 1 );
|
||||
const int32x4_t offset_s32x4 = vcombine_s32( offset_s32x2, offset_s32x2 );
|
||||
int16x4_t in_s16x4 = vdup_n_s16( 0 );
|
||||
int16x4_t out_s16x4;
|
||||
int32x2_t A_Q28_s32x2, A_L_s32x2, A_U_s32x2, B_Q28_s32x2, t_s32x2;
|
||||
int32x4_t A_L_s32x4, A_U_s32x4, B_Q28_s32x4, S_s32x4, out32_Q14_s32x4;
|
||||
int32x2x2_t t0_s32x2x2, t1_s32x2x2, t2_s32x2x2, S_s32x2x2;
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
opus_int32 S_c[ 4 ];
|
||||
VARDECL( opus_int16, out_c );
|
||||
SAVE_STACK;
|
||||
ALLOC( out_c, 2 * len, opus_int16 );
|
||||
|
||||
silk_memcpy( &S_c, S, sizeof( S_c ) );
|
||||
silk_biquad_alt_stride2_c( in, B_Q28, A_Q28, S_c, out_c, len );
|
||||
#endif
|
||||
|
||||
/* Negate A_Q28 values and split in two parts */
|
||||
A_Q28_s32x2 = vld1_s32( A_Q28 );
|
||||
A_Q28_s32x2 = vneg_s32( A_Q28_s32x2 );
|
||||
A_L_s32x2 = vshl_n_s32( A_Q28_s32x2, 18 ); /* ( -A_Q28[] & 0x00003FFF ) << 18 */
|
||||
A_L_s32x2 = vreinterpret_s32_u32( vshr_n_u32( vreinterpret_u32_s32( A_L_s32x2 ), 3 ) ); /* ( -A_Q28[] & 0x00003FFF ) << 15 */
|
||||
A_U_s32x2 = vshr_n_s32( A_Q28_s32x2, 14 ); /* silk_RSHIFT( -A_Q28[], 14 ) */
|
||||
A_U_s32x2 = vshl_n_s32( A_U_s32x2, 16 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 16 (Clip two leading bits to conform to C function.) */
|
||||
A_U_s32x2 = vshr_n_s32( A_U_s32x2, 1 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 15 */
|
||||
|
||||
B_Q28_s32x2 = vld1_s32( B_Q28 );
|
||||
t_s32x2 = vld1_s32( B_Q28 + 1 );
|
||||
t0_s32x2x2 = vzip_s32( A_L_s32x2, A_L_s32x2 );
|
||||
t1_s32x2x2 = vzip_s32( A_U_s32x2, A_U_s32x2 );
|
||||
t2_s32x2x2 = vzip_s32( t_s32x2, t_s32x2 );
|
||||
A_L_s32x4 = vcombine_s32( t0_s32x2x2.val[ 0 ], t0_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_L_Q28 */
|
||||
A_U_s32x4 = vcombine_s32( t1_s32x2x2.val[ 0 ], t1_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_U_Q28 */
|
||||
B_Q28_s32x4 = vcombine_s32( t2_s32x2x2.val[ 0 ], t2_s32x2x2.val[ 1 ] ); /* B_Q28[ {1,1,2,2} ] */
|
||||
S_s32x4 = vld1q_s32( S ); /* S0 = S[ 0 ]; S3 = S[ 3 ]; */
|
||||
S_s32x2x2 = vtrn_s32( vget_low_s32( S_s32x4 ), vget_high_s32( S_s32x4 ) ); /* S2 = S[ 1 ]; S1 = S[ 2 ]; */
|
||||
S_s32x4 = vcombine_s32( S_s32x2x2.val[ 0 ], S_s32x2x2.val[ 1 ] );
|
||||
|
||||
for( ; k < len - 1; k += 2 ) {
|
||||
int32x4_t in_s32x4[ 2 ], t_s32x4;
|
||||
int32x2_t out32_Q14_s32x2[ 2 ];
|
||||
|
||||
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ], S[ 2 * i + 2 ], S[ 2 * i + 3 ]: Q12 */
|
||||
in_s16x4 = vld1_s16( &in[ 2 * k ] ); /* in{0,1,2,3} = in[ 2 * k + {0,1,2,3} ]; */
|
||||
in_s32x4[ 0 ] = vshll_n_s16( in_s16x4, 15 ); /* in{0,1,2,3} << 15 */
|
||||
t_s32x4 = vqdmulhq_lane_s32( in_s32x4[ 0 ], B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1,2,3} ) */
|
||||
in_s32x4[ 1 ] = vcombine_s32( vget_high_s32( in_s32x4[ 0 ] ), vget_high_s32( in_s32x4[ 0 ] ) ); /* in{2,3,2,3} << 15 */
|
||||
in_s32x4[ 0 ] = vcombine_s32( vget_low_s32 ( in_s32x4[ 0 ] ), vget_low_s32 ( in_s32x4[ 0 ] ) ); /* in{0,1,0,1} << 15 */
|
||||
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_low_s32 ( t_s32x4 ), in_s32x4[ 0 ], &S_s32x4, &out32_Q14_s32x2[ 0 ] );
|
||||
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_high_s32( t_s32x4 ), in_s32x4[ 1 ], &S_s32x4, &out32_Q14_s32x2[ 1 ] );
|
||||
|
||||
/* Scale back to Q0 and saturate */
|
||||
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2[ 0 ], out32_Q14_s32x2[ 1 ] ); /* out32_Q14_{0,1,2,3} */
|
||||
out32_Q14_s32x4 = vaddq_s32( out32_Q14_s32x4, offset_s32x4 ); /* out32_Q14_{0,1,2,3} + (1<<14) - 1 */
|
||||
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ) */
|
||||
vst1_s16( &out[ 2 * k ], out_s16x4 ); /* out[ 2 * k + {0,1,2,3} ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ); */
|
||||
}
|
||||
|
||||
/* Process leftover. */
|
||||
if( k < len ) {
|
||||
int32x4_t in_s32x4;
|
||||
int32x2_t out32_Q14_s32x2;
|
||||
|
||||
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ]: Q12 */
|
||||
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 0 ], in_s16x4, 0 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
|
||||
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 1 ], in_s16x4, 1 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
|
||||
in_s32x4 = vshll_n_s16( in_s16x4, 15 ); /* in{0,1} << 15 */
|
||||
t_s32x2 = vqdmulh_lane_s32( vget_low_s32( in_s32x4 ), B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1} ) */
|
||||
in_s32x4 = vcombine_s32( vget_low_s32( in_s32x4 ), vget_low_s32( in_s32x4 ) ); /* in{0,1,0,1} << 15 */
|
||||
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, t_s32x2, in_s32x4, &S_s32x4, &out32_Q14_s32x2 );
|
||||
|
||||
/* Scale back to Q0 and saturate */
|
||||
out32_Q14_s32x2 = vadd_s32( out32_Q14_s32x2, offset_s32x2 ); /* out32_Q14_{0,1} + (1<<14) - 1 */
|
||||
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2, out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} + (1<<14) - 1 */
|
||||
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,0,1} + (1<<14) - 1, 14 ) ) */
|
||||
vst1_lane_s16( &out[ 2 * k + 0 ], out_s16x4, 0 ); /* out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_0 + (1<<14) - 1, 14 ) ); */
|
||||
vst1_lane_s16( &out[ 2 * k + 1 ], out_s16x4, 1 ); /* out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_1 + (1<<14) - 1, 14 ) ); */
|
||||
}
|
||||
|
||||
vst1q_lane_s32( &S[ 0 ], S_s32x4, 0 ); /* S[ 0 ] = S0; */
|
||||
vst1q_lane_s32( &S[ 1 ], S_s32x4, 2 ); /* S[ 1 ] = S2; */
|
||||
vst1q_lane_s32( &S[ 2 ], S_s32x4, 1 ); /* S[ 2 ] = S1; */
|
||||
vst1q_lane_s32( &S[ 3 ], S_s32x4, 3 ); /* S[ 3 ] = S3; */
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
silk_assert( !memcmp( S_c, S, sizeof( S_c ) ) );
|
||||
silk_assert( !memcmp( out_c, out, 2 * len * sizeof( opus_int16 ) ) );
|
||||
RESTORE_STACK;
|
||||
#endif
|
||||
}
|
|
@ -76,7 +76,7 @@ void silk_biquad_alt_stride1(
|
|||
}
|
||||
}
|
||||
|
||||
void silk_biquad_alt_stride2(
|
||||
void silk_biquad_alt_stride2_c(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
|
|
|
@ -22,6 +22,7 @@ silk/resampler_rom.h \
|
|||
silk/resampler_structs.h \
|
||||
silk/SigProc_FIX.h \
|
||||
silk/x86/SigProc_FIX_sse.h \
|
||||
silk/arm/biquad_alt_arm.h \
|
||||
silk/arm/LPC_inv_pred_gain_arm.h \
|
||||
silk/arm/macros_armv4.h \
|
||||
silk/arm/macros_armv5e.h \
|
||||
|
|
|
@ -85,6 +85,7 @@ silk/x86/VQ_WMat_EC_sse.c
|
|||
|
||||
SILK_SOURCES_ARM_NEON_INTR = \
|
||||
silk/arm/arm_silk_map.c \
|
||||
silk/arm/biquad_alt_neon_intr.c \
|
||||
silk/arm/LPC_inv_pred_gain_neon_intr.c \
|
||||
silk/arm/NSQ_del_dec_neon_intr.c \
|
||||
silk/arm/NSQ_neon.c
|
||||
|
|
|
@ -341,10 +341,11 @@ static void silk_biquad_float(
|
|||
}
|
||||
#endif
|
||||
|
||||
static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
|
||||
static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs, int arch)
|
||||
{
|
||||
opus_int32 B_Q28[ 3 ], A_Q28[ 2 ];
|
||||
opus_int32 Fc_Q19, r_Q28, r_Q22;
|
||||
(void)arch;
|
||||
|
||||
silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
|
||||
Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 );
|
||||
|
@ -367,7 +368,7 @@ static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou
|
|||
if( channels == 1 ) {
|
||||
silk_biquad_alt_stride1( in, B_Q28, A_Q28, hp_mem, out, len );
|
||||
} else {
|
||||
silk_biquad_alt_stride2( in, B_Q28, A_Q28, hp_mem, out, len );
|
||||
silk_biquad_alt_stride2( in, B_Q28, A_Q28, hp_mem, out, len, arch );
|
||||
}
|
||||
#else
|
||||
silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels );
|
||||
|
@ -1609,7 +1610,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
|
||||
if (st->application == OPUS_APPLICATION_VOIP)
|
||||
{
|
||||
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
|
||||
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
|
||||
} else {
|
||||
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue