mirror of
https://github.com/xiph/opus.git
synced 2025-06-06 15:30:48 +00:00
Trying to use fma instructions when possible
Compilers sometimes replace vmlaq*() with fmul+fadd instead of fmla. Trying to use vfmaq*() instead when possible.
This commit is contained in:
parent
72cc88dfdd
commit
db26e381a4
3 changed files with 21 additions and 0 deletions
|
@ -97,6 +97,14 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
|
|||
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)
|
||||
/* If we can, force the compiler to use an FMA instruction rather than break
|
||||
* vmlaq_lane_f32() into fmul/fadd. */
|
||||
#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Function: xcorr_kernel_neon_float
|
||||
* ---------------------------------
|
||||
|
|
|
@ -130,6 +130,13 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus
|
|||
|
||||
/* ========================================================================== */
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
/* If we can, force the compiler to use an FMA instruction rather than break
|
||||
vmlaq_f32() into fmul/fadd. */
|
||||
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
|
||||
/* This part of code simulates floating-point NEON operations. */
|
||||
|
|
|
@ -49,6 +49,12 @@ static OPUS_INLINE int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
/* If we can, force the compiler to use an FMA instruction rather than break
|
||||
vmlaq_f32() into fmul/fadd. */
|
||||
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
|
||||
#endif
|
||||
|
||||
#ifndef LPCNET_TEST
|
||||
static inline float32x4_t exp4_approx(float32x4_t x) {
|
||||
int32x4_t i;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue