Trying to use fma instructions when possible

Compilers sometimes replace vmlaq*() with fmul+fadd instead of fmla.
Trying to use vfmaq*() instead when possible.
This commit is contained in:
Jean-Marc Valin 2023-11-28 14:16:57 -05:00
parent 72cc88dfdd
commit db26e381a4
No known key found for this signature in database
GPG key ID: 531A52533318F00A
3 changed files with 21 additions and 0 deletions

View file

@ -97,6 +97,14 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
}
#else
#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)
/* If we can, force the compiler to use an FMA instruction rather than break
* vmlaq_f32() into fmul/fadd. */
#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)
#endif
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------

View file

@ -130,6 +130,13 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus
/* ========================================================================== */
#ifdef __ARM_FEATURE_FMA
/* If we can, force the compiler to use an FMA instruction rather than break
vmlaq_f32() into fmul/fadd. */
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
#endif
#ifdef OPUS_CHECK_ASM
/* This part of code simulates floating-point NEON operations. */

View file

@ -49,6 +49,12 @@ static OPUS_INLINE int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b) {
}
#endif
#ifdef __ARM_FEATURE_FMA
/* If we can, force the compiler to use an FMA instruction rather than break
vmlaq_f32() into fmul/fadd. */
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
#endif
#ifndef LPCNET_TEST
static inline float32x4_t exp4_approx(float32x4_t x) {
int32x4_t i;