Detect AVX/AVX2/FMA instead of just AVX

This commit is contained in:
Jean-Marc Valin 2023-06-14 14:26:20 -04:00
parent 31a8028e97
commit 9a2c0e34ca
No known key found for this signature in database
GPG key ID: 531A52533318F00A
6 changed files with 41 additions and 35 deletions

View file

@ -52,8 +52,8 @@ endif
if HAVE_SSE4_1 if HAVE_SSE4_1
CELT_SOURCES += $(CELT_SOURCES_SSE4_1) CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
endif endif
if HAVE_AVX if HAVE_AVX2
CELT_SOURCES += $(CELT_SOURCES_AVX) CELT_SOURCES += $(CELT_SOURCES_AVX2)
endif endif
endif endif
@ -395,9 +395,9 @@ SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS) $(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
endif endif
if HAVE_AVX if HAVE_AVX2
AVX_OBJ = $(CELT_SOURCES_AVX:.c=.lo) AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
$(AVX_OBJ): CFLAGS += $(OPUS_X86_AVX_CFLAGS) $(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
endif endif
if HAVE_ARM_NEON_INTR if HAVE_ARM_NEON_INTR

View file

@ -47,7 +47,7 @@
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))) (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#include "x86/x86cpu.h" #include "x86/x86cpu.h"
/* We currently support 5 x86 variants: /* We currently support 5 x86 variants:

View file

@ -39,7 +39,7 @@
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))) (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -105,7 +105,7 @@ typedef struct CPU_Feature{
int HW_SSE2; int HW_SSE2;
int HW_SSE41; int HW_SSE41;
/* SIMD: 256-bit */ /* SIMD: 256-bit */
int HW_AVX; int HW_AVX2;
} CPU_Feature; } CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature) static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
if (cpu_feature->HW_AVX2 && nIds >= 7) {
cpuid(info, 7);
cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
} else {
cpu_feature->HW_AVX2 = 0;
}
} }
else { else {
cpu_feature->HW_SSE = 0; cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0; cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0; cpu_feature->HW_SSE41 = 0;
cpu_feature->HW_AVX = 0; cpu_feature->HW_AVX2 = 0;
} }
} }
@ -157,7 +163,7 @@ static int opus_select_arch_impl(void)
} }
arch++; arch++;
if (!cpu_feature.HW_AVX) if (!cpu_feature.HW_AVX2)
{ {
return arch; return arch;
} }

View file

@ -46,10 +46,10 @@
# define MAY_HAVE_SSE4_1(name) name ## _c # define MAY_HAVE_SSE4_1(name) name ## _c
# endif # endif
# if defined(OPUS_X86_MAY_HAVE_AVX) # if defined(OPUS_X86_MAY_HAVE_AVX2)
# define MAY_HAVE_AVX(name) name ## _avx # define MAY_HAVE_AVX2(name) name ## _avx
# else # else
# define MAY_HAVE_AVX(name) name ## _c # define MAY_HAVE_AVX2(name) name ## _c
# endif # endif
# if defined(OPUS_HAVE_RTCD) # if defined(OPUS_HAVE_RTCD)

View file

@ -33,7 +33,7 @@ CELT_SOURCES_SSE4_1 = \
celt/x86/celt_lpc_sse4_1.c \ celt/x86/celt_lpc_sse4_1.c \
celt/x86/pitch_sse4_1.c celt/x86/pitch_sse4_1.c
CELT_SOURCES_AVX = \ CELT_SOURCES_AVX2 = \
celt/x86/pitch_avx.c celt/x86/pitch_avx.c
CELT_SOURCES_ARM_RTCD = \ CELT_SOURCES_ARM_RTCD = \

View file

@ -368,12 +368,12 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
AM_CONDITIONAL([HAVE_SSE], [false]) AM_CONDITIONAL([HAVE_SSE], [false])
AM_CONDITIONAL([HAVE_SSE2], [false]) AM_CONDITIONAL([HAVE_SSE2], [false])
AM_CONDITIONAL([HAVE_SSE4_1], [false]) AM_CONDITIONAL([HAVE_SSE4_1], [false])
AM_CONDITIONAL([HAVE_AVX], [false]) AM_CONDITIONAL([HAVE_AVX2], [false])
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse]) m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2]) m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1]) m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx]) m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon]) m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu), # -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
@ -390,13 +390,13 @@ AS_CASE([$host],
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@]) AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@]) AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")]) AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")]) AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")]) AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")]) AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])]) AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
AC_DEFUN([OPUS_PATH_NE10], AC_DEFUN([OPUS_PATH_NE10],
@ -617,10 +617,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
] ]
) )
OPUS_CHECK_INTRINSICS( OPUS_CHECK_INTRINSICS(
[AVX], [AVX2],
[$X86_AVX_CFLAGS], [$X86_AVX2_CFLAGS],
[OPUS_X86_MAY_HAVE_AVX], [OPUS_X86_MAY_HAVE_AVX2],
[OPUS_X86_PRESUME_AVX], [OPUS_X86_PRESUME_AVX2],
[[#include <immintrin.h> [[#include <immintrin.h>
#include <time.h> #include <time.h>
]], ]],
@ -631,10 +631,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0)); return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
]] ]]
) )
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"], AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
[ [
OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS" OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
AC_SUBST([OPUS_X86_AVX_CFLAGS]) AC_SUBST([OPUS_X86_AVX2_CFLAGS])
] ]
) )
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""]) AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
@ -676,17 +676,17 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[ [
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics]) AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
]) ])
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"], AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
[ [
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics]) AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
intrinsics_support="$intrinsics_support AVX" intrinsics_support="$intrinsics_support AVX2"
AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"], AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
[AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])], [AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
[rtcd_support="$rtcd_support AVX"]) [rtcd_support="$rtcd_support AVX2"])
], ],
[ [
AC_MSG_WARN([Compiler does not support AVX intrinsics]) AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
]) ])
AS_IF([test x"$intrinsics_support" = x""], AS_IF([test x"$intrinsics_support" = x""],
@ -769,8 +769,8 @@ AM_CONDITIONAL([HAVE_SSE2],
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"]) [test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
AM_CONDITIONAL([HAVE_SSE4_1], AM_CONDITIONAL([HAVE_SSE4_1],
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"]) [test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
AM_CONDITIONAL([HAVE_AVX], AM_CONDITIONAL([HAVE_AVX2],
[test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"]) [test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])
AM_CONDITIONAL([HAVE_RTCD], AM_CONDITIONAL([HAVE_RTCD],
[test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"]) [test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])