From 9a2c0e34cad4d6f81103a8b6560fef69e8cd4047 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Wed, 14 Jun 2023 14:26:20 -0400 Subject: [PATCH] Detect AVX/AVX2/FMA instead of just AVX --- Makefile.am | 10 +++++----- celt/cpu_support.h | 2 +- celt/x86/x86cpu.c | 16 +++++++++++----- celt/x86/x86cpu.h | 6 +++--- celt_sources.mk | 2 +- configure.ac | 40 ++++++++++++++++++++-------------------- 6 files changed, 41 insertions(+), 35 deletions(-) diff --git a/Makefile.am b/Makefile.am index 2a3cef2c..d7c95594 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,8 +52,8 @@ endif if HAVE_SSE4_1 CELT_SOURCES += $(CELT_SOURCES_SSE4_1) endif -if HAVE_AVX -CELT_SOURCES += $(CELT_SOURCES_AVX) +if HAVE_AVX2 +CELT_SOURCES += $(CELT_SOURCES_AVX2) endif endif @@ -395,9 +395,9 @@ SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \ $(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS) endif -if HAVE_AVX -AVX_OBJ = $(CELT_SOURCES_AVX:.c=.lo) -$(AVX_OBJ): CFLAGS += $(OPUS_X86_AVX_CFLAGS) +if HAVE_AVX2 +AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) +$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS) endif if HAVE_ARM_NEON_INTR diff --git a/celt/cpu_support.h b/celt/cpu_support.h index 7b5c56ca..fdd9fb64 100644 --- a/celt/cpu_support.h +++ b/celt/cpu_support.h @@ -47,7 +47,7 @@ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))) + (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2))) #include "x86/x86cpu.h" /* We currently support 5 x86 variants: diff --git a/celt/x86/x86cpu.c b/celt/x86/x86cpu.c index 6a1914de..2e7c32ae 100644 --- a/celt/x86/x86cpu.c +++ b/celt/x86/x86cpu.c @@ -39,7 +39,7 @@ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))) + (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2))) #if defined(_MSC_VER) @@ -105,7 +105,7 @@ typedef struct CPU_Feature{ int HW_SSE2; int HW_SSE41; /* SIMD: 256-bit */ - int HW_AVX; + int HW_AVX2; } CPU_Feature; static void opus_cpu_feature_check(CPU_Feature *cpu_feature) @@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature) cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; - cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; + cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0; + if (cpu_feature->HW_AVX2 && nIds >= 7) { + cpuid(info, 7); + cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0; + } else { + cpu_feature->HW_AVX2 = 0; + } } else { cpu_feature->HW_SSE = 0; cpu_feature->HW_SSE2 = 0; cpu_feature->HW_SSE41 = 0; - cpu_feature->HW_AVX = 0; + cpu_feature->HW_AVX2 = 0; } } @@ -157,7 +163,7 @@ static int opus_select_arch_impl(void) } arch++; - if (!cpu_feature.HW_AVX) + if (!cpu_feature.HW_AVX2) { return arch; } diff --git a/celt/x86/x86cpu.h b/celt/x86/x86cpu.h index 04e80489..8bd69551 100644 --- a/celt/x86/x86cpu.h +++ b/celt/x86/x86cpu.h @@ -46,10 +46,10 @@ # define MAY_HAVE_SSE4_1(name) name ## _c # endif -# if defined(OPUS_X86_MAY_HAVE_AVX) -# define MAY_HAVE_AVX(name) name ## _avx +# if defined(OPUS_X86_MAY_HAVE_AVX2) +# define MAY_HAVE_AVX2(name) name ## _avx # else -# define MAY_HAVE_AVX(name) name ## _c +# define MAY_HAVE_AVX2(name) name ## _c # endif # if defined(OPUS_HAVE_RTCD) diff --git a/celt_sources.mk b/celt_sources.mk index 25d84cd2..6b73d48e 100644 --- a/celt_sources.mk +++ b/celt_sources.mk @@ -33,7 +33,7 @@ CELT_SOURCES_SSE4_1 = \ celt/x86/celt_lpc_sse4_1.c \ celt/x86/pitch_sse4_1.c -CELT_SOURCES_AVX = \ +CELT_SOURCES_AVX2 = \ celt/x86/pitch_avx.c CELT_SOURCES_ARM_RTCD = \ diff --git a/configure.ac b/configure.ac index cb346556..d3a56e16 100644 --- a/configure.ac +++ b/configure.ac @@ -368,12 +368,12 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM], AM_CONDITIONAL([HAVE_SSE], [false]) AM_CONDITIONAL([HAVE_SSE2], [false]) AM_CONDITIONAL([HAVE_SSE4_1], [false]) -AM_CONDITIONAL([HAVE_AVX], [false]) +AM_CONDITIONAL([HAVE_AVX2], [false]) m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse]) m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2]) m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1]) -m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx]) +m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -avx2]) m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon]) # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify # -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu), @@ -390,13 +390,13 @@ AS_CASE([$host], AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@]) -AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@]) +AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@]) AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@]) AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")]) AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")]) AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")]) -AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")]) +AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")]) AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])]) AC_DEFUN([OPUS_PATH_NE10], @@ -617,10 +617,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ ] ) OPUS_CHECK_INTRINSICS( - [AVX], - [$X86_AVX_CFLAGS], - [OPUS_X86_MAY_HAVE_AVX], - [OPUS_X86_PRESUME_AVX], + [AVX2], + [$X86_AVX2_CFLAGS], + [OPUS_X86_MAY_HAVE_AVX2], + [OPUS_X86_PRESUME_AVX2], [[#include #include ]], @@ -631,10 +631,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0)); ]] ) - AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"], + AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"], [ - OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS" - AC_SUBST([OPUS_X86_AVX_CFLAGS]) + OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS" + AC_SUBST([OPUS_X86_AVX2_CFLAGS]) ] ) AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""]) @@ -676,17 +676,17 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ [ AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics]) ]) - AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"], + AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"], [ - AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics]) - intrinsics_support="$intrinsics_support AVX" + AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics]) + intrinsics_support="$intrinsics_support AVX2" - AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"], - [AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])], - [rtcd_support="$rtcd_support AVX"]) + AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"], + [AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])], + [rtcd_support="$rtcd_support AVX2"]) ], [ - AC_MSG_WARN([Compiler does not support AVX intrinsics]) + AC_MSG_WARN([Compiler does not support AVX2 intrinsics]) ]) AS_IF([test x"$intrinsics_support" = x""], @@ -769,8 +769,8 @@ AM_CONDITIONAL([HAVE_SSE2], [test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"]) AM_CONDITIONAL([HAVE_SSE4_1], [test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"]) -AM_CONDITIONAL([HAVE_AVX], - [test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"]) +AM_CONDITIONAL([HAVE_AVX2], + [test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"]) AM_CONDITIONAL([HAVE_RTCD], [test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])