mirror of
https://github.com/xiph/opus.git
synced 2025-05-14 15:38:32 +00:00
Detect AVX/AVX2/FMA instead of just AVX
This commit is contained in:
parent
31a8028e97
commit
9a2c0e34ca
6 changed files with 41 additions and 35 deletions
10
Makefile.am
10
Makefile.am
|
@ -52,8 +52,8 @@ endif
|
|||
if HAVE_SSE4_1
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
|
||||
endif
|
||||
if HAVE_AVX
|
||||
CELT_SOURCES += $(CELT_SOURCES_AVX)
|
||||
if HAVE_AVX2
|
||||
CELT_SOURCES += $(CELT_SOURCES_AVX2)
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -395,9 +395,9 @@ SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
|
|||
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_AVX
|
||||
AVX_OBJ = $(CELT_SOURCES_AVX:.c=.lo)
|
||||
$(AVX_OBJ): CFLAGS += $(OPUS_X86_AVX_CFLAGS)
|
||||
if HAVE_AVX2
|
||||
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
|
||||
$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_ARM_NEON_INTR
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
|
||||
|
||||
#include "x86/x86cpu.h"
|
||||
/* We currently support 5 x86 variants:
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
|
@ -105,7 +105,7 @@ typedef struct CPU_Feature{
|
|||
int HW_SSE2;
|
||||
int HW_SSE41;
|
||||
/* SIMD: 256-bit */
|
||||
int HW_AVX;
|
||||
int HW_AVX2;
|
||||
} CPU_Feature;
|
||||
|
||||
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
|
||||
|
@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
|
|||
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
|
||||
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
|
||||
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
|
||||
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
|
||||
cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
|
||||
if (cpu_feature->HW_AVX2 && nIds >= 7) {
|
||||
cpuid(info, 7);
|
||||
cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
|
||||
} else {
|
||||
cpu_feature->HW_AVX2 = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
cpu_feature->HW_SSE = 0;
|
||||
cpu_feature->HW_SSE2 = 0;
|
||||
cpu_feature->HW_SSE41 = 0;
|
||||
cpu_feature->HW_AVX = 0;
|
||||
cpu_feature->HW_AVX2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -157,7 +163,7 @@ static int opus_select_arch_impl(void)
|
|||
}
|
||||
arch++;
|
||||
|
||||
if (!cpu_feature.HW_AVX)
|
||||
if (!cpu_feature.HW_AVX2)
|
||||
{
|
||||
return arch;
|
||||
}
|
||||
|
|
|
@ -46,10 +46,10 @@
|
|||
# define MAY_HAVE_SSE4_1(name) name ## _c
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_X86_MAY_HAVE_AVX)
|
||||
# define MAY_HAVE_AVX(name) name ## _avx
|
||||
# if defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
# define MAY_HAVE_AVX2(name) name ## _avx
|
||||
# else
|
||||
# define MAY_HAVE_AVX(name) name ## _c
|
||||
# define MAY_HAVE_AVX2(name) name ## _c
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_HAVE_RTCD)
|
||||
|
|
|
@ -33,7 +33,7 @@ CELT_SOURCES_SSE4_1 = \
|
|||
celt/x86/celt_lpc_sse4_1.c \
|
||||
celt/x86/pitch_sse4_1.c
|
||||
|
||||
CELT_SOURCES_AVX = \
|
||||
CELT_SOURCES_AVX2 = \
|
||||
celt/x86/pitch_avx.c
|
||||
|
||||
CELT_SOURCES_ARM_RTCD = \
|
||||
|
|
40
configure.ac
40
configure.ac
|
@ -368,12 +368,12 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
|
|||
AM_CONDITIONAL([HAVE_SSE], [false])
|
||||
AM_CONDITIONAL([HAVE_SSE2], [false])
|
||||
AM_CONDITIONAL([HAVE_SSE4_1], [false])
|
||||
AM_CONDITIONAL([HAVE_AVX], [false])
|
||||
AM_CONDITIONAL([HAVE_AVX2], [false])
|
||||
|
||||
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
|
||||
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
|
||||
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
|
||||
m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
|
||||
m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -avx2])
|
||||
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
|
||||
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
|
||||
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
|
||||
|
@ -390,13 +390,13 @@ AS_CASE([$host],
|
|||
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
|
||||
|
||||
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
|
||||
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
|
||||
|
||||
AC_DEFUN([OPUS_PATH_NE10],
|
||||
|
@ -617,10 +617,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
|||
]
|
||||
)
|
||||
OPUS_CHECK_INTRINSICS(
|
||||
[AVX],
|
||||
[$X86_AVX_CFLAGS],
|
||||
[OPUS_X86_MAY_HAVE_AVX],
|
||||
[OPUS_X86_PRESUME_AVX],
|
||||
[AVX2],
|
||||
[$X86_AVX2_CFLAGS],
|
||||
[OPUS_X86_MAY_HAVE_AVX2],
|
||||
[OPUS_X86_PRESUME_AVX2],
|
||||
[[#include <immintrin.h>
|
||||
#include <time.h>
|
||||
]],
|
||||
|
@ -631,10 +631,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
|||
return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
|
||||
]]
|
||||
)
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
|
||||
[
|
||||
OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
|
||||
AC_SUBST([OPUS_X86_AVX_CFLAGS])
|
||||
OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
|
||||
AC_SUBST([OPUS_X86_AVX2_CFLAGS])
|
||||
]
|
||||
)
|
||||
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
|
||||
|
@ -676,17 +676,17 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
|||
[
|
||||
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
|
||||
])
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
|
||||
[
|
||||
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
|
||||
intrinsics_support="$intrinsics_support AVX"
|
||||
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
|
||||
intrinsics_support="$intrinsics_support AVX2"
|
||||
|
||||
AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
|
||||
[AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
|
||||
[rtcd_support="$rtcd_support AVX"])
|
||||
AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
|
||||
[AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
|
||||
[rtcd_support="$rtcd_support AVX2"])
|
||||
],
|
||||
[
|
||||
AC_MSG_WARN([Compiler does not support AVX intrinsics])
|
||||
AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
|
||||
])
|
||||
|
||||
AS_IF([test x"$intrinsics_support" = x""],
|
||||
|
@ -769,8 +769,8 @@ AM_CONDITIONAL([HAVE_SSE2],
|
|||
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_SSE4_1],
|
||||
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_AVX],
|
||||
[test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_AVX2],
|
||||
[test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])
|
||||
|
||||
AM_CONDITIONAL([HAVE_RTCD],
|
||||
[test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue