Start enabling AVX2 silk_inner_product_FLP()

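Not yet hooked up to RTCD (run-time CPU detection): the AVX2 version is only
selected when AVX2 is presumed at compile time (OPUS_X86_PRESUME_AVX2).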
2023-11-21 02:13:06 -05:00 · 2023-11-21 02:13:06 -05:00 · b93e4a149c
commit b93e4a149c
parent ed90060389
6 changed files with 33 additions and 3 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -39,6 +39,9 @@ SILK_SOURCES += $(SILK_SOURCES_FLOAT)
 if HAVE_SSE4_1
 SILK_SOURCES += $(SILK_SOURCES_SSE4_1)
 endif
+if HAVE_AVX2
+SILK_SOURCES += $(SILK_SOURCES_FLOAT_AVX2)
+endif
 endif

 if DISABLE_FLOAT_API
@@ -427,6 +430,7 @@ endif
 if HAVE_AVX2
 AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
           $(SILK_SOURCES_AVX2:.c=.lo) \
+           $(SILK_SOURCES_FLOAT_AVX2:.c=.lo) \
           $(DNN_SOURCES_AVX2:.c=.lo)
 $(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
 endif
--- a/silk/float/SigProc_FLP.h
+++ b/silk/float/SigProc_FLP.h
@@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.

 #include "SigProc_FIX.h"
 #include "float_cast.h"
+#include "main.h"
 #include <math.h>

 #ifdef  __cplusplus
@@ -124,12 +125,17 @@ void silk_scale_copy_vector_FLP(
 );

 /* inner product of two silk_float arrays, with result as double */
-double silk_inner_product_FLP(
+double silk_inner_product_FLP_c(
    const silk_float    *data1,
    const silk_float    *data2,
    opus_int            dataSize
 );

+#ifndef OVERRIDE_inner_product_FLP
+#define silk_inner_product_FLP(data1, data2, dataSize) silk_inner_product_FLP_c(data1, data2, dataSize)
+#endif
+
+
 /* sum of squares of a silk_float array, with result as double */
 double silk_energy_FLP(
    const silk_float    *data,
--- a/silk/float/inner_product_FLP.c
+++ b/silk/float/inner_product_FLP.c
@@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "SigProc_FLP.h"

 /* inner product of two silk_float arrays, with result as double */
-double silk_inner_product_FLP(
+double silk_inner_product_FLP_c(
    const silk_float    *data1,
    const silk_float    *data2,
    opus_int            dataSize
--- a/silk/float/x86/inner_product_FLP_avx2.c
+++ b/silk/float/x86/inner_product_FLP_avx2.c
@@ -35,7 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.


 /* inner product of two silk_float arrays, with result as double */
-double silk_inner_product_FLP(
+double silk_inner_product_FLP_avx2(
    const silk_float    *data1,
    const silk_float    *data2,
    opus_int            dataSize
--- a/silk/x86/main_sse.h
+++ b/silk/x86/main_sse.h
@@ -269,5 +269,22 @@ extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(

 #  endif

+double silk_inner_product_FLP_avx2(
+    const silk_float    *data1,
+    const silk_float    *data2,
+    opus_int            dataSize
+);
+
+#if defined (OPUS_X86_PRESUME_AVX2)
+
+#define OVERRIDE_inner_product_FLP
+#define silk_inner_product_FLP(data1, data2, dataSize) silk_inner_product_FLP_avx2(data1, data2, dataSize)
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
+
+/*#define OVERRIDE_inner_product_FLP*/
+
+#endif
+
 # endif
 #endif
--- a/silk_sources.mk
+++ b/silk_sources.mk
@@ -159,3 +159,6 @@ silk/float/scale_copy_vector_FLP.c \
 silk/float/scale_vector_FLP.c \
 silk/float/schur_FLP.c \
 silk/float/sort_FLP.c
+
+SILK_SOURCES_FLOAT_AVX2 = \
+silk/float/x86/inner_product_FLP_avx2.c