From 31a8028e9786fd1f463e797f979feb7df3a96947 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Wed, 14 Jun 2023 01:34:14 -0400 Subject: [PATCH] AVX version of celt_pitch_xcorr() Not used by anything yet --- Makefile.am | 8 ++++ celt/x86/pitch_avx.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ celt_sources.mk | 3 ++ 3 files changed, 108 insertions(+) create mode 100644 celt/x86/pitch_avx.c diff --git a/Makefile.am b/Makefile.am index cfe74016..2a3cef2c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,6 +52,9 @@ endif if HAVE_SSE4_1 CELT_SOURCES += $(CELT_SOURCES_SSE4_1) endif +if HAVE_AVX +CELT_SOURCES += $(CELT_SOURCES_AVX) +endif endif if CPU_ARM @@ -392,6 +395,11 @@ SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \ $(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS) endif +if HAVE_AVX +AVX_OBJ = $(CELT_SOURCES_AVX:.c=.lo) +$(AVX_OBJ): CFLAGS += $(OPUS_X86_AVX_CFLAGS) +endif + if HAVE_ARM_NEON_INTR ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \ $(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \ diff --git a/celt/x86/pitch_avx.c b/celt/x86/pitch_avx.c new file mode 100644 index 00000000..1a667dd7 --- /dev/null +++ b/celt/x86/pitch_avx.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2023 Amazon */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include +#include "x86cpu.h" +#include "pitch.h" + +/* Like the "regular" xcorr_kernel(), but computes 8 results at a time. */ +static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len) +{ + __m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7; + xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps(); + int i; + __m256 x0; + /* Compute 8 inner products using partial sums. */ + for (i=0;i0); + (void)arch; + for (i=0;i