From cc11c078cd8e1baf642ef0f1d2deaa98af596581 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin
Date: Sun, 26 Nov 2023 03:36:46 -0500
Subject: [PATCH] First step towards DNN optimization for ARMv7 Neon

Still missing some intrinsics
---
 Makefile.am    | 1 +
 dnn/vec_neon.h | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/Makefile.am b/Makefile.am
index f99e7c31..1b772446 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -450,6 +450,7 @@ endif
 if HAVE_ARM_NEON_INTR
 ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
                     $(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
+                    $(DNN_SOURCES_NEON:.c=.lo) \
                     $(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
 $(ARM_NEON_INTR_OBJ): CFLAGS += \
  $(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
diff --git a/dnn/vec_neon.h b/dnn/vec_neon.h
index e9959b97..18e4b3a4 100644
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -34,6 +34,13 @@
 #include <arm_neon.h>
 #include "os_support.h"
 
+#if defined(__arm__) && !defined(__aarch64__)
+/* Emulate vcvtnq_s32_f32() for ARMv7 Neon. */
+static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
+  return vrshrq_n_s32(vcvtq_n_s32_f32(x, 8), 8);
+}
+#endif
+
 #ifndef LPCNET_TEST
 static inline float32x4_t exp4_approx(float32x4_t x) {
   int32x4_t i;