First step towards DNN optimization for ARMv7 Neon
Still missing some intrinsics
This commit is contained in:
parent
c9af8f80f7
commit
cc11c078cd
2 changed files with 8 additions and 0 deletions
|
@ -450,6 +450,7 @@ endif
|
||||||
if HAVE_ARM_NEON_INTR
|
if HAVE_ARM_NEON_INTR
|
||||||
ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
|
ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
|
||||||
$(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
|
$(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
|
||||||
|
$(DNN_SOURCES_NEON:.c=.lo) \
|
||||||
$(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
|
$(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
|
||||||
$(ARM_NEON_INTR_OBJ): CFLAGS += \
|
$(ARM_NEON_INTR_OBJ): CFLAGS += \
|
||||||
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
|
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
|
||||||
|
|
|
@ -34,6 +34,13 @@
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#include "os_support.h"
|
#include "os_support.h"
|
||||||
|
|
||||||
|
#if defined(__arm__) && !defined(__aarch64__)
/* Stand-in for vcvtnq_s32_f32() on 32-bit ARM (ARMv7 Neon), where this
   header provides its own definition instead of relying on the toolchain.

   The input is first converted to signed fixed point with 8 fractional
   bits, and those 8 bits are then removed again with a rounding right
   shift, leaving one 32-bit integer per lane.

   NOTE(review): this looks like an approximation of the native intrinsic
   rather than an exact match -- exact ties (e.g. 0.5, 2.5) appear to round
   upward instead of to even, and the usable input magnitude is bounded by
   the 8-fractional-bit intermediate. Confirm callers stay within that. */
static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
  int32x4_t fixed_q8;

  /* Float -> fixed point with 8 fractional bits. */
  fixed_q8 = vcvtq_n_s32_f32(x, 8);
  /* Rounding shift right by 8 drops the fractional bits. */
  return vrshrq_n_s32(fixed_q8, 8);
}
#endif
|
||||||
|
|
||||||
#ifndef LPCNET_TEST
|
#ifndef LPCNET_TEST
|
||||||
static inline float32x4_t exp4_approx(float32x4_t x) {
|
static inline float32x4_t exp4_approx(float32x4_t x) {
|
||||||
int32x4_t i;
|
int32x4_t i;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue