From cd4c8249bc0e091789495a09b8942d28b687273c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Zanelli?= Date: Fri, 31 May 2013 15:07:00 +0200 Subject: [PATCH] Add run-time CPU detection and support for ARM architecture Run-time CPU detection (RTCD) is enabled by default if the target platform supports it. It can be disabled at compile time with the --disable-rtcd option. Add RTCD support for ARM architecture. Thanks to Timothy B. Terriberry for help and code review Signed-off-by: Timothy B. Terriberry --- Makefile.am | 4 ++ celt/arm/armcpu.c | 166 ++++++++++++++++++++++++++++++++++++++++++++ celt/arm/armcpu.h | 35 ++++++++++ celt/celt_decoder.c | 3 + celt/celt_encoder.c | 4 ++ celt/cpu_support.h | 51 ++++++++++++++ celt_headers.mk | 2 + celt_sources.mk | 3 + configure.ac | 20 ++++++ src/opus_decoder.c | 3 + src/opus_encoder.c | 4 ++ 11 files changed, 295 insertions(+) create mode 100644 celt/arm/armcpu.c create mode 100644 celt/arm/armcpu.h create mode 100644 celt/cpu_support.h diff --git a/Makefile.am b/Makefile.am index f04e3bc8..c9489f4d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,10 @@ SILK_SOURCES += $(SILK_SOURCES_FLOAT) OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) endif +if CPU_ARM +CELT_SOURCES += $(CELT_SOURCES_ARM) +endif + include celt_headers.mk include silk_headers.mk include opus_headers.mk diff --git a/celt/arm/armcpu.c b/celt/arm/armcpu.c new file mode 100644 index 00000000..aabcc716 --- /dev/null +++ b/celt/arm/armcpu.c @@ -0,0 +1,166 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Original code from libtheora modified to suit to Opus */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef OPUS_HAVE_RTCD + +#include "armcpu.h" +#include "cpu_support.h" +#include "os_support.h" +#include "opus_types.h" + +#define OPUS_CPU_ARM_V4 (1) +#define OPUS_CPU_ARM_EDSP (1<<1) +#define OPUS_CPU_ARM_MEDIA (1<<2) +#define OPUS_CPU_ARM_NEON (1<<3) + +#if defined(_MSC_VER) +/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ +# define WIN32_LEAN_AND_MEAN +# define WIN32_EXTRA_LEAN +# include <windows.h> + +static inline opus_uint32 opus_cpu_capabilities(void){ + opus_uint32 flags; + flags=0; + /* MSVC has no inline __asm support for ARM, but it does let you __emit + * instructions via their assembled hex code. + * All of these instructions should be essentially nops. 
*/ +# if defined(ARMv5E_ASM) + __try{ + /*PLD [r13]*/ + __emit(0xF5DDF000); + flags|=OPUS_CPU_ARM_EDSP; + } + __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ + /*Ignore exception.*/ + } +# if defined(ARMv6E_ASM) + __try{ + /*SHADD8 r3,r3,r3*/ + __emit(0xE6333F93); + flags|=OPUS_CPU_ARM_MEDIA; + } + __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ + /*Ignore exception.*/ + } +# if defined(ARM_HAVE_NEON) + __try{ + /*VORR q0,q0,q0*/ + __emit(0xF2200150); + flags|=OPUS_CPU_ARM_NEON; + } + __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ + /*Ignore exception.*/ + } +# endif +# endif +# endif + return flags; +} + +#elif defined(__linux__) +/* Linux based */ +opus_uint32 opus_cpu_capabilities(void) +{ + opus_uint32 flags = 0; + FILE *cpuinfo; + + /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on + * Android */ + cpuinfo = fopen("/proc/cpuinfo", "r"); + + if(cpuinfo != NULL) + { + /* 512 should be enough for anybody (it's even enough for all the flags that + * x86 has accumulated... so far). */ + char buf[512]; + + while(fgets(buf, 512, cpuinfo) != NULL) + { + /* Search for edsp and neon flag */ + if(memcmp(buf, "Features", 8) == 0) + { + char *p; + p = strstr(buf, " edsp"); + if(p != NULL && (p[5] == ' ' || p[5] == '\n')) + flags |= OPUS_CPU_ARM_EDSP; + + p = strstr(buf, " neon"); + if(p != NULL && (p[5] == ' ' || p[5] == '\n')) + flags |= OPUS_CPU_ARM_NEON; + } + + /* Search for media capabilities (>= ARMv6) */ + if(memcmp(buf, "CPU architecture:", 17) == 0) + { + int version; + version = atoi(buf+17); + + if(version >= 6) + flags |= OPUS_CPU_ARM_MEDIA; + } + } + + fclose(cpuinfo); + } + return flags; +} +#else +/* The feature registers which can tell us what the processor supports are + * accessible in privileged modes only, so we can't have a general user-space + * detection method like on x86.*/ +# error "Configured to use ARM asm but no CPU detection method available for " \ + "your platform. 
Reconfigure with --disable-rtcd (or send patches)." +#endif + +int opus_select_arch(void) +{ + opus_uint32 flags = opus_cpu_capabilities(); + int arch = 0; + + if(!(flags & OPUS_CPU_ARM_EDSP)) + return arch; + arch++; + + if(!(flags & OPUS_CPU_ARM_MEDIA)) + return arch; + arch++; + + if(!(flags & OPUS_CPU_ARM_NEON)) + return arch; + arch++; + + return arch; +} + +#endif diff --git a/celt/arm/armcpu.h b/celt/arm/armcpu.h new file mode 100644 index 00000000..68d80fe2 --- /dev/null +++ b/celt/arm/armcpu.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/* Original code from libtheora modified to suit to Opus */ + +#ifndef ARMCPU_H +#define ARMCPU_H + +int opus_select_arch(void); + +#endif diff --git a/celt/celt_decoder.c b/celt/celt_decoder.c index cfb5b36d..4d1903af 100644 --- a/celt/celt_decoder.c +++ b/celt/celt_decoder.c @@ -33,6 +33,7 @@ #define CELT_DECODER_C +#include "cpu_support.h" #include "os_support.h" #include "mdct.h" #include <math.h> @@ -69,6 +70,7 @@ struct OpusCustomDecoder { int downsample; int start, end; int signalling; + int arch; /* Everything beyond this point gets cleared on a reset */ #define DECODER_RESET_START rng @@ -157,6 +159,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod st->start = 0; st->end = st->mode->effEBands; st->signalling = 1; + st->arch = opus_select_arch(); st->loss_count = 0; diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 26e6ebbc..21ad4f8b 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -33,6 +33,7 @@ #define CELT_ENCODER_C +#include "cpu_support.h" #include "os_support.h" #include "mdct.h" #include <math.h> @@ -75,6 +76,7 @@ struct OpusCustomEncoder { int lsb_depth; int variable_duration; int lfe; + int arch; /* Everything beyond this point gets cleared on a reset */ #define ENCODER_RESET_START rng @@ -188,6 +190,8 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMod st->end = st->mode->effEBands; st->signalling = 1; + st->arch = opus_select_arch(); + st->constrained_vbr = 1; st->clip = 1; diff --git a/celt/cpu_support.h b/celt/cpu_support.h new file mode 100644 index 00000000..41481feb --- /dev/null +++ b/celt/cpu_support.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the 
following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef CPU_SUPPORT_H +#define CPU_SUPPORT_H + +#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM) +#include "arm/armcpu.h" + +/* We currently support 4 ARM variants: + * arch[0] -> ARMv4 + * arch[1] -> ARMv5E + * arch[2] -> ARMv6 + * arch[3] -> NEON + */ +#define OPUS_ARCHMASK 3 + +#else +#define OPUS_ARCHMASK 0 + +static inline int opus_select_arch(void) +{ + return 0; +} +#endif + +#endif diff --git a/celt_headers.mk b/celt_headers.mk index 1febcdea..49b1befd 100644 --- a/celt_headers.mk +++ b/celt_headers.mk @@ -2,6 +2,7 @@ CELT_HEAD = \ celt/arch.h \ celt/bands.h \ celt/celt.h \ +celt/cpu_support.h \ include/opus_types.h \ include/opus_defines.h \ include/opus_custom.h \ @@ -29,6 +30,7 @@ celt/stack_alloc.h \ celt/vq.h \ celt/static_modes_float.h \ celt/static_modes_fixed.h \ +celt/arm/armcpu.h \ celt/arm/fixed_armv4.h \ celt/arm/fixed_armv5e.h \ celt/arm/kiss_fft_armv4.h \ diff --git a/celt_sources.mk b/celt_sources.mk index 2a8e9f07..e1aee8fa 100644 --- a/celt_sources.mk +++ b/celt_sources.mk @@ -16,3 +16,6 @@ celt/celt_lpc.c \ celt/quant_bands.c \ celt/rate.c \ celt/vq.c + +CELT_SOURCES_ARM = \ +celt/arm/armcpu.c diff --git a/configure.ac b/configure.ac index 87ef2a55..25949948 100644 --- a/configure.ac +++ b/configure.ac @@ -155,6 +155,7 @@ if test "x${float_approx}" = "xyes"; then AC_DEFINE([FLOAT_APPROX], , [Float approximations]) fi +rtcd_support=no cpu_arm=no AC_ARG_ENABLE(asm, AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]), @@ -167,6 +168,7 @@ if test "x${ac_enable_asm}" = xyes ; then AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"], [asm_optimization="disabled"]) if test "x${asm_optimization}" = "xARM" ; then + rtcd_support=yes AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations]) AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0]) if test "x${ARMv5E_ASM}" = "x1" ; then @@ -178,6 +180,11 @@ if test "x${ac_enable_asm}" = xyes ; then AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations]) 
asm_optimization="${asm_optimization} (Media)" fi + AS_ASM_ARM_NEON([ARM_HAVE_NEON=1],[ARM_HAVE_NEON=0]) + if test "x${ARM_HAVE_NEON}" = "x1" ; then + AC_DEFINE([ARM_HAVE_NEON], 1, [Use ARM NEON optimizations]) + asm_optimization="${asm_optimization} (NEON)" + fi fi ;; esac @@ -185,6 +192,17 @@ else asm_optimization="disabled" fi +AC_ARG_ENABLE(rtcd, + AS_HELP_STRING([--disable-rtcd], [Disable run-time CPU capabilities detection]), + [ ac_enable_rtcd=$enableval ], [ ac_enable_rtcd=yes] ) +if test "x${ac_enable_rtcd}" = xyes -a "x${rtcd_support}" = xyes ; then + AC_DEFINE([OPUS_HAVE_RTCD], 1, [Use run-time CPU capabilities detection]) +elif test "x${rtcd_support}" = xno ; then + rtcd_support="no rtcd for your platform, please send patches" +else + rtcd_support="no" +fi + ac_enable_assertions="no" AC_ARG_ENABLE(assertions, [ --enable-assertions enable additional software error checking], [if test "$enableval" = yes; then @@ -281,6 +299,7 @@ AC_SUBST(SIZE32) AM_CONDITIONAL([FIXED_POINT], [test x$ac_enable_fixed = xyes]) AM_CONDITIONAL([CUSTOM_MODES], [test x$ac_enable_custom_modes = xyes]) AM_CONDITIONAL([EXTRA_PROGRAMS], [test x$ac_enable_extra_programs = xyes]) +AM_CONDITIONAL([CPU_ARM], [test x$cpu_arm = xyes]) dnl subsitutions for the pkg-config files if test x$ac_enable_float = xyes; then @@ -321,6 +340,7 @@ AC_MSG_RESULT([ Fast float approximations: ..... ${float_approx} Fixed point debugging: ......... ${ac_enable_fixed_debug} Assembly optimization: ......... ${asm_optimization} + Run-time CPU detection: ........ ${rtcd_support} Custom modes: .................. ${ac_enable_custom_modes} Assertion checking: ............ ${ac_enable_assertions} Fuzzing: ....................... 
${ac_enable_fuzzing} diff --git a/src/opus_decoder.c b/src/opus_decoder.c index f0b2b6f9..6bc70919 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -46,6 +46,7 @@ #include "structs.h" #include "define.h" #include "mathops.h" +#include "cpu_support.h" struct OpusDecoder { int celt_dec_offset; @@ -70,6 +71,7 @@ struct OpusDecoder { #endif opus_uint32 rangeFinal; + int arch; }; #ifdef FIXED_POINT @@ -119,6 +121,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) st->Fs = Fs; st->DecControl.API_sampleRate = st->Fs; st->DecControl.nChannelsAPI = st->channels; + st->arch = opus_select_arch(); /* Reset decoder */ ret = silk_InitDecoder( silk_dec ); diff --git a/src/opus_encoder.c b/src/opus_encoder.c index b6424d61..305fad99 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -40,6 +40,7 @@ #include "arch.h" #include "opus_private.h" #include "os_support.h" +#include "cpu_support.h" #include "analysis.h" #include "mathops.h" #include "tuning_parameters.h" @@ -103,6 +104,7 @@ struct OpusEncoder { int analysis_offset; #endif opus_uint32 rangeFinal; + int arch; }; /* Transition tables for the voice and music. First column is the @@ -184,6 +186,8 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->Fs = Fs; + st->arch = opus_select_arch(); + ret = silk_InitEncoder( silk_enc, &st->silk_mode ); if(ret)return OPUS_INTERNAL_ERROR;