Add run-time CPU detection and support for ARM architecture

Run-time CPU detection (RTCD) is enabled by default if the target platform
supports it.
It can be disabled at compile time with the --disable-rtcd option.

Add RTCD support for ARM architecture.

Thanks to Timothy B. Terriberry for help and code review

Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
This commit is contained in:
Aurélien Zanelli 2013-05-31 15:07:00 +02:00 committed by Timothy B. Terriberry
parent aa6a1a16ad
commit cd4c8249bc
11 changed files with 295 additions and 0 deletions

View file

@ -18,6 +18,10 @@ SILK_SOURCES += $(SILK_SOURCES_FLOAT)
OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
endif endif
if CPU_ARM
CELT_SOURCES += $(CELT_SOURCES_ARM)
endif
include celt_headers.mk include celt_headers.mk
include silk_headers.mk include silk_headers.mk
include opus_headers.mk include opus_headers.mk

166
celt/arm/armcpu.c Normal file
View file

@ -0,0 +1,166 @@
/* Copyright (c) 2010 Xiph.Org Foundation
* Copyright (c) 2013 Parrot */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from libtheora, modified to suit Opus */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef OPUS_HAVE_RTCD
#include "armcpu.h"
#include "cpu_support.h"
#include "os_support.h"
#include "opus_types.h"
#define OPUS_CPU_ARM_V4 (1)
#define OPUS_CPU_ARM_EDSP (1<<1)
#define OPUS_CPU_ARM_MEDIA (1<<2)
#define OPUS_CPU_ARM_NEON (1<<3)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
# define WIN32_LEAN_AND_MEAN
# define WIN32_EXTRA_LEAN
# include <windows.h>
/* Probes the CPU for optional ARM instruction-set extensions (MSVC build).
 *
 * Each probe executes a single instruction inside a __try block. If the
 * processor raises EXCEPTION_ILLEGAL_INSTRUCTION the handler swallows it and
 * the matching OPUS_CPU_ARM_* bit is simply left clear; otherwise the bit is
 * set.
 *
 * The probes are nested: the media probe is only compiled when EDSP asm is
 * enabled, and the NEON probe only when media asm is enabled.
 *
 * Returns a bitmask made of OPUS_CPU_ARM_EDSP, OPUS_CPU_ARM_MEDIA and
 * OPUS_CPU_ARM_NEON. */
static inline opus_uint32 opus_cpu_capabilities(void){
  opus_uint32 flags;
  flags=0;
  /* MSVC has no inline __asm support for ARM, but it does let you __emit
   * instructions via their assembled hex code.
   * All of these instructions should be essentially nops. */
# if defined(ARMv5E_ASM)
  __try{
    /*PLD [r13]*/
    __emit(0xF5DDF000);
    flags|=OPUS_CPU_ARM_EDSP;
  }
  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
    /*Ignore exception.*/
  }
  /* The configure script defines ARMv6_ASM; ARMv6E_ASM is still accepted so
   * that hand-written configurations using the old spelling keep working. */
#  if defined(ARMv6_ASM) || defined(ARMv6E_ASM)
  __try{
    /*SHADD8 r3,r3,r3*/
    __emit(0xE6333F93);
    flags|=OPUS_CPU_ARM_MEDIA;
  }
  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
    /*Ignore exception.*/
  }
#   if defined(ARM_HAVE_NEON)
  __try{
    /*VORR q0,q0,q0*/
    __emit(0xF2200150);
    flags|=OPUS_CPU_ARM_NEON;
  }
  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
    /*Ignore exception.*/
  }
#   endif
#  endif
# endif
  return flags;
}
#elif defined(__linux__)
/* Linux based */
/* Reports whether `flag` appears as a complete word in `line`.
 *
 * `flag` is expected to start with its separating space (e.g. " neon"). A hit
 * only counts when it is followed by a space, a newline or the end of the
 * string, so that a longer name sharing the same prefix is not mistaken for
 * it. Every occurrence is examined, not just the first one.
 *
 * Returns 1 on a match and 0 otherwise. */
static int opus_cpuinfo_has_flag(const char *line, const char *flag)
{
  size_t len = strlen(flag);
  const char *p = line;
  while((p = strstr(p, flag)) != NULL)
  {
    char next = p[len];
    if(next == ' ' || next == '\n' || next == '\0')
      return 1;
    /* Partial match (prefix of a longer word): keep looking further on. */
    p += len;
  }
  return 0;
}

/* Builds the OPUS_CPU_ARM_* capability mask by parsing /proc/cpuinfo.
 *
 * The "Features" line supplies the EDSP and NEON bits; a "CPU architecture:"
 * value of 6 or more supplies the MEDIA bit.
 *
 * Returns the resulting bitmask, or 0 when /proc/cpuinfo cannot be opened. */
opus_uint32 opus_cpu_capabilities(void)
{
  opus_uint32 flags = 0;
  FILE *cpuinfo;

  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
   * Android */
  cpuinfo = fopen("/proc/cpuinfo", "r");
  if(cpuinfo != NULL)
  {
    /* 512 should be enough for anybody (it's even enough for all the flags that
     * x86 has accumulated... so far). */
    char buf[512];

    while(fgets(buf, (int)sizeof(buf), cpuinfo) != NULL)
    {
      /* strncmp() stops at the terminating NUL, so short lines never cause a
       * read of stale or uninitialised bytes further into buf. */

      /* Search for edsp and neon flag */
      if(strncmp(buf, "Features", 8) == 0)
      {
        if(opus_cpuinfo_has_flag(buf, " edsp"))
          flags |= OPUS_CPU_ARM_EDSP;
        if(opus_cpuinfo_has_flag(buf, " neon"))
          flags |= OPUS_CPU_ARM_NEON;
      }

      /* Search for media capabilities (>= ARMv6) */
      if(strncmp(buf, "CPU architecture:", 17) == 0)
      {
        /* strtol() has defined behaviour on out-of-range input, unlike
         * atoi(); a non-numeric value yields 0 and leaves the bit clear. */
        long version;
        version = strtol(buf+17, NULL, 10);
        if(version >= 6)
          flags |= OPUS_CPU_ARM_MEDIA;
      }
    }
    fclose(cpuinfo);
  }
  return flags;
}
#else
/* The feature registers which can tell us what the processor supports are
 * accessible in privileged modes only, so we can't have a general user-space
 * detection method like on x86.*/
# error "Configured to use ARM asm but no CPU detection method available for " \
"your platform. Reconfigure with --disable-rtcd (or send patches)."
#endif
/* Maps the detected CPU capabilities onto an architecture index.
 *
 * The capabilities are checked in a fixed order (EDSP, then MEDIA, then NEON)
 * and the index is the number of consecutive capabilities found before the
 * first missing one. A later capability is therefore ignored when an earlier
 * one is absent.
 *
 * Returns a value in the range 0..3. */
int opus_select_arch(void)
{
  static const opus_uint32 required[3] = {
    OPUS_CPU_ARM_EDSP,
    OPUS_CPU_ARM_MEDIA,
    OPUS_CPU_ARM_NEON
  };
  opus_uint32 caps = opus_cpu_capabilities();
  int level;

  for(level = 0; level < 3; level++)
  {
    /* Stop at the first capability the CPU does not report. */
    if((caps & required[level]) == 0)
      break;
  }
  return level;
}
#endif

35
celt/arm/armcpu.h Normal file
View file

@ -0,0 +1,35 @@
/* Copyright (c) 2010 Xiph.Org Foundation
* Copyright (c) 2013 Parrot */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from libtheora, modified to suit Opus */
#ifndef ARMCPU_H
#define ARMCPU_H
int opus_select_arch(void);
#endif

View file

@ -33,6 +33,7 @@
#define CELT_DECODER_C #define CELT_DECODER_C
#include "cpu_support.h"
#include "os_support.h" #include "os_support.h"
#include "mdct.h" #include "mdct.h"
#include <math.h> #include <math.h>
@ -69,6 +70,7 @@ struct OpusCustomDecoder {
int downsample; int downsample;
int start, end; int start, end;
int signalling; int signalling;
int arch;
/* Everything beyond this point gets cleared on a reset */ /* Everything beyond this point gets cleared on a reset */
#define DECODER_RESET_START rng #define DECODER_RESET_START rng
@ -157,6 +159,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->start = 0; st->start = 0;
st->end = st->mode->effEBands; st->end = st->mode->effEBands;
st->signalling = 1; st->signalling = 1;
st->arch = opus_select_arch();
st->loss_count = 0; st->loss_count = 0;

View file

@ -33,6 +33,7 @@
#define CELT_ENCODER_C #define CELT_ENCODER_C
#include "cpu_support.h"
#include "os_support.h" #include "os_support.h"
#include "mdct.h" #include "mdct.h"
#include <math.h> #include <math.h>
@ -75,6 +76,7 @@ struct OpusCustomEncoder {
int lsb_depth; int lsb_depth;
int variable_duration; int variable_duration;
int lfe; int lfe;
int arch;
/* Everything beyond this point gets cleared on a reset */ /* Everything beyond this point gets cleared on a reset */
#define ENCODER_RESET_START rng #define ENCODER_RESET_START rng
@ -188,6 +190,8 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMod
st->end = st->mode->effEBands; st->end = st->mode->effEBands;
st->signalling = 1; st->signalling = 1;
st->arch = opus_select_arch();
st->constrained_vbr = 1; st->constrained_vbr = 1;
st->clip = 1; st->clip = 1;

51
celt/cpu_support.h Normal file
View file

@ -0,0 +1,51 @@
/* Copyright (c) 2010 Xiph.Org Foundation
* Copyright (c) 2013 Parrot */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CPU_SUPPORT_H
#define CPU_SUPPORT_H
#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
#include "arm/armcpu.h"
/* We currently support 4 ARM variants:
* arch[0] -> ARMv4
* arch[1] -> ARMv5E
* arch[2] -> ARMv6
* arch[3] -> NEON
*/
#define OPUS_ARCHMASK 3
#else
#define OPUS_ARCHMASK 0
/* Fallback used when run-time CPU detection is not compiled in (this branch
 * is taken unless both OPUS_HAVE_RTCD and ARMv4_ASM are defined).
 * OPUS_ARCHMASK is 0 here, so the only architecture index is 0.
 *
 * Returns 0 unconditionally. */
static inline int opus_select_arch(void)
{
  const int generic_arch = 0;
  return generic_arch;
}
#endif
#endif

View file

@ -2,6 +2,7 @@ CELT_HEAD = \
celt/arch.h \ celt/arch.h \
celt/bands.h \ celt/bands.h \
celt/celt.h \ celt/celt.h \
celt/cpu_support.h \
include/opus_types.h \ include/opus_types.h \
include/opus_defines.h \ include/opus_defines.h \
include/opus_custom.h \ include/opus_custom.h \
@ -29,6 +30,7 @@ celt/stack_alloc.h \
celt/vq.h \ celt/vq.h \
celt/static_modes_float.h \ celt/static_modes_float.h \
celt/static_modes_fixed.h \ celt/static_modes_fixed.h \
celt/arm/armcpu.h \
celt/arm/fixed_armv4.h \ celt/arm/fixed_armv4.h \
celt/arm/fixed_armv5e.h \ celt/arm/fixed_armv5e.h \
celt/arm/kiss_fft_armv4.h \ celt/arm/kiss_fft_armv4.h \

View file

@ -16,3 +16,6 @@ celt/celt_lpc.c \
celt/quant_bands.c \ celt/quant_bands.c \
celt/rate.c \ celt/rate.c \
celt/vq.c celt/vq.c
CELT_SOURCES_ARM = \
celt/arm/armcpu.c

View file

@ -155,6 +155,7 @@ if test "x${float_approx}" = "xyes"; then
AC_DEFINE([FLOAT_APPROX], , [Float approximations]) AC_DEFINE([FLOAT_APPROX], , [Float approximations])
fi fi
rtcd_support=no
cpu_arm=no cpu_arm=no
AC_ARG_ENABLE(asm, AC_ARG_ENABLE(asm,
AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]), AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]),
@ -167,6 +168,7 @@ if test "x${ac_enable_asm}" = xyes ; then
AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"], AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"],
[asm_optimization="disabled"]) [asm_optimization="disabled"])
if test "x${asm_optimization}" = "xARM" ; then if test "x${asm_optimization}" = "xARM" ; then
rtcd_support=yes
AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations]) AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations])
AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0]) AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0])
if test "x${ARMv5E_ASM}" = "x1" ; then if test "x${ARMv5E_ASM}" = "x1" ; then
@ -178,6 +180,11 @@ if test "x${ac_enable_asm}" = xyes ; then
AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations]) AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations])
asm_optimization="${asm_optimization} (Media)" asm_optimization="${asm_optimization} (Media)"
fi fi
AS_ASM_ARM_NEON([ARM_HAVE_NEON=1],[ARM_HAVE_NEON=0])
if test "x${ARM_HAVE_NEON}" = "x1" ; then
AC_DEFINE([ARM_HAVE_NEON], 1, [Use ARM NEON optimizations])
asm_optimization="${asm_optimization} (NEON)"
fi
fi fi
;; ;;
esac esac
@ -185,6 +192,17 @@ else
asm_optimization="disabled" asm_optimization="disabled"
fi fi
dnl Run-time CPU detection is requested by default; --disable-rtcd sets
dnl ac_enable_rtcd to "no".
AC_ARG_ENABLE(rtcd,
AS_HELP_STRING([--disable-rtcd], [Disable run-time CPU capabilities detection]),
[ ac_enable_rtcd=$enableval ], [ ac_enable_rtcd=yes] )
dnl OPUS_HAVE_RTCD is defined only when RTCD was requested and rtcd_support is
dnl "yes". In every other case rtcd_support is overwritten with the text that
dnl is later printed on the "Run-time CPU detection" summary line: a hint when
dnl the platform has no detection code, or a plain "no" when it was disabled.
if test "x${ac_enable_rtcd}" = xyes -a "x${rtcd_support}" = xyes ; then
AC_DEFINE([OPUS_HAVE_RTCD], 1, [Use run-time CPU capabilities detection])
elif test "x${rtcd_support}" = xno ; then
rtcd_support="no rtcd for your platform, please send patches"
else
rtcd_support="no"
fi
ac_enable_assertions="no" ac_enable_assertions="no"
AC_ARG_ENABLE(assertions, [ --enable-assertions enable additional software error checking], AC_ARG_ENABLE(assertions, [ --enable-assertions enable additional software error checking],
[if test "$enableval" = yes; then [if test "$enableval" = yes; then
@ -281,6 +299,7 @@ AC_SUBST(SIZE32)
AM_CONDITIONAL([FIXED_POINT], [test x$ac_enable_fixed = xyes]) AM_CONDITIONAL([FIXED_POINT], [test x$ac_enable_fixed = xyes])
AM_CONDITIONAL([CUSTOM_MODES], [test x$ac_enable_custom_modes = xyes]) AM_CONDITIONAL([CUSTOM_MODES], [test x$ac_enable_custom_modes = xyes])
AM_CONDITIONAL([EXTRA_PROGRAMS], [test x$ac_enable_extra_programs = xyes]) AM_CONDITIONAL([EXTRA_PROGRAMS], [test x$ac_enable_extra_programs = xyes])
AM_CONDITIONAL([CPU_ARM], [test x$cpu_arm = xyes])
dnl subsitutions for the pkg-config files dnl subsitutions for the pkg-config files
if test x$ac_enable_float = xyes; then if test x$ac_enable_float = xyes; then
@ -321,6 +340,7 @@ AC_MSG_RESULT([
Fast float approximations: ..... ${float_approx} Fast float approximations: ..... ${float_approx}
Fixed point debugging: ......... ${ac_enable_fixed_debug} Fixed point debugging: ......... ${ac_enable_fixed_debug}
Assembly optimization: ......... ${asm_optimization} Assembly optimization: ......... ${asm_optimization}
Run-time CPU detection: ........ ${rtcd_support}
Custom modes: .................. ${ac_enable_custom_modes} Custom modes: .................. ${ac_enable_custom_modes}
Assertion checking: ............ ${ac_enable_assertions} Assertion checking: ............ ${ac_enable_assertions}
Fuzzing: ....................... ${ac_enable_fuzzing} Fuzzing: ....................... ${ac_enable_fuzzing}

View file

@ -46,6 +46,7 @@
#include "structs.h" #include "structs.h"
#include "define.h" #include "define.h"
#include "mathops.h" #include "mathops.h"
#include "cpu_support.h"
struct OpusDecoder { struct OpusDecoder {
int celt_dec_offset; int celt_dec_offset;
@ -70,6 +71,7 @@ struct OpusDecoder {
#endif #endif
opus_uint32 rangeFinal; opus_uint32 rangeFinal;
int arch;
}; };
#ifdef FIXED_POINT #ifdef FIXED_POINT
@ -119,6 +121,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
st->Fs = Fs; st->Fs = Fs;
st->DecControl.API_sampleRate = st->Fs; st->DecControl.API_sampleRate = st->Fs;
st->DecControl.nChannelsAPI = st->channels; st->DecControl.nChannelsAPI = st->channels;
st->arch = opus_select_arch();
/* Reset decoder */ /* Reset decoder */
ret = silk_InitDecoder( silk_dec ); ret = silk_InitDecoder( silk_dec );

View file

@ -40,6 +40,7 @@
#include "arch.h" #include "arch.h"
#include "opus_private.h" #include "opus_private.h"
#include "os_support.h" #include "os_support.h"
#include "cpu_support.h"
#include "analysis.h" #include "analysis.h"
#include "mathops.h" #include "mathops.h"
#include "tuning_parameters.h" #include "tuning_parameters.h"
@ -103,6 +104,7 @@ struct OpusEncoder {
int analysis_offset; int analysis_offset;
#endif #endif
opus_uint32 rangeFinal; opus_uint32 rangeFinal;
int arch;
}; };
/* Transition tables for the voice and music. First column is the /* Transition tables for the voice and music. First column is the
@ -184,6 +186,8 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->Fs = Fs; st->Fs = Fs;
st->arch = opus_select_arch();
ret = silk_InitEncoder( silk_enc, &st->silk_mode ); ret = silk_InitEncoder( silk_enc, &st->silk_mode );
if(ret)return OPUS_INTERNAL_ERROR; if(ret)return OPUS_INTERNAL_ERROR;