mirror of
https://github.com/xiph/opus.git
synced 2025-06-03 09:07:42 +00:00
Optimize silk_NSQ_del_dec() for ARM NEON
This optimization is bit-exact with the C function. It speeds up the SILK encoder on NEON as follows. Fixed-point: Complexity 0-5: 0%; Complexity 6-7: 6%; Complexity 8-9: 10%; Complexity 10: 8%. Similar results were obtained for floating-point. Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
This commit is contained in:
parent
68afa490cc
commit
cfdaf365b9
7 changed files with 1257 additions and 1 deletions
|
@ -580,7 +580,9 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
|
|||
/* Make sure to store the result as the seed for the next call (also in between */
|
||||
/* frames), otherwise result won't be random at all. When only using some of the */
|
||||
/* bits, take the most significant bits by right-shifting. */
|
||||
#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165))
|
||||
/* Constant passed as the third argument of silk_MLA_ovflw() by silk_RAND(). */
#define RAND_MULTIPLIER 196314165
|
||||
/* Constant passed as the first argument of silk_MLA_ovflw() by silk_RAND(). */
#define RAND_INCREMENT 907633515
|
||||
/* Produces the next pseudo-random value from `seed` using the two named */
/* constants above instead of bare literals. */
/* NOTE(review): silk_MLA_ovflw() is defined elsewhere; presumably it computes */
/* a + b * c with wrap-around, i.e. INCREMENT + seed * MULTIPLIER -- confirm. */
#define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
|
||||
|
||||
/* Add some multiplication functions that can be easily mapped to ARM. */
|
||||
|
||||
|
|
100
silk/arm/NSQ_del_dec_arm.h
Normal file
100
silk/arm/NSQ_del_dec_arm.h
Normal file
|
@ -0,0 +1,100 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_NSQ_DEL_DEC_ARM_H
|
||||
#define SILK_NSQ_DEL_DEC_ARM_H
|
||||
|
||||
#include "celt/arm/armcpu.h"
|
||||
|
||||
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
/* Prototype of the NEON-intrinsics variant of silk_NSQ_del_dec().
   It is declared only when OPUS_ARM_MAY_HAVE_NEON_INTR is defined (see the
   enclosing #if). The parameter list matches the entries of the
   SILK_NSQ_DEL_DEC_IMPL dispatch table, whose definition annotates them as:
     psEncC            I    Encoder state
     NSQ               I/O  NSQ state
     psIndices         I/O  Quantization indices
     x16               I    Input
     pulses            O    Quantized pulse signal
     PredCoef_Q12      I    Short term prediction coefs
     LTPCoef_Q14       I    Long term prediction coefs
     AR_Q13            I    Noise shaping coefs
     HarmShapeGain_Q14 I    Long term shaping coefs
     Tilt_Q14          I    Spectral tilt
     LF_shp_Q14        I    Low frequency shaping coefs
     Gains_Q16         I    Quantization step sizes
     pitchL            I    Pitch lags
     Lambda_Q10        I    Rate/distortion tradeoff
     LTP_scale_Q14     I    LTP state scaling
   NOTE(review): the silk_* types used here are not declared by this header's
   only #include ("celt/arm/armcpu.h"); presumably the including file provides
   them first -- confirm. */
void silk_NSQ_del_dec_neon(
|
||||
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
|
||||
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
|
||||
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
|
||||
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
|
||||
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
|
||||
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
|
||||
const opus_int Tilt_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
|
||||
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
|
||||
const opus_int LTP_scale_Q14);
|
||||
#endif
|
||||
|
||||
/* Build without run-time CPU detection: the implementation is selected at
   compile time. silk_NSQ_del_dec() expands to a direct call of whatever
   function PRESUME_NEON(silk_NSQ_del_dec) names; `arch` is only cast to void.
   OVERRIDE_silk_NSQ_del_dec is defined here, so the
   "#if !defined(OVERRIDE_silk_NSQ_del_dec)" section further down in this
   header is skipped for every non-RTCD build.
   NOTE(review): this branch is guarded by OPUS_HAVE_RTCD alone, not by any
   NEON macro. PRESUME_NEON is not defined in this header; presumably it comes
   from celt/arm/armcpu.h and resolves to a non-NEON function when NEON is not
   presumed -- confirm. */
#if !defined(OPUS_HAVE_RTCD)
|
||||
#define OVERRIDE_silk_NSQ_del_dec (1)
|
||||
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14, arch) \
|
||||
((void)(arch), \
|
||||
PRESUME_NEON(silk_NSQ_del_dec)( \
|
||||
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
|
||||
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
|
||||
Lambda_Q10, LTP_scale_Q14))
|
||||
#endif
|
||||
|
||||
#if !defined(OVERRIDE_silk_NSQ_del_dec)
|
||||
/*Is run-time CPU detection enabled on this platform?*/
|
||||
#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
|
||||
!defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
/* Declaration of the constant dispatch table of silk_NSQ_del_dec()
   implementations. It has OPUS_ARCHMASK + 1 entries and is indexed with
   (arch & OPUS_ARCHMASK) by the silk_NSQ_del_dec() macro that follows.
   Per the enclosing #if it is declared only when OPUS_HAVE_RTCD is defined and
   NEON intrinsics may be available (OPUS_ARM_MAY_HAVE_NEON_INTR) but are not
   presumed (OPUS_ARM_PRESUME_NEON_INTR undefined). Every entry takes the same
   parameter list as silk_NSQ_del_dec_neon(). */
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
|
||||
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
|
||||
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
|
||||
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
|
||||
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
|
||||
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
|
||||
const opus_int Tilt_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
|
||||
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
|
||||
const opus_int LTP_scale_Q14);
|
||||
/* Run-time dispatch variant: silk_NSQ_del_dec() masks `arch` with
   OPUS_ARCHMASK and calls through the matching SILK_NSQ_DEL_DEC_IMPL entry,
   forwarding all other arguments unchanged.
   OVERRIDE_silk_NSQ_del_dec records that this header supplies the macro.
   NOTE(review): presumably the generic SILK headers test this flag before
   defining their default silk_NSQ_del_dec() -- confirm. */
#define OVERRIDE_silk_NSQ_del_dec (1)
|
||||
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14, arch) \
|
||||
((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \
|
||||
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
|
||||
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
|
||||
Lambda_Q10, LTP_scale_Q14))
|
||||
#elif defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
/* NEON intrinsics presumed at compile time (OPUS_ARM_PRESUME_NEON_INTR):
   silk_NSQ_del_dec() calls silk_NSQ_del_dec_neon() directly; `arch` is only
   cast to void.
   This branch is reachable only when OVERRIDE_silk_NSQ_del_dec has not already
   been defined, i.e. only in builds with OPUS_HAVE_RTCD defined, because the
   non-RTCD section earlier in this header always defines the override. */
#define OVERRIDE_silk_NSQ_del_dec (1)
|
||||
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14, arch) \
|
||||
((void)(arch), \
|
||||
silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* end SILK_NSQ_DEL_DEC_ARM_H */
|
1124
silk/arm/NSQ_del_dec_neon_intr.c
Normal file
1124
silk/arm/NSQ_del_dec_neon_intr.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -28,6 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "main_FIX.h"
|
||||
#include "NSQ.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
@ -35,6 +36,29 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
|
||||
!defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
|
||||
/* Definition of the dispatch table for silk_NSQ_del_dec(), compiled only when
   OPUS_HAVE_RTCD is defined and NEON intrinsics may be available but are not
   presumed (see the enclosing #if conditions).
   The table has OPUS_ARCHMASK + 1 entries. The first three (labelled ARMv4,
   EDSP and Media) use the C implementation silk_NSQ_del_dec_c; the fourth
   (Neon) uses whatever MAY_HAVE_NEON(silk_NSQ_del_dec) expands to.
   NOTE(review): MAY_HAVE_NEON is defined outside this file; presumably it
   yields silk_NSQ_del_dec_neon when NEON may be available -- confirm. */
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const silk_encoder_state *psEncC, /* I Encoder State */
|
||||
silk_nsq_state *NSQ, /* I/O NSQ state */
|
||||
SideInfoIndices *psIndices, /* I/O Quantization Indices */
|
||||
const opus_int16 x16[], /* I Input */
|
||||
opus_int8 pulses[], /* O Quantized pulse signal */
|
||||
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
|
||||
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
|
||||
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
|
||||
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
|
||||
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
|
||||
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
|
||||
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
|
||||
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
|
||||
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
|
||||
const opus_int LTP_scale_Q14 /* I LTP state scaling */
|
||||
) = {
|
||||
silk_NSQ_del_dec_c, /* ARMv4 */
|
||||
silk_NSQ_del_dec_c, /* EDSP */
|
||||
silk_NSQ_del_dec_c, /* Media */
|
||||
MAY_HAVE_NEON(silk_NSQ_del_dec), /* Neon */
|
||||
};
|
||||
|
||||
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
|
||||
NEON version takes different parameters than the C version.
|
||||
Instead RTCD is done via if statements at the call sites.
|
||||
|
|
|
@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "x86/main_sse.h"
|
||||
#endif
|
||||
|
||||
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
|
||||
#include "arm/NSQ_del_dec_arm.h"
|
||||
#endif
|
||||
|
||||
/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
|
||||
void silk_stereo_LR_to_MS(
|
||||
stereo_enc_state *state, /* I/O State */
|
||||
|
|
|
@ -27,6 +27,7 @@ silk/arm/macros_armv5e.h \
|
|||
silk/arm/macros_arm64.h \
|
||||
silk/arm/SigProc_FIX_armv4.h \
|
||||
silk/arm/SigProc_FIX_armv5e.h \
|
||||
silk/arm/NSQ_del_dec_arm.h \
|
||||
silk/arm/NSQ_neon.h \
|
||||
silk/fixed/main_FIX.h \
|
||||
silk/fixed/structs_FIX.h \
|
||||
|
|
|
@ -85,6 +85,7 @@ silk/x86/VQ_WMat_EC_sse.c
|
|||
|
||||
SILK_SOURCES_ARM_NEON_INTR = \
|
||||
silk/arm/arm_silk_map.c \
|
||||
silk/arm/NSQ_del_dec_neon_intr.c \
|
||||
silk/arm/NSQ_neon.c
|
||||
|
||||
SILK_SOURCES_FIXED = \
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue