Optimize silk_NSQ_del_dec() for ARM NEON

The optimization is bit exact with C function.

This optimization speeds up SILK encoder on NEON as following.

Fixed-point:
Complexity 0-5:  0%
Complexity 6-7:  6%
Complexity 8-9: 10%
Complexity  10:  8%

Got similar results on floating-point.

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
This commit is contained in:
Linfeng Zhang 2016-08-25 18:12:54 -07:00 committed by Jean-Marc Valin
parent 68afa490cc
commit cfdaf365b9
No known key found for this signature in database
GPG key ID: 5E5DD9A36F9189C8
7 changed files with 1257 additions and 1 deletions

View file

@ -580,7 +580,9 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* Make sure to store the result as the seed for the next call (also in between */
/* frames), otherwise result won't be random at all. When only using some of the */
/* bits, take the most significant bits by right-shifting. */
#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165))
#define RAND_MULTIPLIER 196314165
#define RAND_INCREMENT 907633515
#define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
/* Add some multiplication functions that can be easily mapped to ARM. */

100
silk/arm/NSQ_del_dec_arm.h Normal file
View file

@ -0,0 +1,100 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_DEL_DEC_ARM_H
#define SILK_NSQ_DEL_DEC_ARM_H
#include "celt/arm/armcpu.h"
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void silk_NSQ_del_dec_neon(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#endif
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
PRESUME_NEON(silk_NSQ_del_dec)( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#endif
#if !defined(OVERRIDE_silk_NSQ_del_dec)
/*Is run-time CPU detection enabled on this platform?*/
#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#elif defined(OPUS_ARM_PRESUME_NEON_INTR)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14))
#endif
#endif
#endif /* end SILK_NSQ_DEL_DEC_ARM_H */

File diff suppressed because it is too large Load diff

View file

@ -28,6 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
# include "config.h"
#endif
#include "main_FIX.h"
#include "NSQ.h"
#if defined(OPUS_HAVE_RTCD)
@ -35,6 +36,29 @@ POSSIBILITY OF SUCH DAMAGE.
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
) = {
silk_NSQ_del_dec_c, /* ARMv4 */
silk_NSQ_del_dec_c, /* EDSP */
silk_NSQ_del_dec_c, /* Media */
MAY_HAVE_NEON(silk_NSQ_del_dec), /* Neon */
};
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
NEON version takes different parameters than the C version.
Instead RTCD is done via if statements at the call sites.

View file

@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "x86/main_sse.h"
#endif
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/NSQ_del_dec_arm.h"
#endif
/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
void silk_stereo_LR_to_MS(
stereo_enc_state *state, /* I/O State */

View file

@ -27,6 +27,7 @@ silk/arm/macros_armv5e.h \
silk/arm/macros_arm64.h \
silk/arm/SigProc_FIX_armv4.h \
silk/arm/SigProc_FIX_armv5e.h \
silk/arm/NSQ_del_dec_arm.h \
silk/arm/NSQ_neon.h \
silk/fixed/main_FIX.h \
silk/fixed/structs_FIX.h \

View file

@ -85,6 +85,7 @@ silk/x86/VQ_WMat_EC_sse.c
SILK_SOURCES_ARM_NEON_INTR = \
silk/arm/arm_silk_map.c \
silk/arm/NSQ_del_dec_neon_intr.c \
silk/arm/NSQ_neon.c
SILK_SOURCES_FIXED = \