Fix celt_pitch_xcorr_c signature.

celt_pitch_xcorr_c() should not take an arch parameter, so that it can
 properly be used as a fallback for accelerated versions, which do not
 take one either.
This patch instead provides a separate version, celt_pitch_xcorr(), which
 does take an arch parameter and can call accelerated helpers, for
 platforms that have accelerated only those helpers.
This commit is contained in:
Timothy B. Terriberry 2014-12-01 10:47:25 -08:00
parent 25b27a9c16
commit aad281878d
4 changed files with 45 additions and 27 deletions

View file

@ -58,8 +58,8 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c
*xy2 = xy02; *xy2 = xy02;
} }
#define OVERRIDE_XCORR_KERNEL static inline void xcorr_kernel_mips(const opus_val16 * x,
static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) const opus_val16 * y, opus_val32 sum[4], int len)
{ {
int j; int j;
opus_val16 y_0, y_1, y_2, y_3; opus_val16 y_0, y_1, y_2, y_3;
@ -151,4 +151,8 @@ static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus
sum[3] = (opus_val32)sum_3; sum[3] = (opus_val32)sum_3;
} }
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch), xcorr_kernel_mips(x, y, sum, len))
#endif /* PITCH_MIPSR1_H */ #endif /* PITCH_MIPSR1_H */

View file

@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
} }
#if 0 /* This is a simple version of the pitch correlation that should work /* Pure C implementation. */
well on DSPs like Blackfin and TI C5x/C6x */
#ifdef FIXED_POINT #ifdef FIXED_POINT
opus_val32 opus_val32
#else #else
void void
#endif #endif
celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) #if defined(OVERRIDE_PITCH_XCORR)
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch)
#else
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
#endif
{ {
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
int i, j; int i, j;
#ifdef FIXED_POINT #ifdef FIXED_POINT
opus_val32 maxcorr=1; opus_val32 maxcorr=1;
#endif
#if !defined(OVERRIDE_PITCH_XCORR)
(void)arch;
#endif #endif
for (i=0;i<max_pitch;i++) for (i=0;i<max_pitch;i++)
{ {
opus_val32 sum = 0; opus_val32 sum = 0;
for (j=0;j<len;j++) for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]); sum = MAC16_16(sum, _x[j], _y[i+j]);
xcorr[i] = sum; xcorr[i] = sum;
#ifdef FIXED_POINT #ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum); maxcorr = MAX32(maxcorr, sum);
@ -241,18 +251,8 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
#ifdef FIXED_POINT #ifdef FIXED_POINT
return maxcorr; return maxcorr;
#endif #endif
}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
{
int i; int i;
/*The EDSP version requires that max_pitch is at least 1, and that _x is /*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned. 32-bit aligned.
@ -265,7 +265,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (i=0;i<max_pitch-3;i+=4) for (i=0;i<max_pitch-3;i+=4)
{ {
opus_val32 sum[4]={0,0,0,0}; opus_val32 sum[4]={0,0,0,0};
#if defined(OVERRIDE_PITCH_XCORR)
xcorr_kernel_c(_x, _y+i, sum, len);
#else
xcorr_kernel(_x, _y+i, sum, len, arch); xcorr_kernel(_x, _y+i, sum, len, arch);
#endif
xcorr[i]=sum[0]; xcorr[i]=sum[0];
xcorr[i+1]=sum[1]; xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2]; xcorr[i+2]=sum[2];
@ -281,7 +285,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (;i<max_pitch;i++) for (;i<max_pitch;i++)
{ {
opus_val32 sum; opus_val32 sum;
#if defined(OVERRIDE_PITCH_XCORR)
sum = celt_inner_prod_c(_x, _y+i, len);
#else
sum = celt_inner_prod(_x, _y+i, len, arch); sum = celt_inner_prod(_x, _y+i, len, arch);
#endif
xcorr[i] = sum; xcorr[i] = sum;
#ifdef FIXED_POINT #ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum); maxcorr = MAX32(maxcorr, sum);
@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
#ifdef FIXED_POINT #ifdef FIXED_POINT
return maxcorr; return maxcorr;
#endif #endif
#endif
} }
#endif
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
int len, int max_pitch, int *pitch, int arch) int len, int max_pitch, int *pitch, int arch)
{ {

View file

@ -62,7 +62,6 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
/* OPT: This is the kernel you really want to optimize. It gets used a lot /* OPT: This is the kernel you really want to optimize. It gets used a lot
by the prefilter and by the PLC. */ by the prefilter and by the PLC. */
#ifndef OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{ {
int j; int j;
@ -129,11 +128,9 @@ static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 *
} }
} }
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) #ifndef OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \ #define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch),xcorr_kernel_c(x, y, sum, len)) ((void)(arch),xcorr_kernel_c(x, y, sum, len))
#endif
#endif /* OVERRIDE_XCORR_KERNEL */ #endif /* OVERRIDE_XCORR_KERNEL */
@ -177,7 +174,7 @@ opus_val32
void void
#endif #endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch); opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR) #if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/ /*Is run-time CPU detection enabled on this platform?*/
@ -191,12 +188,20 @@ void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int); const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch)) xcorr, len, max_pitch))
# else # else
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch, arch)) #ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif # endif
#endif #endif

View file

@ -43,14 +43,15 @@ void xcorr_kernel_sse4_1(
const opus_int16 *x, const opus_int16 *x,
const opus_int16 *y, const opus_int16 *y,
opus_val32 sum[4], opus_val32 sum[4],
int len ); int len);
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *x, const opus_int16 *x,
const opus_int16 *y, const opus_int16 *y,
opus_val32 sum[4], opus_val32 sum[4],
int len ); int len);
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \ #define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))