Fix celt_pitch_xcorr_c signature.

This should not take an arch parameter, so it can properly be used
 as a fallback for accelerated versions which do not.
This patch instead provides a separate version which can call
 accelerated helpers for platforms that have taken that approach.
This commit is contained in:
Timothy B. Terriberry 2014-12-01 10:47:25 -08:00
parent 25b27a9c16
commit aad281878d
4 changed files with 45 additions and 27 deletions

View file

@ -58,8 +58,8 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c
*xy2 = xy02;
}
#define OVERRIDE_XCORR_KERNEL
static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
static inline void xcorr_kernel_mips(const opus_val16 * x,
const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
opus_val16 y_0, y_1, y_2, y_3;
@ -151,4 +151,8 @@ static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus
sum[3] = (opus_val32)sum_3;
}
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch), xcorr_kernel_mips(x, y, sum, len))
#endif /* PITCH_MIPSR1_H */

View file

@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
}
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
/* Pure C implementation. */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
#if defined(OVERRIDE_PITCH_XCORR)
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch)
#else
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
#endif
{
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
int i, j;
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
#if !defined(OVERRIDE_PITCH_XCORR)
(void)arch;
#endif
for (i=0;i<max_pitch;i++)
{
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]);
sum = MAC16_16(sum, _x[j], _y[i+j]);
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -241,18 +251,8 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
#ifdef FIXED_POINT
return maxcorr;
#endif
}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
{
int i;
/*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned.
@ -265,7 +265,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
#if defined(OVERRIDE_PITCH_XCORR)
xcorr_kernel_c(_x, _y+i, sum, len);
#else
xcorr_kernel(_x, _y+i, sum, len, arch);
#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
@ -281,7 +285,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (;i<max_pitch;i++)
{
opus_val32 sum;
#if defined(OVERRIDE_PITCH_XCORR)
sum = celt_inner_prod_c(_x, _y+i, len);
#else
sum = celt_inner_prod(_x, _y+i, len, arch);
#endif
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
#ifdef FIXED_POINT
return maxcorr;
#endif
#endif
}
#endif
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
int len, int max_pitch, int *pitch, int arch)
{

View file

@ -62,7 +62,6 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
/* OPT: This is the kernel you really want to optimize. It gets used a lot
by the prefilter and by the PLC. */
#ifndef OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
@ -129,11 +128,9 @@ static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 *
}
}
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#ifndef OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch),xcorr_kernel_c(x, y, sum, len))
#endif
#endif /* OVERRIDE_XCORR_KERNEL */
@ -177,7 +174,7 @@ opus_val32
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
@ -191,12 +188,20 @@ void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# else
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch, arch))
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif
#endif

View file

@ -43,14 +43,15 @@ void xcorr_kernel_sse4_1(
const opus_int16 *x,
const opus_int16 *y,
opus_val32 sum[4],
int len );
int len);
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *x,
const opus_int16 *y,
opus_val32 sum[4],
int len );
int len);
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))