Further simplifications to the forward mdct
This commit is contained in:
parent
41880805ab
commit
ef0d5f15c7
5 changed files with 28 additions and 32 deletions
|
@ -189,6 +189,7 @@ typedef float celt_mask_t;
|
||||||
#define MULT16_32_Q13(a,b) ((a)*(b))
|
#define MULT16_32_Q13(a,b) ((a)*(b))
|
||||||
#define MULT16_32_Q14(a,b) ((a)*(b))
|
#define MULT16_32_Q14(a,b) ((a)*(b))
|
||||||
#define MULT16_32_Q15(a,b) ((a)*(b))
|
#define MULT16_32_Q15(a,b) ((a)*(b))
|
||||||
|
#define MULT16_32_Q16(a,b) ((a)*(b))
|
||||||
#define MULT16_32_P15(a,b) ((a)*(b))
|
#define MULT16_32_P15(a,b) ((a)*(b))
|
||||||
|
|
||||||
#define MULT32_32_Q31(a,b) ((a)*(b))
|
#define MULT32_32_Q31(a,b) ((a)*(b))
|
||||||
|
|
|
@ -153,29 +153,26 @@ static inline celt_int16_t SIG2INT16(celt_sig_t x)
|
||||||
/** Apply window and compute the MDCT for all sub-frames and all channels in a frame */
|
/** Apply window and compute the MDCT for all sub-frames and all channels in a frame */
|
||||||
static void compute_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t * restrict in, celt_sig_t * restrict out)
|
static void compute_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t * restrict in, celt_sig_t * restrict out)
|
||||||
{
|
{
|
||||||
int c, N4;
|
|
||||||
const mdct_lookup *lookup = MDCT(mode);
|
const mdct_lookup *lookup = MDCT(mode);
|
||||||
const int N = FRAMESIZE(mode);
|
const int N = FRAMESIZE(mode);
|
||||||
const int C = CHANNELS(mode);
|
const int C = CHANNELS(mode);
|
||||||
const int overlap = OVERLAP(mode);
|
const int overlap = OVERLAP(mode);
|
||||||
N4 = (N-overlap)>>1;
|
|
||||||
if (C==1)
|
if (C==1)
|
||||||
{
|
{
|
||||||
mdct_forward(lookup, in, out, window, overlap);
|
mdct_forward(lookup, in, out, window, overlap);
|
||||||
} else {
|
} else {
|
||||||
|
int c;
|
||||||
VARDECL(celt_word32_t, x);
|
VARDECL(celt_word32_t, x);
|
||||||
VARDECL(celt_word32_t, tmp);
|
VARDECL(celt_word32_t, tmp);
|
||||||
SAVE_STACK;
|
SAVE_STACK;
|
||||||
ALLOC(x, 2*N, celt_word32_t);
|
ALLOC(x, N+overlap, celt_word32_t);
|
||||||
ALLOC(tmp, N, celt_word32_t);
|
ALLOC(tmp, N, celt_word32_t);
|
||||||
for (c=0;c<C;c++)
|
for (c=0;c<C;c++)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
for (j=0;j<2*N-2*N4;j++)
|
for (j=0;j<N+overlap;j++)
|
||||||
x[j+N4] = in[C*j+c];
|
x[j] = in[C*j+c];
|
||||||
CELT_MEMSET(x, 0, N4);
|
mdct_forward(lookup, x, tmp, window, overlap);
|
||||||
CELT_MEMSET(x+2*N-N4, 0, N4);
|
|
||||||
mdct_forward(lookup, x+N4, tmp, window, overlap);
|
|
||||||
/* Interleaving the sub-frames */
|
/* Interleaving the sub-frames */
|
||||||
for (j=0;j<N;j++)
|
for (j=0;j<N;j++)
|
||||||
out[C*j+c] = tmp[j];
|
out[C*j+c] = tmp[j];
|
||||||
|
|
|
@ -44,6 +44,9 @@ static long long celt_mips = 0;
|
||||||
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
|
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
|
||||||
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
|
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
|
||||||
|
|
||||||
|
/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */
|
||||||
|
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
|
||||||
|
|
||||||
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
||||||
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,9 @@
|
||||||
/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
|
/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
|
||||||
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
|
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
|
||||||
|
|
||||||
|
/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */
|
||||||
|
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
|
||||||
|
|
||||||
/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits */
|
/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits */
|
||||||
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
|
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
|
||||||
|
|
||||||
|
|
|
@ -111,8 +111,11 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
|
||||||
{
|
{
|
||||||
kiss_fft_scalar re, im;
|
kiss_fft_scalar re, im;
|
||||||
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
|
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
|
||||||
re = -HALF32(MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2));
|
re = -(MULT16_32_Q16(*wp2, xp1[N2]) + MULT16_32_Q16(*wp1,*xp2));
|
||||||
im = -HALF32(MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]));
|
im = -(MULT16_32_Q16(*wp1, *xp1) - MULT16_32_Q16(*wp2, xp2[-N2]));
|
||||||
|
#ifndef FIXED_POINT
|
||||||
|
re *= .5; im *= .5;
|
||||||
|
#endif
|
||||||
xp1+=2;
|
xp1+=2;
|
||||||
xp2-=2;
|
xp2-=2;
|
||||||
wp1+=2;
|
wp1+=2;
|
||||||
|
@ -123,10 +126,12 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
|
||||||
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
|
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
|
||||||
t++;
|
t++;
|
||||||
}
|
}
|
||||||
for(;i<N/8;i++)
|
wp1 = window;
|
||||||
|
wp2 = window+overlap-1;
|
||||||
|
for(;i<N4-overlap/4;i++)
|
||||||
{
|
{
|
||||||
kiss_fft_scalar re, im;
|
kiss_fft_scalar re, im;
|
||||||
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
|
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
|
||||||
re = -HALF32(*xp2);
|
re = -HALF32(*xp2);
|
||||||
im = -HALF32(*xp1);
|
im = -HALF32(*xp1);
|
||||||
xp1+=2;
|
xp1+=2;
|
||||||
|
@ -135,30 +140,17 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
|
||||||
(MIXED_PRECISION only) */
|
(MIXED_PRECISION only) */
|
||||||
*yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
|
*yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
|
||||||
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
|
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
|
||||||
t++;
|
t++;
|
||||||
}
|
|
||||||
wp1 = window;
|
|
||||||
wp2 = window+overlap-1;
|
|
||||||
for(;i<N4-overlap/4;i++)
|
|
||||||
{
|
|
||||||
kiss_fft_scalar re, im;
|
|
||||||
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
|
|
||||||
re = HALF32(-*xp2);
|
|
||||||
im = -HALF32(*xp1);
|
|
||||||
xp1+=2;
|
|
||||||
xp2-=2;
|
|
||||||
/* We could remove the HALF32 above and just use MULT16_32_Q16 below
|
|
||||||
(MIXED_PRECISION only) */
|
|
||||||
*yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
|
|
||||||
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
|
|
||||||
t++;
|
|
||||||
}
|
}
|
||||||
for(;i<N4;i++)
|
for(;i<N4;i++)
|
||||||
{
|
{
|
||||||
kiss_fft_scalar re, im;
|
kiss_fft_scalar re, im;
|
||||||
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
|
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
|
||||||
re = HALF32(MULT16_32_Q15(*wp1, xp1[-N2]) - MULT16_32_Q15(*wp2, *xp2));
|
re = (MULT16_32_Q16(*wp1, xp1[-N2]) - MULT16_32_Q16(*wp2, *xp2));
|
||||||
im = -HALF32(MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]));
|
im = -(MULT16_32_Q16(*wp2, *xp1) + MULT16_32_Q16(*wp1, xp2[N2]));
|
||||||
|
#ifndef FIXED_POINT
|
||||||
|
re *= .5; im *= .5;
|
||||||
|
#endif
|
||||||
xp1+=2;
|
xp1+=2;
|
||||||
xp2-=2;
|
xp2-=2;
|
||||||
wp1+=2;
|
wp1+=2;
|
||||||
|
@ -189,7 +181,7 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
|
||||||
fp += 2;
|
fp += 2;
|
||||||
yp1 += 2;
|
yp1 += 2;
|
||||||
yp2 -= 2;
|
yp2 -= 2;
|
||||||
t++;
|
t++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
RESTORE_STACK;
|
RESTORE_STACK;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue