Add PSHR32_ovflw(), a rounding right shift that tolerates overflow in the
rounding add, and use it for the IMDCT post-rotation in clt_mdct_backward_c().

  * celt/arch.h          - float build: the shift is a no-op, macro yields (a).
  * celt/fixed_debug.h   - fixed-point debug build: SHR32 of an ADD32_ovflw sum.
  * celt/fixed_generic.h - fixed-point generic build: same expression.
  * celt/mdct.c          - PSHR32(...) -> PSHR32_ovflw(...) at four call sites.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Record boundaries were rebuilt from the hunk headers (each hunk has
6 context lines). Leading indentation and runs of spaces inside lines could not
be recovered: the 9-space indent in the mdct.c hunks is an assumption, and the
header context lines use single spaces. Confirm against the target tree, or
apply with `git apply --ignore-whitespace` / `patch -l`.

diff --git a/celt/arch.h b/celt/arch.h
index 89c3a86c..e8321ac8 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -330,6 +330,8 @@ static OPUS_INLINE int celt_isnan(float x)
 #define SUB32(a,b) ((a)-(b))
 #define ADD32_ovflw(a,b) ((a)+(b))
 #define SUB32_ovflw(a,b) ((a)-(b))
+#define PSHR32_ovflw(a,shift) (a)
+
 #define MULT16_16_16(a,b) ((a)*(b))
 #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
 #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
diff --git a/celt/fixed_debug.h b/celt/fixed_debug.h
index 9731168d..4b92a1e3 100644
--- a/celt/fixed_debug.h
+++ b/celt/fixed_debug.h
@@ -69,6 +69,8 @@ extern opus_int64 celt_mips;
 /* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
 /** Negate 32-bit value, ignore any overflows */
 #define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
+/** 32-bit arithmetic shift right with rounding-to-nearest, ignoring overflows */
+#define PSHR32_ovflw(a,shift) (SHR32(ADD32_ovflw(a, (EXTEND32(1)<<(shift)>>1)),shift))
 
 static OPUS_INLINE short NEG16(int x)
 {
diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
index dc6c93e7..3de7bd6a 100644
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -147,6 +147,8 @@
 /* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
 /** Negate 32-bit value, ignore any overflows */
 #define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
+/** 32-bit arithmetic shift right with rounding-to-nearest, ignoring overflows */
+#define PSHR32_ovflw(a,shift) (SHR32(ADD32_ovflw(a, (EXTEND32(1)<<(shift)>>1)),shift))
 
 /** 16x16 multiplication where the result fits in 16 bits */
 #define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
diff --git a/celt/mdct.c b/celt/mdct.c
index 8798bc82..ba6b22eb 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -330,8 +330,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
          t0 = t[i];
          t1 = t[N4+i];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
-         yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
+         yr = PSHR32_ovflw(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
+         yi = PSHR32_ovflw(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
          /* We swap real and imag because we're using an FFT instead of an IFFT. */
          re = yp1[1];
          im = yp1[0];
@@ -341,8 +341,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
          t0 = t[(N4-i-1)];
          t1 = t[(N2-i-1)];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
-         yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
+         yr = PSHR32_ovflw(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
+         yi = PSHR32_ovflw(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
          yp1[0] = yr;
          yp0[1] = yi;
          yp0 += 2;