diff --git a/celt/arch.h b/celt/arch.h
index 89c3a86c..e8321ac8 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -330,6 +330,8 @@ static OPUS_INLINE int celt_isnan(float x)
 #define SUB32(a,b) ((a)-(b))
 #define ADD32_ovflw(a,b) ((a)+(b))
 #define SUB32_ovflw(a,b) ((a)-(b))
+#define PSHR32_ovflw(a,shift) (a)
+
 #define MULT16_16_16(a,b)     ((a)*(b))
 #define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))
 #define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))
diff --git a/celt/fixed_debug.h b/celt/fixed_debug.h
index 9731168d..4b92a1e3 100644
--- a/celt/fixed_debug.h
+++ b/celt/fixed_debug.h
@@ -69,6 +69,8 @@ extern opus_int64 celt_mips;
 /* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
 /** Negate 32-bit value, ignore any overflows */
 #define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
+/** 32-bit arithmetic shift right with rounding-to-nearest, ignoring overflows */
+#define PSHR32_ovflw(a,shift) (SHR32(ADD32_ovflw(a, (EXTEND32(1)<<(shift)>>1)),shift))
 
 static OPUS_INLINE short NEG16(int x)
 {
diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
index dc6c93e7..3de7bd6a 100644
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -147,6 +147,8 @@
 /* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
 /** Negate 32-bit value, ignore any overflows */
 #define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
+/** 32-bit arithmetic shift right with rounding-to-nearest, ignoring overflows */
+#define PSHR32_ovflw(a,shift) (SHR32(ADD32_ovflw(a, (EXTEND32(1)<<(shift)>>1)),shift))
 
 /** 16x16 multiplication where the result fits in 16 bits */
 #define MULT16_16_16(a,b)     ((((opus_val16)(a))*((opus_val16)(b))))
diff --git a/celt/mdct.c b/celt/mdct.c
index 8798bc82..ba6b22eb 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -330,8 +330,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
          t0 = t[i];
          t1 = t[N4+i];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
-         yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
+         yr = PSHR32_ovflw(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
+         yi = PSHR32_ovflw(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
          /* We swap real and imag because we're using an FFT instead of an IFFT. */
          re = yp1[1];
          im = yp1[0];
@@ -341,8 +341,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
          t0 = t[(N4-i-1)];
          t1 = t[(N2-i-1)];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
-         yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
+         yr = PSHR32_ovflw(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
+         yi = PSHR32_ovflw(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
          yp1[0] = yr;
          yp0[1] = yi;
          yp0 += 2;