MIPS optimizations

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Rhishikesh Agashe, 2014-06-19 03:40:09 -04:00 (committed by Jean-Marc Valin)
parent a88d8365d4
commit f133bac6f9
26 changed files with 2338 additions and 3 deletions

View file

@@ -1,7 +1,7 @@
#################### COMPILE OPTIONS #######################
# Uncomment this for fixed-point build
-#FIXED_POINT=1
+FIXED_POINT=1
# It is strongly recommended to uncomment one of these
# VAR_ARRAYS: Use C99 variable-length arrays for stack allocation
@@ -45,7 +45,9 @@ ldflags-from-ldlibdirs = $(addprefix -L,$(1))
ldlibs-from-libs = $(addprefix -l,$(1))
WARNINGS = -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow
-CFLAGS += -O2 -g $(WARNINGS) -DOPUS_BUILD
+CFLAGS += -mips32r2 -mno-mips16 -std=gnu99 -O2 -g $(WARNINGS) -DENABLE_ASSERTIONS -DMIPSr1_ASM -DOPUS_BUILD -mdspr2 -march=74kc -mtune=74kc -mmt -mgp32
CINCLUDES = include silk celt
ifdef FIXED_POINT
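(For context, not stated in the diff: -mdspr2 enables the MIPS DSP Rev 2 ASE whose MULT/MADD/EXTR.W instructions the new inline assembly below relies on, -DMIPSr1_ASM is the switch that pulls in the mips/ override headers added by this commit, and -march=74kc/-mtune=74kc target the MIPS 74K core this patch appears to be tuned for.)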

View file

@@ -97,6 +97,9 @@
#if defined(OPUS_ARM_INLINE_EDSP)
#include "arm/kiss_fft_armv5e.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/kiss_fft_mipsr1.h"
+#endif
#else /* not FIXED_POINT*/

View file

@@ -54,6 +54,10 @@
#define PACKAGE_VERSION "unknown"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/celt_mipsr1.h"
+#endif
int resampling_factor(opus_int32 rate)
{
@@ -169,6 +173,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
#endif
#endif
+#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap)
@@ -231,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
/* Compute the part with the constant filter. */
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
}
+#endif /* OVERRIDE_comb_filter */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},

View file

@@ -135,6 +135,10 @@
/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+#if defined(MIPSr1_ASM)
+#include "mips/fixed_generic_mipsr1.h"
+#endif
static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);

View file

@@ -231,6 +231,7 @@ static void kf_bfly3(
}
+#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -305,6 +306,7 @@ static void kf_bfly5(
}
}
}
+#endif /* OVERRIDE_kf_bfly5 */
#endif

View file

@@ -53,6 +53,11 @@
#include "mathops.h"
#include "stack_alloc.h"
+#if defined(MIPSr1_ASM)
+#include "mips/mdct_mipsr1.h"
+#endif
#ifdef CUSTOM_MODES
int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
@@ -110,6 +115,7 @@ void clt_mdct_clear(mdct_lookup *l)
#endif /* CUSTOM_MODES */
/* Forward MDCT trashes the input array */
+#ifndef OVERRIDE_clt_mdct_forward
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride)
{
@@ -229,7 +235,9 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_clt_mdct_forward */
+#ifndef OVERRIDE_clt_mdct_backward
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
{
@@ -330,3 +338,4 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
}
}
}
+#endif /* OVERRIDE_clt_mdct_backward */

celt/mips/celt_mipsr1.h (new file, 149 lines)

@@ -0,0 +1,149 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2010 Xiph.Org Foundation
Copyright (c) 2008 Gregory Maxwell
Written by Jean-Marc Valin and Gregory Maxwell */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CELT_MIPSR1_H__
#define __CELT_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define CELT_C
#include "os_support.h"
#include "mdct.h"
#include <math.h>
#include "celt.h"
#include "pitch.h"
#include "bands.h"
#include "modes.h"
#include "entcode.h"
#include "quant_bands.h"
#include "rate.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "float_cast.h"
#include <stdarg.h>
#include "celt_lpc.h"
#include "vq.h"
#define OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap)
{
int i;
opus_val32 x0, x1, x2, x3, x4;
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
opus_val16 g00, g01, g02, g10, g11, g12;
static const opus_val16 gains[3][3] = {
{QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
{QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
{QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
if (g0==0 && g1==0)
{
/* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
if (x!=y)
OPUS_MOVE(y, x, N);
return;
}
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
x1 = x[-T1+1];
x2 = x[-T1 ];
x3 = x[-T1-1];
x4 = x[-T1-2];
/* If the filter didn't change, we don't need the overlap */
if (g0==g1 && T0==T1 && tapset0==tapset1)
overlap=0;
for (i=0;i<overlap;i++)
{
opus_val16 f;
opus_val32 res;
f = MULT16_16_Q15(window[i],window[i]);
x0= x[i-T1+2];
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
y[i] = x[i] + res;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
x4 = x[i-T1-2];
x3 = x[i-T1-1];
x2 = x[i-T1];
x1 = x[i-T1+1];
if (g1==0)
{
/* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
if (x!=y)
OPUS_MOVE(y+overlap, x+overlap, N-overlap);
return;
}
for (i=overlap;i<N;i++)
{
opus_val32 res;
x0=x[i-T1+2];
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
y[i] = x[i] + res;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
}
#endif /* __CELT_MIPSR1_H__ */
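For readers who don't speak MIPS DSP assembly, here is a portable C sketch (an illustration, not part of the commit) of what one iteration of the constant-filter loop above computes; opus_int64 stands in for the 64-bit $ac1 accumulator:

/* MULT seeds $ac1 with one product, each MADD accumulates another 32x32
   product at full width, and EXTR.W 15 extracts bits [46:15] of the
   accumulator, i.e. a truncating >>15 of the accumulated sum. */
static opus_val32 comb_tap_sketch(opus_val32 xi,
                                  opus_val16 g10, opus_val16 g11, opus_val16 g12,
                                  opus_val32 x0, opus_val32 x1, opus_val32 x2,
                                  opus_val32 x3, opus_val32 x4)
{
   opus_int64 acc;
   acc  = (opus_int64)g10 * x2;          /* MULT $ac1, g10, x2    */
   acc += (opus_int64)g11 * (x3 + x1);   /* MADD $ac1, g11, x3+x1 */
   acc += (opus_int64)g12 * (x4 + x0);   /* MADD $ac1, g12, x4+x0 */
   return xi + (opus_val32)(acc >> 15);  /* EXTR.W res, $ac1, 15  */
}

The payoff over the generic comb_filter() is that all three taps are summed in the accumulator with a single final shift, instead of shifting each MULT16_32_Q15() result back to 32 bits before adding.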

View file

@@ -0,0 +1,127 @@
/* Copyright (C) 2007-2009 Xiph.Org Foundation
Copyright (C) 2003-2008 Jean-Marc Valin
Copyright (C) 2007-2008 CSIRO */
/**
@file fixed_generic.h
@brief Generic fixed-point operations
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CELT_FIXED_GENERIC_MIPSR1_H
#define CELT_FIXED_GENERIC_MIPSR1_H
#undef MULT16_32_Q15_ADD
static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_32_Q15_SUB
static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_16_Q15_ADD
static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_16_Q15_SUB
static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_32_Q16
static inline int MULT16_32_Q16(int a, int b)
{
int c;
asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
return c;
}
#undef MULT16_32_P16
static inline int MULT16_32_P16(int a, int b)
{
int c;
asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
return c;
}
#undef MULT16_32_Q15
static inline int MULT16_32_Q15(int a, int b)
{
int c;
asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
return c;
}
#undef MULT32_32_Q31
static inline int MULT32_32_Q31(int a, int b)
{
int r;
asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
return r;
}
#undef PSHR32
static inline int PSHR32(int a, int shift)
{
int r;
asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
return r;
}
#undef MULT16_16_P15
static inline int MULT16_16_P15(int a, int b)
{
int r;
asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r): "0" (r), "i"(15));
return r;
}
#endif /* CELT_FIXED_GENERIC_MIPSR1_H */
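All of these helpers follow the same pattern: MULT/MADD/MSUB maintain a full-width product sum in the $ac1 accumulator, and a single EXTR.W (truncating) or EXTR_R.W / SHRA_R.W (rounding) pulls out the final Q-format result. Portable sketches of three representative ones (assumptions for illustration, not from the patch):

static inline int mult16_32_q15_sketch(int a, int b)
{
   return (int)(((long long)a * b) >> 15);               /* MULT + EXTR.W 15 */
}
static inline int mult16_32_p16_sketch(int a, int b)
{
   long long acc = (long long)a * b;                     /* MULT */
   return (int)((acc + (1LL << 15)) >> 16);              /* EXTR_R.W 16 (rounded) */
}
static inline int mult16_32_q15_add_sketch(int a, int b, int c, int d)
{
   long long acc = (long long)a * b + (long long)c * d;  /* MULT + MADD */
   return (int)(acc >> 15);                              /* EXTR.W 15 */
}

Compared with the generic macros in celt/fixed_generic.h, the intermediate sum never leaves the accumulator, so the _ADD/_SUB variants avoid one shift-and-truncate per product.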

celt/mips/kiss_fft_mipsr1.h (new file, 167 lines)

@@ -0,0 +1,167 @@
/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.*/
#ifndef KISS_FFT_MIPSR1_H
#define KISS_FFT_MIPSR1_H
#if !defined(KISS_FFT_GUTS_H)
#error "This file should only be included from _kiss_fft_guts.h"
#endif
#ifdef FIXED_POINT
#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
#undef S_MUL_ADD
static inline int S_MUL_ADD(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef S_MUL_SUB
static inline int S_MUL_SUB(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef C_MUL
# define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
kiss_fft_cpx m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
return m;
}
#undef C_MULC
# define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
kiss_fft_cpx m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
return m;
}
#endif /* FIXED_POINT */
#define OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT
ya.r = 10126;
ya.i = -31164;
yb.r = -26510;
yb.i = -19261;
#else
ya = st->twiddles[fstride*m];
yb = st->twiddles[fstride*2*m];
#endif
tw=st->twiddles;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
Fout0=Fout;
Fout1=Fout0+m;
Fout2=Fout0+2*m;
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
/* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
C_ADD( scratch[7],scratch[1],scratch[4]);
C_SUB( scratch[10],scratch[1],scratch[4]);
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
}
}
}
#endif /* KISS_FFT_MIPSR1_H */
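C_MUL_fun() above is a Q15 complex multiply with each component's two products summed in the accumulator before the single >>15; portably (a sketch, not part of the commit):

static kiss_fft_cpx c_mul_sketch(kiss_fft_cpx a, kiss_twiddle_cpx b)
{
   kiss_fft_cpx m;  /* (a.r + j*a.i) * (b.r + j*b.i) in Q15 */
   m.r = (kiss_fft_scalar)(((opus_int64)a.r * b.r - (opus_int64)a.i * b.i) >> 15);
   m.i = (kiss_fft_scalar)(((opus_int64)a.r * b.i + (opus_int64)a.i * b.r) >> 15);
   return m;
}

C_MULC_fun() is the same with b conjugated. The hard-coded twiddles in kf_bfly5() (ya = 10126 - 31164j, yb = -26510 - 19261j) are exp(-2*pi*j/5) and exp(-4*pi*j/5) rounded to Q15, which the generic kiss_fft.c loads from st->twiddles instead.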

celt/mips/mdct_mipsr1.h (new file, 283 lines)

@@ -0,0 +1,283 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2008 Xiph.Org Foundation
Written by Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* This is a simple MDCT implementation that uses an N/4 complex FFT
to do most of the work. It should be relatively straightforward to
plug in pretty much any FFT here.
This replaces the Vorbis FFT (and uses the exact same API), which
was a bit too messy and was ending up duplicating code
(might as well use the same FFT everywhere).
The algorithm is similar to (and inspired by) Fabrice Bellard's
MDCT implementation in FFMPEG, but has differences in signs, ordering
and scaling in many places.
*/
#ifndef __MDCT_MIPSR1_H__
#define __MDCT_MIPSR1_H__
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include "mdct.h"
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include <math.h>
#include "os_support.h"
#include "mathops.h"
#include "stack_alloc.h"
/* Forward MDCT trashes the input array */
#define OVERRIDE_clt_mdct_forward
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
opus_val16 scale;
#ifdef FIXED_POINT
/* Allows us to scale with MULT16_32_Q16(), which is faster than
MULT16_32_Q15() on ARM. */
int scale_shift = st->scale_shift-1;
#endif
SAVE_STACK;
scale = st->scale;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
for(i=0;i<((overlap+3)>>2);i++)
{
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
*yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
*yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
wp1 = window;
wp2 = window+overlap-1;
for(;i<N4-((overlap+3)>>2);i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = *xp2;
*yp++ = *xp1;
xp1+=2;
xp2-=2;
}
for(;i<N4;i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
*yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
}
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL_SUB(re,t0,im,t1);
yi = S_MUL_ADD(im,t0,re,t1);
yc.r = yr;
yc.i = yi;
yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
f2[st->bitrev[i]] = yc;
}
}
/* N/4 complex FFT, does not downscale anymore */
opus_fft_impl(st, f2);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
#define OVERRIDE_clt_mdct_backward
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
{
int i;
int N, N2, N4;
const kiss_twiddle_scalar *trig;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
for(i=0;i<N4;i++)
{
int rev;
kiss_fft_scalar yr, yi;
rev = *bitrev++;
yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
/* Storing the pre-rotation directly in the bitrev order. */
xp1+=2*stride;
xp2-=2*stride;
}
}
opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp0[1];
im = yp0[0];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL_ADD(re,t0 , im,t1);
yi = S_MUL_SUB(re,t1 , im,t0);
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL_ADD(re,t0,im,t1);
yi = S_MUL_SUB(re,t1,im,t0);
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
}
/* Mirror on both sides for TDAC */
{
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
const opus_val16 * OPUS_RESTRICT wp1 = window;
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
for(i = 0; i < overlap/2; i++)
{
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
wp1++;
wp2--;
}
}
}
#endif /* __MDCT_MIPSR1_H__ */
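For reference, the transform both functions implement is the standard MDCT (background, not part of the diff): with N = l->n inputs and N/2 outputs,

X[k] = \sum_{n=0}^{N-1} x[n] \cos\left[ \frac{2\pi}{N} \left( n + \tfrac{1}{2} + \tfrac{N}{4} \right) \left( k + \tfrac{1}{2} \right) \right], \quad k = 0, \ldots, N/2 - 1,

factored as fold-to-N/2, complex pre-rotation, N/4-point complex FFT, and post-rotation, which are the labelled blocks in clt_mdct_forward() above. The MIPS version appears to differ from the generic celt/mdct.c mainly in fusing each pair of S_MUL() products into a single accumulator round trip via S_MUL_ADD()/S_MUL_SUB().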

celt/mips/pitch_mipsr1.h (new file, 155 lines)

@@ -0,0 +1,155 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Written by Jean-Marc Valin */
/**
@file pitch.h
@brief Pitch analysis
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef PITCH_MIPSR1_H
#define PITCH_MIPSR1_H
#define OVERRIDE_DUAL_INNER_PROD
static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int j;
opus_val32 xy01=0;
opus_val32 xy02=0;
asm volatile("MULT $ac1, $0, $0");
asm volatile("MULT $ac2, $0, $0");
/* Two inner products against the same x[], unrolled by two (N assumed even) */
for (j=0;j<N;j++)
{
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
++j;
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
}
asm volatile ("mflo %0, $ac1": "=r"(xy01));
asm volatile ("mflo %0, $ac2": "=r"(xy02));
*xy1 = xy01;
*xy2 = xy02;
}
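A portable sketch of what the accumulator pair computes (illustration only; note the asm loop advances j twice per pass, hence the even-N assumption):

static inline void dual_inner_prod_sketch(const opus_val16 *x, const opus_val16 *y01,
      const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2)
{
   int j;
   opus_val32 a = 0, b = 0;   /* $ac1 and $ac2, zeroed by MULT $acN, $0, $0 */
   for (j = 0; j < N; j++)
   {
      a += (opus_val32)x[j] * (opus_val32)y01[j];   /* MADD $ac1 */
      b += (opus_val32)x[j] * (opus_val32)y02[j];   /* MADD $ac2 */
   }
   *xy1 = a;   /* mflo reads the low 32 bits of each accumulator */
   *xy2 = b;
}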
#define OVERRIDE_XCORR_KERNEL
static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
opus_val16 y_0, y_1, y_2, y_3;
opus_int64 sum_0, sum_1, sum_2, sum_3;
sum_0 = (opus_int64)sum[0];
sum_1 = (opus_int64)sum[1];
sum_2 = (opus_int64)sum[2];
sum_3 = (opus_int64)sum[3];
y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
y_0=*y++;
y_1=*y++;
y_2=*y++;
for (j=0;j<len-3;j+=4)
{
opus_val16 tmp;
tmp = *x++;
y_3=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_0);
sum_1 = __builtin_mips_madd( sum_1, tmp, y_1);
sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
tmp=*x++;
y_0=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
tmp=*x++;
y_1=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
tmp=*x++;
y_2=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_1);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_2);
}
if (j++<len)
{
opus_val16 tmp = *x++;
y_3=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
}
if (j++<len)
{
opus_val16 tmp=*x++;
y_0=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
}
if (j<len)
{
opus_val16 tmp=*x++;
y_1=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
}
sum[0] = (opus_val32)sum_0;
sum[1] = (opus_val32)sum_1;
sum[2] = (opus_val32)sum_2;
sum[3] = (opus_val32)sum_3;
}
#endif /* PITCH_MIPSR1_H */
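Here the DSP accumulator is reached through GCC's __builtin_mips_madd builtin rather than inline asm; its effect, portably (a sketch):

/* One MADD instruction on the DSP ASE: acc + a*b at 64-bit precision */
static inline opus_int64 mips_madd_sketch(opus_int64 acc, int a, int b)
{
   return acc + (opus_int64)a * b;
}

The four running sums hold the cross-correlation at four consecutive lags, so each pass of the 4x-unrolled loop feeds every loaded x sample into four MACs while y_0..y_3 rotate as a four-entry delay line.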

celt/mips/vq_mipsr1.h (new file, 123 lines)

@@ -0,0 +1,123 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Written by Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __VQ_MIPSR1_H__
#define __VQ_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mathops.h"
#include "cwrs.h"
#include "vq.h"
#include "arch.h"
#include "os_support.h"
#include "bands.h"
#include "rate.h"
static unsigned extract_collapse_mask(int *iy, int N, int B);
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
#define OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
opus_val16 ms;
celt_norm *Xptr;
Xptr = X;
ms = NEG16(s);
for (i=0;i<len-stride;i++)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
Xptr = &X[len-2*stride-1];
for (i=len-2*stride-1;i>=0;i--)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
#define OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
{
int i;
#ifdef FIXED_POINT
int k;
#endif
opus_val32 E = EPSILON;
opus_val16 g;
opus_val32 t;
celt_norm *xptr = X;
int X0, X2, X3, X1;
asm volatile("mult $ac1, $0, $0");
asm volatile("MTLO %0, $ac1" : :"r" (E));
/*if(N %4)
printf("error");*/
for (i=0;i<N-2;i+=2)
{
X0 = (int)*xptr++;
asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
X1 = (int)*xptr++;
asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
}
for (;i<N;i++)
{
X0 = (int)*xptr++;
asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
}
asm volatile("MFLO %0, $ac1" : "=r" (E));
#ifdef FIXED_POINT
k = celt_ilog2(E)>>1;
#endif
t = VSHR32(E, 2*(k-7));
g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
xptr = X;
for (i=0;i<N;i++)
{
*xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
xptr++;
}
/*return celt_sqrt(E);*/
}
#endif /* __VQ_MIPSR1_H__ */
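The energy accumulation above reduces, portably, to the following (a sketch; MTLO seeds the low word of $ac1 with EPSILON):

opus_val32 E = EPSILON;
for (i = 0; i < N; i++)
   E += (opus_val32)X[i] * (opus_val32)X[i];   /* MADD $ac1, X[i], X[i] */

The rest of the function (celt_ilog2(), celt_rsqrt_norm() and the final scaling loop) matches the generic celt/vq.c version.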

View file

@@ -41,6 +41,10 @@
#include "x86/pitch_sse.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/pitch_mipsr1.h"
+#endif
#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
# include "arm/pitch_arm.h"
#endif

View file

@@ -39,6 +39,11 @@
#include "rate.h"
#include "pitch.h"
+#if defined(MIPSr1_ASM)
+#include "mips/vq_mipsr1.h"
+#endif
+#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
@@ -64,6 +69,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
*Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
+#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
@@ -343,6 +349,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
return collapse_mask;
}
+#ifndef OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
{
int i;
@@ -368,6 +375,7 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
}
/*return celt_sqrt(E);*/
}
+#endif /* OVERRIDE_renormalise_vector */
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
{

View file

@@ -57,6 +57,9 @@ typedef struct {
typedef NSQ_sample_struct NSQ_sample_pair[ 2 ];
+#if defined(MIPSr1_ASM)
+#include "mips/NSQ_del_dec_mipsr1.h"
+#endif
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -303,6 +306,7 @@ void silk_NSQ_del_dec(
/******************************************/
/* Noise shape quantizer for one subframe */
/******************************************/
+#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec
static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
@@ -629,6 +633,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */

View file

@@ -587,6 +587,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
#include "arm/SigProc_FIX_armv5e.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/sigproc_fix_mipsr1.h"
+#endif
#ifdef __cplusplus
}
#endif

View file

@@ -0,0 +1,337 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
/**************************************************************/
/* Compute noise shaping coefficients and initial gain values */
/**************************************************************/
#define OVERRIDE_silk_noise_shape_analysis_FIX
void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
int arch /* I Run-time architecture */
)
{
silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ];
VARDECL( opus_int16, x_windowed );
const opus_int16 *x_ptr, *pitch_res_ptr;
SAVE_STACK;
/* Point to start of first LPC analysis block */
x_ptr = x - psEnc->sCmn.la_shape;
/****************/
/* GAIN CONTROL */
/****************/
SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
/* Input quality is the average of the quality in the lowest two VAD bands */
psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+ psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
/* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
/* Reduce coding SNR during low speech activity */
if( psEnc->sCmn.useCBR == 0 ) {
b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/
silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/
}
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
/* Reduce gains for periodic signals */
SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
} else {
/* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
}
/*************************/
/* SPARSENESS PROCESSING */
/*************************/
/* Set quantizer offset */
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
/* Initially set to 0; may be overruled in process_gains(..) */
psEnc->sCmn.indices.quantOffsetType = 0;
psEncCtrl->sparseness_Q8 = 0;
} else {
/* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
energy_variation_Q7 = 0;
log_energy_prev_Q7 = 0;
pitch_res_ptr = pitch_res;
for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/
log_energy_Q7 = silk_lin2log( nrg );
if( k > 0 ) {
energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
}
log_energy_prev_Q7 = log_energy_Q7;
pitch_res_ptr += nSamples;
}
psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
/* Set quantization offset depending on sparseness measure */
if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
psEnc->sCmn.indices.quantOffsetType = 0;
} else {
psEnc->sCmn.indices.quantOffsetType = 1;
}
/* Increase coding SNR for sparse signals */
SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
}
/*******************************/
/* Control bandwidth expansion */
/*******************************/
/* More BWE for signals with high prediction gain */
strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
/* BWExp1 will be applied after BWExp2, so make it relative */
BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
} else {
warping_Q16 = 0;
}
/********************************************/
/* Compute noise shaping AR coefs and gains */
/********************************************/
ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
/* Apply window: sine slope followed by flat part followed by cosine slope */
opus_int shift, slope_part, flat_part;
flat_part = psEnc->sCmn.fs_kHz * 3;
slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );
silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
shift = slope_part;
silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) );
shift += flat_part;
silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
/* Update pointer: next LPC analysis block */
x_ptr += psEnc->sCmn.subfr_length;
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Calculate warped auto correlation */
silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
} else {
/* Calculate regular auto correlation */
silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
}
/* Add white noise, as a fraction of energy */
auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
/* Calculate the reflection coefficients using schur */
nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
silk_assert( nrg >= 0 );
/* Convert reflection coefficients to prediction coefficients */
silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
Qnrg = -scale; /* range: -12...30*/
silk_assert( Qnrg >= -12 );
silk_assert( Qnrg <= 30 );
/* Make sure that Qnrg is an even number */
if( Qnrg & 1 ) {
Qnrg -= 1;
nrg >>= 1;
}
tmp32 = silk_SQRT_APPROX( nrg );
Qnrg >>= 1; /* range: -6...15*/
psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \
silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) ));
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Adjust gain for warping */
gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
} else {
psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
}
}
/* Bandwidth expansion for synthesis filter shaping */
silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
/* Compute noise shaping filter coefficients */
silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
/* Bandwidth expansion for analysis filter shaping */
silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
/* Ratio of prediction gains, in energy domain */
pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
/*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
/* Convert to monic warped prediction coefficients and limit absolute values */
limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
/* Convert from Q24 to Q13 and store in int16 */
for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
}
}
/*****************/
/* Gain tweaking */
/*****************/
/* Increase gains during low speech activity and put lower limit on gains */
gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
silk_assert( gain_mult_Q16 > 0 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
}
gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
}
/************************************************/
/* Control low-frequency shaping and noise tilt */
/************************************************/
/* Less low frequency shaping for noisy inputs */
strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
/* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
/*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
/* Pack two coefficients in one int32 */
psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 );
psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
}
silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
} else {
b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
/* Pack two coefficients in one int32 */
psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
}
Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
}
/****************************/
/* HARMONIC SHAPING CONTROL */
/****************************/
/* Control boosting of harmonic frequencies */
HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
/* More harmonic boost for noisy input signals */
HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
/* More harmonic noise shaping for high bitrates or noisy input */
HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
/* Less harmonic noise shaping for less periodic signals */
HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
} else {
HarmShapeGain_Q16 = 0;
}
/*************************/
/* Smooth over subframes */
/*************************/
for( k = 0; k < MAX_NB_SUBFR; k++ ) {
psShapeSt->HarmBoost_smth_Q16 =
silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
psShapeSt->HarmShapeGain_smth_Q16 =
silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
psShapeSt->Tilt_smth_Q16 =
silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 );
psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 );
}
RESTORE_STACK;
}
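A note on the constants used throughout: SILK_FIX_CONST(C, Q), defined in SigProc_FIX.h, rounds a floating-point constant to Q-format at compile time, roughly:

/* (opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5); for example
   SILK_FIX_CONST(1.0, 14) == 16384 and SILK_FIX_CONST(0.1, 16) == 6554 */

so an expression such as silk_RSHIFT_ROUND(SNR_adj_dB_Q7 - SILK_FIX_CONST(20.0, 7), 4) is simply (SNR_adj_dB - 20 dB) / 16 evaluated in fixed point.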

View file

@@ -0,0 +1,182 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef __PREFILTER_FIX_MIPSR1_H__
#define __PREFILTER_FIX_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "main_FIX.h"
#include "stack_alloc.h"
#include "tuning_parameters.h"
#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX
void silk_warped_LPC_analysis_filter_FIX(
opus_int32 state[], /* I/O State [order + 1] */
opus_int32 res_Q2[], /* O Residual signal [length] */
const opus_int16 coef_Q13[], /* I Coefficients [order] */
const opus_int16 input[], /* I Input signal [length] */
const opus_int16 lambda_Q16, /* I Warping factor */
const opus_int length, /* I Length of input signal */
const opus_int order /* I Filter order (even) */
)
{
opus_int n, i;
opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4;
opus_int32 state_cur, state_next;
/* Order must be even */
/* Length must be even */
silk_assert( ( order & 1 ) == 0 );
silk_assert( ( length & 1 ) == 0 );
for( n = 0; n < length; n+=2 ) {
/* Output of lowpass section */
tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
state_cur = silk_LSHIFT( input[ n ], 14 );
/* Output of allpass section */
tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
state_next = tmp2;
acc_Q11 = silk_RSHIFT( order, 1 );
acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
/* Output of lowpass section */
tmp4 = silk_SMLAWB( state_cur, state_next, lambda_Q16 );
state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 );
/* Output of allpass section */
tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
state[ 1 ] = tmp4;
acc_Q22 = silk_RSHIFT( order, 1 );
acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] );
/* Loop over allpass sections */
for( i = 2; i < order; i += 2 ) {
/* Output of allpass section */
tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
state_cur = tmp1;
acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
/* Output of allpass section */
tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
state_next = tmp2;
acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
/* Output of allpass section */
tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 );
state[ i ] = tmp3;
acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] );
/* Output of allpass section */
tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
state[ i + 1 ] = tmp4;
acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] );
}
acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
state[ order ] = tmp3;
acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] );
res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 );
}
}
/* Prefilter for finding Quantizer input signal */
#define OVERRIDE_silk_prefilt_FIX
static inline void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
opus_int32 st_res_Q12[], /* I short term residual signal */
opus_int32 xw_Q3[], /* O prefiltered signal */
opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */
opus_int Tilt_Q14, /* I Tilt shaping coefficient */
opus_int32 LF_shp_Q14, /* I Low-frequency shaping coefficients */
opus_int lag, /* I Lag for harmonic shaping */
opus_int length /* I Length of signals */
)
{
opus_int i, idx, LTP_shp_buf_idx;
opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
opus_int16 *LTP_shp_buf;
/* To speed up use temp variables instead of using the struct */
LTP_shp_buf = P->sLTP_shp;
LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12;
sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12;
if( lag > 0 ) {
for( i = 0; i < length; i++ ) {
/* unrolled loop */
silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
idx = lag + LTP_shp_buf_idx;
n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
}
}
else
{
for( i = 0; i < length; i++ ) {
n_LTP_Q12 = 0;
n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 );
}
}
/* Copy temp variable back to state */
P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12;
P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
}
#endif /* __PREFILTER_FIX_MIPSR1_H__ */
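
A note on HarmShapeFIRPacked_Q12 used in silk_prefilt_FIX above: the symmetric 3-tap harmonic-shaping FIR travels as a single 32-bit word, so the loop can mix silk_SMULBB/silk_SMLABB (bottom halfword) with silk_SMLABT (top halfword) instead of loading two coefficients. A sketch of the packing, with the field order inferred from those B/T selectors (the helper name is mine, illustrative only):

/* Hypothetical helper, not part of the patch */
static opus_int32 pack_harm_shape_fir_Q12( opus_int16 outer_Q12, opus_int16 mid_Q12 )
{
    return (opus_int32)(opus_uint16)outer_Q12        /* low 16 bits: outer taps */
         | silk_LSHIFT( (opus_int32)mid_Q12, 16 );   /* high 16 bits: middle tap */
}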

View file

@ -0,0 +1,168 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "main_FIX.h"
#undef QC
#define QC 10
#undef QS
#define QS 14
/* Autocorrelations for a warped frequency axis */
#define OVERRIDE_silk_warped_autocorrelation_FIX
void silk_warped_autocorrelation_FIX(
opus_int32 *corr, /* O Result [order + 1] */
opus_int *scale, /* O Scaling of the correlation vector */
const opus_int16 *input, /* I Input data to correlate */
const opus_int warping_Q16, /* I Warping coefficient */
const opus_int length, /* I Length of input */
const opus_int order /* I Correlation order (even) */
)
{
opus_int n, i, lsh;
opus_int32 tmp1_QS = 0, tmp2_QS = 0, tmp3_QS = 0, tmp4_QS = 0;
opus_int32 tmp5_QS = 0, tmp6_QS = 0, tmp7_QS = 0, tmp8_QS = 0;
opus_int32 start_1 = 0, start_2 = 0, start_3 = 0;
opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
opus_int64 temp64;
opus_int32 val;
val = 2 * QS - QC;
/* Order must be even */
silk_assert( ( order & 1 ) == 0 );
silk_assert( 2 * QS - QC >= 0 );
/* Loop over samples */
/* Process four samples per iteration; the tail loop below handles any remainder */
for( n = 0; n < length - 3; n += 4 ) {
tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
start_1 = tmp1_QS;
tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS );
start_2 = tmp3_QS;
tmp5_QS = silk_LSHIFT32( (opus_int32)input[ n+2], QS );
start_3 = tmp5_QS;
tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS );
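/* No start_4 variable is needed: the i == 0 pass below stores the raw fourth
   sample in state_QS[ 0 ], which then serves as start_4 for the later sections */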
/* Loop over allpass sections */
for( i = 0; i < order; i += 2 ) {
/* Output of allpass section */
tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1);
tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 );
corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2);
tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 );
corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3);
tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 );
state_QS[ i ] = tmp7_QS;
corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]);
/* Output of allpass section */
tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1);
tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 );
corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2);
tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 );
corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3);
tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 );
state_QS[ i + 1 ] = tmp8_QS;
corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]);
}
state_QS[ order ] = tmp7_QS;
corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1);
corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2);
corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3);
corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]);
}
for( ; n < length; n++ ) {
tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
/* Loop over allpass sections */
for( i = 0; i < order; i += 2 ) {
/* Output of allpass section */
tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
state_QS[ i ] = tmp1_QS;
corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]);
/* Output of allpass section */
tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
state_QS[ i + 1 ] = tmp2_QS;
corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]);
}
state_QS[ order ] = tmp1_QS;
corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]);
}
temp64 = corr_QC[ 0 ];
temp64 = __builtin_mips_shilo(temp64, val);
lsh = silk_CLZ64( temp64 ) - 35;
lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
*scale = -( QC + lsh );
silk_assert( *scale >= -30 && *scale <= 12 );
/* Both signs of lsh take the same path: shilo() shifts right for positive
   amounts and left for negative ones */
for( i = 0; i < order + 1; i++ ) {
temp64 = corr_QC[ i ];
temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
}
corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val);
silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC */
}
#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */
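
The scaling tail above deserves a gloss: corr_QC[ 0 ] holds the energy and is therefore the largest entry, so after the Q(2*QS) to Q(QC) pre-shift by val, lsh = silk_CLZ64( temp64 ) - 35 is the shift that leaves one headroom bit in 32 bits, and *scale records the resulting Q domain. A worked example under those definitions (numbers illustrative only):

/* With QC = 10 and QS = 14, val = 2*QS - QC = 18. If the pre-shifted energy
   has 40 significant bits, silk_CLZ64() = 24 and lsh = 24 - 35 = -11, so every
   corr_QC[ i ] is shifted right 11 further bits (shilo( temp64, -lsh ) shifts
   right for positive arguments). The outputs land in Q( QC + lsh ) = Q(-1),
   reported to the caller as *scale = -( QC + lsh ) = 1. */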

View file

@ -138,9 +138,14 @@ static OPUS_INLINE void limit_warped_coefs(
silk_assert( 0 );
}
#if defined(MIPSr1_ASM)
#include "mips/noise_shape_analysis_FIX_mipsr1.h"
#endif
/**************************************************************/
/* Compute noise shaping coefficients and initial gain values */
/**************************************************************/
#ifndef OVERRIDE_silk_noise_shape_analysis_FIX
void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
@ -443,3 +448,5 @@ void silk_noise_shape_analysis_FIX(
}
RESTORE_STACK;
}
#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */

View file

@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "tuning_parameters.h"
#if defined(MIPSr1_ASM)
#include "mips/prefilter_FIX_mipsr1.h"
#endif
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
@ -45,6 +50,7 @@ static OPUS_INLINE void silk_prefilt_FIX(
opus_int length /* I Length of signals */
);
#ifndef OVERRIDE_silk_warped_LPC_analysis_filter_FIX
void silk_warped_LPC_analysis_filter_FIX(
opus_int32 state[], /* I/O State [order + 1] */
opus_int32 res_Q2[], /* O Residual signal [length] */
@ -86,6 +92,7 @@ void silk_warped_LPC_analysis_filter_FIX(
res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
}
}
#endif /* OVERRIDE_silk_warped_LPC_analysis_filter_FIX */
void silk_prefilter_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
@ -155,6 +162,7 @@ void silk_prefilter_FIX(
RESTORE_STACK;
}
#ifndef OVERRIDE_silk_prefilt_FIX
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
@ -207,3 +215,5 @@ static OPUS_INLINE void silk_prefilt_FIX(
P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
}
#endif /* OVERRIDE_silk_prefilt_FIX */

View file

@ -34,6 +34,12 @@ POSSIBILITY OF SUCH DAMAGE.
#define QC 10
#define QS 14
#if defined(MIPSr1_ASM)
#include "mips/warped_autocorrelation_FIX_mipsr1.h"
#endif
#ifndef OVERRIDE_silk_warped_autocorrelation_FIX
/* Autocorrelations for a warped frequency axis */
void silk_warped_autocorrelation_FIX(
opus_int32 *corr, /* O Result [order + 1] */
@ -86,3 +92,4 @@ void silk_warped_autocorrelation_FIX(
}
silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
}
#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */

View file

@ -79,17 +79,24 @@ POSSIBILITY OF SUCH DAMAGE.
(( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \
((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
#include "ecintrin.h"
#if defined(MIPSr1_ASM)
#include "mips/macros_mipsr1.h"
#endif
#include "ecintrin.h"
#ifndef OVERRIDE_silk_CLZ16
static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
{
return 32 - EC_ILOG(in16<<16|0x8000);
}
#endif
#ifndef OVERRIDE_silk_CLZ32
static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
{
return in32 ? 32 - EC_ILOG(in32) : 32;
}
#endif
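
This #define/#ifndef pairing is the mechanism the whole patch is built on: the architecture header is included first and defines OVERRIDE_<name> next to its tuned function, so the generic definition that follows compiles out. In sketch form, with foo as a placeholder name:

/* arch header (e.g. mips/macros_mipsr1.h), included first */
#define OVERRIDE_foo
static OPUS_INLINE int foo(int x) { return x; /* tuned version */ }

/* generic header, included afterwards */
#ifndef OVERRIDE_foo
static OPUS_INLINE int foo(int x) { return x; /* portable version */ }
#endif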
/* Row based */
#define matrix_ptr(Matrix_base_adr, row, column, N) \

View file

@ -0,0 +1,406 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef __NSQ_DEL_DEC_MIPSR1_H__
#define __NSQ_DEL_DEC_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "main.h"
#include "stack_alloc.h"
#define OVERRIDE_silk_noise_shape_quantizer_del_dec
static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
opus_int signalType, /* I Signal type */
const opus_int32 x_Q10[], /* I */
opus_int8 pulses[], /* O */
opus_int16 xq[], /* O */
opus_int32 sLTP_Q15[], /* I/O LTP filter state */
opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */
const opus_int16 a_Q12[], /* I Short term prediction coefs */
const opus_int16 b_Q14[], /* I Long term prediction coefs */
const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
opus_int lag, /* I Pitch lag */
opus_int32 HarmShapeFIRPacked_Q14, /* I */
opus_int Tilt_Q14, /* I Spectral tilt */
opus_int32 LF_shp_Q14, /* I */
opus_int32 Gain_Q16, /* I */
opus_int Lambda_Q10, /* I */
opus_int offset_Q10, /* I */
opus_int length, /* I Input length */
opus_int subfr, /* I Subframe number */
opus_int shapingLPCOrder, /* I Shaping LPC filter order */
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */
)
{
opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
opus_int32 Winner_rand_state;
opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ];
NSQ_del_dec_struct *psDD;
NSQ_sample_struct *psSS;
opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4;
opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6;
opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13;
opus_int16 a_Q12_14, a_Q12_15;
opus_int32 cur, prev, next;
long long temp64;
/* Initialize b_Q14 variables */
b_Q14_0 = b_Q14[ 0 ];
b_Q14_1 = b_Q14[ 1 ];
b_Q14_2 = b_Q14[ 2 ];
b_Q14_3 = b_Q14[ 3 ];
b_Q14_4 = b_Q14[ 4 ];
/* Initialize a_Q12 variables */
a_Q12_0 = a_Q12[0];
a_Q12_1 = a_Q12[1];
a_Q12_2 = a_Q12[2];
a_Q12_3 = a_Q12[3];
a_Q12_4 = a_Q12[4];
a_Q12_5 = a_Q12[5];
a_Q12_6 = a_Q12[6];
a_Q12_7 = a_Q12[7];
a_Q12_8 = a_Q12[8];
a_Q12_9 = a_Q12[9];
a_Q12_10 = a_Q12[10];
a_Q12_11 = a_Q12[11];
a_Q12_12 = a_Q12[12];
a_Q12_13 = a_Q12[13];
a_Q12_14 = a_Q12[14];
a_Q12_15 = a_Q12[15];
silk_assert( nStatesDelayedDecision > 0 );
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
for( i = 0; i < length; i++ ) {
/* Perform common calculations used in all states */
/* Long-term prediction */
if( signalType == TYPE_VOICED ) {
/* Unrolled loop */
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 );
temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 );
temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 );
temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 );
temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 );
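/* Round once here rather than per term: each silk_SMLAWB() would truncate
   toward -inf, so a single +2^15 bias before the Q16 extract is less biased */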
temp64 += 32768;
LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16);
LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */
pred_lag_ptr++;
} else {
LTP_pred_Q14 = 0;
}
/* Long-term shaping */
if( lag > 0 ) {
/* Symmetric, packed FIR coefficients */
n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */
shp_lag_ptr++;
} else {
n_LTP_Q14 = 0;
}
for( k = 0; k < nStatesDelayedDecision; k++ ) {
/* Delayed decision state */
psDD = &psDelDec[ k ];
/* Sample state */
psSS = psSampleState[ k ];
/* Generate dither */
psDD->Seed = silk_RAND( psDD->Seed );
/* Pointer used in short term prediction and shaping */
psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
/* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 );
if( predictLPCOrder == 16 ) {
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 );
temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -15 ], a_Q12_15 );
}
temp64 += 32768;
LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16);
LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
/* Output of lowpass section */
tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
/* Output of allpass section */
tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
psDD->sAR2_Q14[ 0 ] = tmp2;
temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] );
prev = psDD->sAR2_Q14[ 1 ];
/* Loop over allpass sections */
for( j = 2; j < shapingLPCOrder; j += 2 ) {
cur = psDD->sAR2_Q14[ j ];
next = psDD->sAR2_Q14[ j+1 ];
/* Output of allpass section */
tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 );
psDD->sAR2_Q14[ j - 1 ] = tmp1;
temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] );
temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] );
/* Output of allpass section */
tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 );
psDD->sAR2_Q14[ j + 0 ] = tmp2;
prev = next;
}
psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
temp64 += 32768;
n_AR_Q14 = __builtin_mips_extr_w(temp64, 16);
n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */
n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */
n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */
n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */
n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */
n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */
/* Input minus prediction plus noise feedback */
/* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */
tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */
tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */
tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */
/* Flip sign depending on dither */
if ( psDD->Seed < 0 ) {
r_Q10 = -r_Q10;
}
r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
q2_Q10 = silk_ADD32( q1_Q10, 1024 );
rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
} else if( q1_Q0 == 0 ) {
q1_Q10 = offset_Q10;
q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
} else if( q1_Q0 == -1 ) {
q2_Q10 = offset_Q10;
q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
} else { /* q1_Q0 < -1 */
q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
q2_Q10 = silk_ADD32( q1_Q10, 1024 );
rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
}
rr_Q10 = silk_SUB32( r_Q10, q1_Q10 );
rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
rr_Q10 = silk_SUB32( r_Q10, q2_Q10 );
rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
if( rd1_Q10 < rd2_Q10 ) {
psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
psSS[ 0 ].Q_Q10 = q1_Q10;
psSS[ 1 ].Q_Q10 = q2_Q10;
} else {
psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
psSS[ 0 ].Q_Q10 = q2_Q10;
psSS[ 1 ].Q_Q10 = q1_Q10;
}
/* Update states for best quantization */
/* Quantized excitation */
exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
if ( psDD->Seed < 0 ) {
exc_Q14 = -exc_Q14;
}
/* Add predictions */
LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
psSS[ 0 ].xq_Q14 = xq_Q14;
/* Update states for second best quantization */
/* Quantized excitation */
exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
if ( psDD->Seed < 0 ) {
exc_Q14 = -exc_Q14;
}
/* Add predictions */
LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
psSS[ 1 ].xq_Q14 = xq_Q14;
}
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
/* Find winner */
RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
Winner_ind = 0;
for( k = 1; k < nStatesDelayedDecision; k++ ) {
if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
Winner_ind = k;
}
}
/* Increase RD values of expired states */
Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
for( k = 0; k < nStatesDelayedDecision; k++ ) {
if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
}
}
/* Find worst in first set and best in second set */
RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10;
RDmax_ind = 0;
RDmin_ind = 0;
for( k = 1; k < nStatesDelayedDecision; k++ ) {
/* find worst in first set */
if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
RDmax_ind = k;
}
/* find best in second set */
if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10;
RDmin_ind = k;
}
}
/* Replace a state if best from second set outperforms worst in first set */
if( RDmin_Q10 < RDmax_Q10 ) {
silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
}
/* Write samples from winner to output and long-term filter states */
psDD = &psDelDec[ Winner_ind ];
if( subfr > 0 || i >= decisionDelay ) {
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ];
}
NSQ->sLTP_shp_buf_idx++;
NSQ->sLTP_buf_idx++;
/* Update states */
for( k = 0; k < nStatesDelayedDecision; k++ ) {
psDD = &psDelDec[ k ];
psSS = &psSampleState[ k ][ 0 ];
psDD->LF_AR_Q14 = psSS->LF_AR_Q14;
psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14;
psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10;
psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14;
psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
psDD->RandState[ *smpl_buf_idx ] = psDD->Seed;
psDD->RD_Q10 = psSS->RD_Q10;
}
delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10;
}
/* Update LPC states */
for( k = 0; k < nStatesDelayedDecision; k++ ) {
psDD = &psDelDec[ k ];
silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
}
}
#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */
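
Every mult/madd/extr_w triplet in this file computes the same primitive: a dot product of 32-bit signals with 16-bit coefficients, accumulated at 64-bit precision in the DSP accumulator, biased once, then extracted as a Q16 word. A portable model of that primitive for readers without the MIPS DSP builtins (helper name is mine, illustrative only):

static opus_int32 ref_dot_Q16_round( const opus_int32 *x, const opus_int16 *c, int n )
{
    long long acc = 0;                      /* stands in for the hi/lo accumulator */
    int k;
    for( k = 0; k < n; k++ ) {
        acc += (long long)x[ k ] * c[ k ];  /* __builtin_mips_mult / _madd */
    }
    acc += 32768;                           /* the temp64 += 32768 bias above */
    return (opus_int32)( acc >> 16 );       /* __builtin_mips_extr_w( acc, 16 ) */
}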

93
silk/mips/macros_mipsr1.h Normal file
View file

@ -0,0 +1,93 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef __SILK_MACROS_MIPSR1_H__
#define __SILK_MACROS_MIPSR1_H__
#define mips_clz(x) __builtin_clz(x)
#undef silk_SMULWB
static OPUS_INLINE int silk_SMULWB(int a, int b)
{
long long ac;
int c;
ac = __builtin_mips_mult(a, (opus_int32)(opus_int16)b);
c = __builtin_mips_extr_w(ac, 16);
return c;
}
#undef silk_SMLAWB
#define silk_SMLAWB(a32, b32, c32) ((a32) + silk_SMULWB(b32, c32))
#undef silk_SMULWW
static OPUS_INLINE int silk_SMULWW(int a, int b)
{
long long ac;
int c;
ac = __builtin_mips_mult(a, b);
c = __builtin_mips_extr_w(ac, 16);
return c;
}
#undef silk_SMLAWW
static OPUS_INLINE int silk_SMLAWW(int a, int b, int c)
{
long long ac;
int res;
ac = __builtin_mips_mult(b, c);
res = __builtin_mips_extr_w(ac, 16);
res += a;
return res;
}
#define OVERRIDE_silk_CLZ16
static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
{
/* Zero-extend and set a guard bit: negative inputs return 0, a zero input
   returns 16 (matching the generic version), and __builtin_clz() is never
   called with 0, which is undefined */
return mips_clz( ( (opus_uint32)(opus_uint16)in16 << 16 ) | 0x8000 );
}
#define OVERRIDE_silk_CLZ32
static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
{
/* Guard the zero case: __builtin_clz(0) is undefined, and the generic
   version returns 32 */
return in32 ? mips_clz(in32) : 32;
}
#endif /* __SILK_MACROS_MIPSR1_H__ */
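
For reference, the DSP silk_SMULWB above keeps the full 48-bit product in the accumulator and extracts bits 16..47 in one step, where the generic version splits the multiply into two 32-bit halves. Both reduce to this portable expression (my naming, illustrative only); silk_SMULWW differs only in using all 32 bits of b:

static opus_int32 ref_SMULWB( opus_int32 a, opus_int32 b )
{
    return (opus_int32)( ( (long long)a * (opus_int16)b ) >> 16 );
}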

View file

@ -0,0 +1,66 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_SIGPROC_FIX_MIPSR1_H
#define SILK_SIGPROC_FIX_MIPSR1_H
#ifdef __cplusplus
extern "C"
{
#endif
#undef silk_SAT16
static OPUS_INLINE short int silk_SAT16(int a)
{
int c;
c = __builtin_mips_shll_s_w(a, 16);
c = c >> 16;
return c;
}
#undef silk_LSHIFT_SAT32
static OPUS_INLINE int silk_LSHIFT_SAT32(int a, int shift)
{
int r;
r = __builtin_mips_shll_s_w(a, shift);
return r;
}
#undef silk_RSHIFT_ROUND
static OPUS_INLINE int silk_RSHIFT_ROUND(int a, int shift)
{
int r;
r = __builtin_mips_shra_r_w(a, shift);
return r;
}
#ifdef __cplusplus
}
#endif

#endif /* SILK_SIGPROC_FIX_MIPSR1_H */
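
silk_SAT16 above leans on shll_s_w saturating the left shift: a value that would overflow 16 bits pins at the int32 limits, so the arithmetic shift back down lands exactly on the 16-bit clamp. A portable equivalent (illustrative only):

static opus_int16 ref_SAT16( opus_int32 a )
{
    return (opus_int16)( a > 32767 ? 32767 : ( a < -32768 ? -32768 : a ) );
}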