diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h
index 76941302..cbe75579 100644
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -110,7 +110,7 @@
         "smull %[tt], %[mi], r1, %[br]\n\t" \
         "smlal %[tt], %[mi], r0, %[bi]\n\t" \
         "rsb %[bi], %[bi], #0\n\t" \
-        "smull r0, %[mr], r0, %[br]\n\t" \
+        "smull r0, %[mr], %[br], r0\n\t" \
         "mov %[tt], %[tt], lsr #15\n\t" \
         "smlal r0, %[mr], r1, %[bi]\n\t" \
         "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
@@ -138,7 +138,7 @@
         "smull %[tt], %[mi], r1, %[br]\n\t" \
         "smlal %[tt], %[mi], r0, %[bi]\n\t" \
         "rsb %[bi], %[bi], #0\n\t" \
-        "smull r0, %[mr], r0, %[br]\n\t" \
+        "smull r0, %[mr], %[br], r0\n\t" \
         "mov %[tt], %[tt], lsr #17\n\t" \
         "smlal r0, %[mr], r1, %[bi]\n\t" \
         "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
@@ -166,7 +166,7 @@
         "smull %[tt], %[mr], r0, %[br]\n\t" \
         "smlal %[tt], %[mr], r1, %[bi]\n\t" \
         "rsb %[bi], %[bi], #0\n\t" \
-        "smull r1, %[mi], r1, %[br]\n\t" \
+        "smull r1, %[mi], %[br], r1\n\t" \
         "mov %[tt], %[tt], lsr #15\n\t" \
         "smlal r1, %[mi], r0, %[bi]\n\t" \
         "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
diff --git a/celt/fixed_armv4.h b/celt/fixed_armv4.h
index d38880f1..73e4f434 100644
--- a/celt/fixed_armv4.h
+++ b/celt/fixed_armv4.h
@@ -36,8 +36,8 @@ static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
   __asm__(
       "#MULT16_32_Q16\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
-      : "r"(b),"r"(a<<16)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(a<<16)
   );
   return rd_hi;
 }
@@ -53,7 +53,7 @@ static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
   __asm__(
       "#MULT16_32_Q15\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(b), "r"(a<<16)
   );
   /*We intentionally don't OR in the high bit of rd_lo for speed.*/
diff --git a/celt/fixed_armv5e.h b/celt/fixed_armv5e.h
index 6b96150a..9d70d356 100644
--- a/celt/fixed_armv5e.h
+++ b/celt/fixed_armv5e.h
@@ -52,26 +52,14 @@ static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
 #undef MULT16_32_Q15
 static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
 {
-#if 0
-  unsigned rd_lo;
-  int rd_hi;
-  __asm__(
-      "#MULT16_32_Q15\n\t"
-      "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
-      : "%r"(b), "r"(a<<16)
-  );
-  return (rd_lo>>31)|(rd_hi<<1);
-#else
   int res;
   __asm__(
       "#MULT16_32_Q15\n\t"
       "smulwb %0, %1, %2\n\t"
       : "=r"(res)
-      : "%r"(b), "r"(a)
+      : "r"(b), "r"(a)
   );
   return res<<1;
-#endif
 }
 
 #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
diff --git a/silk/SigProc_FIX_armv4.h b/silk/SigProc_FIX_armv4.h
index ea372020..d69573e3 100644
--- a/silk/SigProc_FIX_armv4.h
+++ b/silk/SigProc_FIX_armv4.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
   __asm__(
       "#silk_MLA\n\t"
       "mla %0, %1, %2, %3\n\t"
-      : "=r"(res)
+      : "=&r"(res)
       : "r"(b), "r"(c), "r"(a)
   );
   return res;
diff --git a/silk/SigProc_FIX_armv5e.h b/silk/SigProc_FIX_armv5e.h
index 804e2bc5..81a6324f 100644
--- a/silk/SigProc_FIX_armv5e.h
+++ b/silk/SigProc_FIX_armv5e.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
       "#silk_SMULTT\n\t"
       "smultt %0, %1, %2\n\t"
       : "=r"(res)
-      : "r"(a), "r"(b)
+      : "%r"(a), "r"(b)
   );
   return res;
 }
@@ -52,7 +52,7 @@ static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
       "#silk_SMLATT\n\t"
       "smlatt %0, %1, %2, %3\n\t"
       : "=r"(res)
-      : "r"(b), "r"(c), "r"(a)
+      : "%r"(b), "r"(c), "r"(a)
   );
   return res;
 }
diff --git a/silk/macros_armv4.h b/silk/macros_armv4.h
index e5dfe69c..58df6c2e 100644
--- a/silk/macros_armv4.h
+++ b/silk/macros_armv4.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
   __asm__(
       "#silk_SMULWB\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b<<16)
   );
   return rd_hi;
@@ -57,7 +57,7 @@ static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
   __asm__(
       "#silk_SMULWT\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b&~0xFFFF)
   );
   return rd_hi;
@@ -77,10 +77,10 @@ static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
   __asm__(
       "#silk_SMULWW\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b)
   );
-  return (rd_lo>>16)|(rd_hi<<16);
+  return (rd_hi<<16)+(rd_lo>>16);
 }
 
 #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
@@ -91,12 +91,12 @@ static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
   unsigned rd_lo;
   int rd_hi;
   __asm__(
-      "#silk_SMULWW\n\t"
+      "#silk_SMLAWW\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(b), "r"(c)
   );
-  return a+((rd_lo>>16)|(rd_hi<<16));
+  return a+(rd_hi<<16)+(rd_lo>>16);
 }
 
 #define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
diff --git a/silk/macros_armv5e.h b/silk/macros_armv5e.h
index a86586b3..63b1e30f 100644
--- a/silk/macros_armv5e.h
+++ b/silk/macros_armv5e.h
@@ -203,7 +203,7 @@ static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
   __asm__(
       "#silk_CLZ32\n\t"
       "clz %0, %1\n\t"
-      : "=&r"(res)
+      : "=r"(res)
       : "r"(in32)
   );
   return res;