Generate slightly more accurate WMOPS figures

2008-09-12 20:52:27 -04:00 · 2008-09-12 20:52:27 -04:00 · 453ccd829a
commit 453ccd829a
parent 7b0cb4ba0d
5 changed files with 96 additions and 37 deletions
--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@ -92,16 +92,16 @@ struct kiss_fft_state{
 #   define S_MUL(a,b) MULT16_32_Q15(b, a)

 #   define C_MUL(m,a,b) \
-      do{ (m).r = S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i); \
-          (m).i = S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r); }while(0)
+      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)

 #   define C_MULC(m,a,b) \
-      do{ (m).r = S_MUL((a).r,(b).r) + S_MUL((a).i,(b).i); \
-          (m).i = S_MUL((a).i,(b).r) - S_MUL((a).r,(b).i); }while(0)
+      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)

 #   define C_MUL4(m,a,b) \
-      do{ (m).r = SHR(S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i),2); \
-          (m).i = SHR(S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r),2); }while(0)
+      do{ (m).r = SHR(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
+          (m).i = SHR(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)

 #   define C_MULBYSCALAR( c, s ) \
      do{ (c).r =  S_MUL( (c).r , s ) ;\
--- a/libcelt/arch.h
+++ b/libcelt/arch.h
@ -56,6 +56,10 @@
 #define ABS32(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 32-bit value.  */
 #define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
 #define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
+#define UADD32(a,b) ((a)+(b))
+#define USUB32(a,b) ((a)-(b))
+
+#define PRINT_MIPS(file)

 #ifdef FIXED_POINT

--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@ -47,6 +47,7 @@
 #include <string.h>
 #include "cwrs.h"
 #include "mathops.h"
+#include "arch.h"

 #if 0
 int log2_frac(ec_uint32 val, int frac)
@ -147,7 +148,7 @@ static inline void unext32(celt_uint32_t *_ui,int _len,celt_uint32_t _ui0){
  int           j;
  /* doing a do-while would overrun the array if we had less than 2 samples */
  j=1; do {
-    ui1=_ui[j]+_ui[j-1]+_ui0;
+    ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
    _ui[j-1]=_ui0;
    _ui0=ui1;
  } while (++j<_len);
@ -174,7 +175,7 @@ static inline void uprev32(celt_uint32_t *_ui,int _n,celt_uint32_t _ui0){
  int           j;
  /* doing a do-while would overrun the array if we had less than 2 samples */
  j=1; do {
-    ui1=_ui[j]-_ui[j-1]-_ui0;
+    ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
    _ui[j-1]=_ui0;
    _ui0=ui1;
  } while (++j<_n);
--- a/libcelt/fixed_debug.h
+++ b/libcelt/fixed_debug.h
@ -37,15 +37,19 @@

 #include <stdio.h>

-//extern long long celt_mips;
-static long long celt_mips = 0;
+#ifdef CELT_C
+long long celt_mips=0;
+#else
+extern long long celt_mips;
+#endif
+
 #define MIPS_INC celt_mips++,

 #define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
-#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))

 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
-#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))

 #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
@ -53,6 +57,7 @@ static long long celt_mips = 0;

 #define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
 #define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
+#define VERIFY_UINT(x) ((x)<=4294967295LLU) /* 2^32-1; the former (2147483647LLU<<1) bound was 2^32-2 and rejected the largest 32-bit value */

 #define SHR(a,b) SHR32(a,b)
 #define PSHR(a,b) PSHR32(a,b)
@ -80,7 +85,7 @@ static inline int NEG32(long long x)
   res = -x;
   if (!VERIFY_INT(res))
      fprintf (stderr, "NEG16: output is not int: %d\n", (int)res);
-   celt_mips++;
+   celt_mips+=2;
   return res;
 }

@ -151,7 +156,7 @@ static inline int SHR32(long long a, int shift)
   {
      fprintf (stderr, "SHR32: output is not int: %d\n", (int)res);
   }
-   celt_mips++;
+   celt_mips+=2;
   return res;
 }
 static inline int SHL32(long long a, int shift) 
@ -166,18 +171,18 @@ static inline int SHL32(long long a, int shift)
   {
      fprintf (stderr, "SHL32: output is not int: %d\n", (int)res);
   }
-   celt_mips++;
+   celt_mips+=2;
   return res;
 }

-#define PSHR16(a,shift) (SHR16(ADD16((a),((1<<((shift))>>1))),shift))
-#define PSHR32(a,shift) (SHR32(ADD32((a),((EXTEND32(1)<<((shift))>>1))),shift))
+#define PSHR16(a,shift) (celt_mips--,SHR16(ADD16((a),((1<<((shift))>>1))),shift))
+#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((celt_word32_t)(1)<<((shift))>>1))),shift))
 #define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))

 #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
 #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))

-#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
+#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
 #define HALF32(x)  (SHR32(x,1))

 //#define SHR(a,shift) ((a) >> (shift))
@ -228,25 +233,62 @@ static inline int _ADD32(long long a, long long b, char *file, int line)
   {
      fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line);
   }
-   celt_mips++;
+   celt_mips+=2;
   return res;
 }

-static inline int SUB32(long long a, long long b) 
+#define SUB32(a, b) _SUB32(a, b, __FILE__, __LINE__)
+static inline int _SUB32(long long a, long long b, char *file, int line) 
 {
   long long res;
   if (!VERIFY_INT(a) || !VERIFY_INT(b))
   {
-      fprintf (stderr, "SUB32: inputs are not int: %d %d\n", (int)a, (int)b);
+      fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
   }
   res = a-b;
   if (!VERIFY_INT(res))
-      fprintf (stderr, "SUB32: output is not int: %d\n", (int)res);
-   celt_mips++;
+      fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+   celt_mips+=2;
   return res;
 }

-#define ADD64(a,b) (MIPS_INC(a)+(b))
+#undef UADD32
+#define UADD32(a, b) _UADD32(a, b, __FILE__, __LINE__)
+static inline unsigned int _UADD32(unsigned long long a, unsigned long long b, char *file, int line) 
+{
+   long long res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "UADD32: inputs are not int: %u %u in %s: line %d\n", (unsigned)a, (unsigned)b, file, line);
+   }
+   res = a+b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "UADD32: output is not int: %u in %s: line %d\n", (unsigned)res, file, line);
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef USUB32
+#define USUB32(a, b) _USUB32(a, b, __FILE__, __LINE__)
+static inline unsigned int _USUB32(unsigned long long a, unsigned long long b, char *file, int line) 
+{
+   long long res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      /*fprintf (stderr, "USUB32: inputs are not int: %llu %llu in %s: line %d\n", (unsigned)a, (unsigned)b, file, line);*/
+   }
+   res = a-b;
+   if (!VERIFY_UINT(res))
+   {
+      /*fprintf (stderr, "USUB32: output is not int: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line);*/
+   }
+   celt_mips+=2;
+   return res;
+}
+
+

 /* result fits in 16 bits */
 static inline short MULT16_16_16(int a, int b) 
@ -278,10 +320,10 @@ static inline int _MULT16_16(int a, int b, char *file, int line)
   return res;
 }

-#define MAC16_16(c,a,b)     (celt_mips--,ADD32((c),MULT16_16((a),(b))))
-#define MAC16_16_Q11(c,a,b)     (EXTRACT16(ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11)))))
-#define MAC16_16_Q13(c,a,b)     (EXTRACT16(ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13)))))
-#define MAC16_16_P13(c,a,b)     (EXTRACT16(ADD32((c),SHR32(ADD32(4096,MULT16_16((a),(b))),13))))
+#define MAC16_16(c,a,b)     (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
+#define MAC16_16_Q11(c,a,b)     (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11))))
+#define MAC16_16_Q13(c,a,b)     (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13))))
+#define MAC16_16_P13(c,a,b)     (ADD16((c),SHR32(ADD32(4096,MULT16_16((a),(b))),13)))


 #define MULT16_32_QX(a, b, Q) _MULT16_32_QX(a, b, Q, __FILE__, __LINE__)
@ -292,12 +334,15 @@ static inline int _MULT16_32_QX(int a, long long b, int Q, char *file, int line)
   {
      fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
   }
-   if (ABS32(b)>=(EXTEND32(1)<<(15+Q)))
+   if (ABS32(b)>=((celt_word32_t)(1)<<(15+Q)))
      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);      
   res = (((long long)a)*(long long)b) >> Q;
   if (!VERIFY_INT(res))
      fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line);
-   celt_mips+=5;
+   if (Q==15)
+      celt_mips+=3;
+   else
+      celt_mips+=4;
   return res;
 }

@ -308,11 +353,14 @@ static inline int MULT16_32_PX(int a, long long b, int Q)
   {
      fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d\n", Q, (int)a, (int)b);
   }
-   if (ABS32(b)>=(EXTEND32(1)<<(15+Q)))
+   if (ABS32(b)>=((celt_word32_t)(1)<<(15+Q)))
      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d\n", Q, (int)a, (int)b);      
-   res = ((((long long)a)*(long long)b) + ((EXTEND32(1)<<Q)>>1))>> Q;
+   res = ((((long long)a)*(long long)b) + (((celt_word32_t)(1)<<Q)>>1))>> Q;
   if (!VERIFY_INT(res))
      fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d\n", Q, (int)a, (int)b,(int)res);
+   if (Q==15)
+      celt_mips+=4;
+   else
      celt_mips+=5;
   return res;
 }
@ -333,6 +381,7 @@ static inline int SATURATE(int a, int b)
      a=b;
   if (a<-b)
      a = -b;
+   celt_mips+=3;
   return a;
 }

@ -391,7 +440,7 @@ static inline short MULT16_16_Q15(int a, int b)
   {
      fprintf (stderr, "MULT16_16_Q15: output is not short: %d\n", (int)res);
   }
-   celt_mips+=3;
+   celt_mips+=1;
   return res;
 }

@ -443,7 +492,7 @@ static inline short MULT16_16_P15(int a, int b)
   res >>= 15;
   if (!VERIFY_SHORT(res))
      fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res);
-   celt_mips+=4;
+   celt_mips+=2;
   return res;
 }

@ -470,7 +519,7 @@ static inline int _DIV32_16(long long a, long long b, char *file, int line)
      if (res<-32768)
         res = -32768;
   }
-   celt_mips+=20;
+   celt_mips+=35;
   return res;
 }

@ -491,10 +540,13 @@ static inline int _DIV32(long long a, long long b, char *file, int line)
   res = a/b;
   if (!VERIFY_INT(res))
      fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line);
-   celt_mips+=36;
+   celt_mips+=70;
   return res;
 }
 #define PDIV32(a,b) DIV32(ADD32((a),(b)>>1),b)
 #define PDIV32_16(a,b) DIV32_16(ADD32((a),(b)>>1),b)

+#undef PRINT_MIPS /* drop the empty default so the debug build reports the operation counter */
+#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %lld MIPS\n", celt_mips);} while (0) /* celt_mips is long long: %lld; caller supplies the ';' */
+
 #endif
--- a/libcelt/testcelt.c
+++ b/libcelt/testcelt.c
@ -161,6 +161,8 @@ int main(int argc, char *argv[])
      fwrite(out+skip, sizeof(short), (frame_size-skip)*channels, fout);
      skip = 0;
   }
+   PRINT_MIPS(stderr);
+   
   celt_encoder_destroy(enc);
   celt_decoder_destroy(dec);
   fclose(fin);