Arm: Speed up FLOAT2INT16 conversion with Neon

Using Neon for float to int conversion, and introducing platform- specific function for converting an array of float values to int16. Also adding appropriate unit test. Signed-off-by: Jean-Marc Valin <jeanmarcv@google.com>
2024-09-11 14:00:32 +02:00 · 2024-09-11 14:00:32 +02:00 · d4494e6ed7
commit d4494e6ed7
parent edffe56b30
9 changed files with 277 additions and 4 deletions
--- a/celt/tests/test_unit_mathops.c
+++ b/celt/tests/test_unit_mathops.c
@ -1,5 +1,6 @@
 /* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
                           Gregory Maxwell
+   Copyright (c) 2024 Arm Limited
   Written by Jean-Marc Valin, Gregory Maxwell, Timothy B. Terriberry,
   and Yunho Huh */
 /*
@ -37,8 +38,10 @@

 #include <stdio.h>
 #include <math.h>
-#include "mathops.h"
 #include "bands.h"
+#include "cpu_support.h"
+#include "float_cast.h"
+#include "mathops.h"

 #ifdef FIXED_POINT
 #define WORD "%d"
@ -351,8 +354,94 @@ void testilog2(void)
 }
 #endif

+
+#ifndef DISABLE_FLOAT_API
+
+void testcelt_float2int16(int use_ref_impl, int buffer_size)
+{
+
+#define MAX_BUFFER_SIZE 2080
+   int i, cnt;
+   float floatsToConvert[MAX_BUFFER_SIZE];
+   short results[MAX_BUFFER_SIZE] = { 0 };
+   float scaleInt16RangeTo01;
+
+   celt_assert(buffer_size <= MAX_BUFFER_SIZE);
+
+   scaleInt16RangeTo01 = 1.f / 32768.f;
+   cnt = 0;
+
+   while (cnt + 15 < buffer_size && cnt < buffer_size / 2)
+   {
+      floatsToConvert[cnt++] = 77777.0f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = 33000.0f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = 32768.0f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = 32767.4f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = 32766.6f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = .501 * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = .499f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = .0f;
+      floatsToConvert[cnt++] = -.499f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = -.501f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = -32767.6f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = -32768.4f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = -32769.0f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = -33000.0f * scaleInt16RangeTo01;
+      floatsToConvert[cnt++] = -77777.0f * scaleInt16RangeTo01;
+
+      celt_assert(cnt < buffer_size);
+   }
+
+   while (cnt < buffer_size)
+   {
+      float inInt16Range = cnt * 7 + .5;
+      inInt16Range += (cnt & 0x01) ? .1 : -.1;
+      inInt16Range *= (cnt & 0x02) ? 1 : -1;
+      floatsToConvert[cnt++] = inInt16Range * scaleInt16RangeTo01;
+   }
+
+   for (i = 0; i < MAX_BUFFER_SIZE; ++i)
+   {
+      results[i] = 42;
+   }
+
+   if (use_ref_impl)
+   {
+      celt_float2int16_c(floatsToConvert, results, cnt);
+   } else {
+      celt_float2int16(floatsToConvert, results, cnt, opus_select_arch());
+   }
+
+   for (i = 0; i < cnt; ++i)
+   {
+      const float expected = FLOAT2INT16(floatsToConvert[i]);
+      if (results[i] != expected)
+      {
+         fprintf (stderr, "testcelt_float2int16 failed: celt_float2int16 converted %f (index: %d) to %d (x*32768=%f, expected: %d, cnt: %d, ref: %d)\n",
+               floatsToConvert[i], i, (int)results[i], floatsToConvert[i] * 32768.0f, (int)expected, buffer_size, use_ref_impl);
+         ret = 1;
+      }
+   }
+
+   for (i = cnt; i < MAX_BUFFER_SIZE; ++i)
+   {
+      if (results[i] != 42)
+      {
+         fprintf (stderr, "testcelt_float2int16 failed: buffer overflow (cnt: %d, ref: %d)\n", buffer_size, use_ref_impl);
+         ret = 1;
+         break;
+      }
+   }
+#undef MAX_BUFFER_SIZE
+}
+
+#endif
+
 int main(void)
 {
+   int i;
+   int use_ref_impl[2] = { 0, 1 };
+
   testbitexactcos();
   testbitexactlog2tan();
   testdiv();
@ -364,6 +453,15 @@ int main(void)
   testilog2();
   testlog2_db();
   testexp2_db();
+#endif
+#ifndef DISABLE_FLOAT_API
+   for (i = 0; i <= 1; ++i)
+   {
+      testcelt_float2int16(use_ref_impl[i], 1);
+      testcelt_float2int16(use_ref_impl[i], 32);
+      testcelt_float2int16(use_ref_impl[i], 127);
+      testcelt_float2int16(use_ref_impl[i], 1031);
+   }
 #endif
   return ret;
 }