Arm: Speed up FLOAT2INT16 conversion with Neon

Using Neon for float to int conversion, and introducing platform-
specific function for converting an array of float values to int16.
Also adding appropriate unit test.

Signed-off-by: Jean-Marc Valin <jeanmarcv@google.com>
This commit is contained in:
Sandor Zsombor Vegh 2024-09-11 14:00:32 +02:00 committed by Jean-Marc Valin
parent edffe56b30
commit d4494e6ed7
No known key found for this signature in database
GPG key ID: 8D2952BBB52C646D
9 changed files with 277 additions and 4 deletions

View file

@ -1,5 +1,6 @@
/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
Gregory Maxwell
Copyright (c) 2024 Arm Limited
Written by Jean-Marc Valin, Gregory Maxwell, Timothy B. Terriberry,
and Yunho Huh */
/*
@ -37,8 +38,10 @@
#include <stdio.h>
#include <math.h>
#include "mathops.h"
#include "bands.h"
#include "cpu_support.h"
#include "float_cast.h"
#include "mathops.h"
#ifdef FIXED_POINT
#define WORD "%d"
@ -351,8 +354,94 @@ void testilog2(void)
}
#endif
#ifndef DISABLE_FLOAT_API
void testcelt_float2int16(int use_ref_impl, int buffer_size)
{
#define MAX_BUFFER_SIZE 2080
int i, cnt;
float floatsToConvert[MAX_BUFFER_SIZE];
short results[MAX_BUFFER_SIZE] = { 0 };
float scaleInt16RangeTo01;
celt_assert(buffer_size <= MAX_BUFFER_SIZE);
scaleInt16RangeTo01 = 1.f / 32768.f;
cnt = 0;
while (cnt + 15 < buffer_size && cnt < buffer_size / 2)
{
floatsToConvert[cnt++] = 77777.0f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = 33000.0f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = 32768.0f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = 32767.4f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = 32766.6f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = .501 * scaleInt16RangeTo01;
floatsToConvert[cnt++] = .499f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = .0f;
floatsToConvert[cnt++] = -.499f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = -.501f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = -32767.6f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = -32768.4f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = -32769.0f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = -33000.0f * scaleInt16RangeTo01;
floatsToConvert[cnt++] = -77777.0f * scaleInt16RangeTo01;
celt_assert(cnt < buffer_size);
}
while (cnt < buffer_size)
{
float inInt16Range = cnt * 7 + .5;
inInt16Range += (cnt & 0x01) ? .1 : -.1;
inInt16Range *= (cnt & 0x02) ? 1 : -1;
floatsToConvert[cnt++] = inInt16Range * scaleInt16RangeTo01;
}
for (i = 0; i < MAX_BUFFER_SIZE; ++i)
{
results[i] = 42;
}
if (use_ref_impl)
{
celt_float2int16_c(floatsToConvert, results, cnt);
} else {
celt_float2int16(floatsToConvert, results, cnt, opus_select_arch());
}
for (i = 0; i < cnt; ++i)
{
const float expected = FLOAT2INT16(floatsToConvert[i]);
if (results[i] != expected)
{
fprintf (stderr, "testcelt_float2int16 failed: celt_float2int16 converted %f (index: %d) to %d (x*32768=%f, expected: %d, cnt: %d, ref: %d)\n",
floatsToConvert[i], i, (int)results[i], floatsToConvert[i] * 32768.0f, (int)expected, buffer_size, use_ref_impl);
ret = 1;
}
}
for (i = cnt; i < MAX_BUFFER_SIZE; ++i)
{
if (results[i] != 42)
{
fprintf (stderr, "testcelt_float2int16 failed: buffer overflow (cnt: %d, ref: %d)\n", buffer_size, use_ref_impl);
ret = 1;
break;
}
}
#undef MAX_BUFFER_SIZE
}
#endif
int main(void)
{
int i;
int use_ref_impl[2] = { 0, 1 };
testbitexactcos();
testbitexactlog2tan();
testdiv();
@ -364,6 +453,15 @@ int main(void)
testilog2();
testlog2_db();
testexp2_db();
#endif
#ifndef DISABLE_FLOAT_API
for (i = 0; i <= 1; ++i)
{
testcelt_float2int16(use_ref_impl[i], 1);
testcelt_float2int16(use_ref_impl[i], 32);
testcelt_float2int16(use_ref_impl[i], 127);
testcelt_float2int16(use_ref_impl[i], 1031);
}
#endif
return ret;
}