Mirror of https://github.com/xiph/opus.git (synced 2025-05-15 16:08:30 +00:00)
Adding RTCD for compute_conv2d()
commit a93b09e241
parent 91d1f7539e
5 changed files with 132 additions and 91 deletions
dnn/nnet.c (88 lines changed)
@@ -212,91 +212,3 @@ void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, fl
       OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
    }
 }
-
-
-/* Computes non-padded convolution for input [ ksize1 x in_channels x (len2+ksize2) ],
-   kernel [ out_channels x in_channels x ksize1 x ksize2 ],
-   storing the output as [ out_channels x len2 ].
-   We assume that the output dimension along the ksize1 axis is 1,
-   i.e. processing one frame at a time. */
-static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
-{
-   int i;
-   int in_stride;
-   in_stride = height+kheight-1;
-   for (i=0;i<out_channels;i++) {
-      int m;
-      OPUS_CLEAR(&out[i*hstride], height);
-      for (m=0;m<in_channels;m++) {
-         int t;
-         for (t=0;t<ktime;t++) {
-            int h;
-            for (h=0;h<kheight;h++) {
-               int j;
-               for (j=0;j<height;j++) {
-                  out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + t*kheight + h] *
-                                        in[t*in_channels*in_stride + m*in_stride + j + h];
-               }
-            }
-         }
-      }
-   }
-}
-
-static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
-{
-   int i;
-   int in_stride;
-   int kheight, ktime;
-   kheight = ktime = 3;
-   in_stride = height+kheight-1;
-   for (i=0;i<out_channels;i++) {
-      int m;
-      OPUS_CLEAR(&out[i*hstride], height);
-      for (m=0;m<in_channels;m++) {
-         int j;
-         for (j=0;j<height;j++) {
-            /* Unrolled version of previous function -- compiler will figure out the indexing simplifications. */
-            out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 0]*in[0*in_channels*in_stride + m*in_stride + j + 0]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 1]*in[0*in_channels*in_stride + m*in_stride + j + 1]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 2]*in[0*in_channels*in_stride + m*in_stride + j + 2]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 0]*in[1*in_channels*in_stride + m*in_stride + j + 0]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 1]*in[1*in_channels*in_stride + m*in_stride + j + 1]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 2]*in[1*in_channels*in_stride + m*in_stride + j + 2]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 0]*in[2*in_channels*in_stride + m*in_stride + j + 0]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 1]*in[2*in_channels*in_stride + m*in_stride + j + 1]
-                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 2]*in[2*in_channels*in_stride + m*in_stride + j + 2];
-         }
-      }
-   }
-}
-
-#define MAX_CONV2D_INPUTS 8192
-
-void compute_conv2d(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation, int arch)
-{
-   int i;
-   const float *bias;
-   float in_buf[MAX_CONV2D_INPUTS];
-   int time_stride;
-   celt_assert(in != out);
-   time_stride = conv->in_channels*(height+conv->kheight-1);
-   celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
-   OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
-   OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
-   OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
-   bias = conv->bias;
-   if (conv->kheight == 3 && conv->ktime == 3)
-      conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, in_buf, height, hstride);
-   else
-      conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, height, hstride);
-   if (bias != NULL) {
-      for (i=0;i<conv->out_channels;i++) {
-         int j;
-         for (j=0;j<height;j++) out[i*hstride+j] += bias[i];
-      }
-   }
-   for (i=0;i<conv->out_channels;i++) {
-      compute_activation(&out[i*hstride], &out[i*hstride], height, activation, arch);
-   }
-}
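The buffering arithmetic in the compute_conv2d() shown in the hunk above (removed here and re-added in the per-arch template further down) is terse. A small standalone sketch of the sizes involved, using made-up layer dimensions (the constants below are illustrative only, not taken from any actual model):

#include <stdio.h>

int main(void) {
   int ktime = 3, kheight = 3;        /* 3x3 kernel, the conv2d_3x3_float case */
   int in_channels = 8, height = 64;  /* hypothetical layer dimensions */

   /* One input frame, padded along the height axis for the non-padded conv. */
   int time_stride = in_channels*(height + kheight - 1);   /* 8*66 = 528 floats */
   /* in_buf holds ktime frames: the ktime-1 frames kept in mem plus the new one. */
   int in_buf_size = ktime*time_stride;                    /* 1584 floats */
   /* mem carries the ktime-1 most recent frames over to the next call. */
   int mem_size = (ktime - 1)*time_stride;                 /* 1056 floats */

   printf("time_stride=%d in_buf=%d mem=%d (MAX_CONV2D_INPUTS=8192)\n",
          time_stride, in_buf_size, mem_size);
   return 0;
}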
@@ -185,12 +185,11 @@ int gru_init(GRULayer *layer, const WeightArray *arrays,
                      int activation,
                      int reset_after);
 
-void compute_conv2d(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation, int arch);
-
-
 void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
 void compute_activation_c(float *output, const float *input, int N, int activation);
+void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+
 
 
 #if defined(OPUS_X86_MAY_HAVE_SSE2)
 #include "x86/dnn_x86.h"

@@ -204,6 +203,9 @@ void compute_activation_c(float *output, const float *input, int N, int activati
 #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
 #endif
 
+#ifndef OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_c(conv, out, mem, in, height, hstride, activation))
+#endif
 
 #if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
 #if defined(_MSC_VER)
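When no SIMD header defines OVERRIDE_COMPUTE_CONV2D, the fallback macro above routes every call to compute_conv2d_c() and discards the arch argument through a comma expression. A minimal, self-contained illustration of that pattern (the names here are invented for the demo, not part of the Opus sources):

#include <stdio.h>

static void do_work_c(int x) { printf("generic C path: %d\n", x); }

/* Same shape as the fallback: keep the arch parameter in the caller-facing
   signature, but evaluate and discard it before calling the plain C version. */
#define do_work(x, arch) ((void)(arch), do_work_c(x))

int main(void) {
   int arch = 3;        /* would normally come from runtime CPU detection */
   do_work(42, arch);   /* expands to ((void)(arch), do_work_c(42)) */
   return 0;
}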
@@ -127,5 +127,93 @@ void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const flo
    }
 }
 
+/* Computes non-padded convolution for input [ ksize1 x in_channels x (len2+ksize2) ],
+   kernel [ out_channels x in_channels x ksize1 x ksize2 ],
+   storing the output as [ out_channels x len2 ].
+   We assume that the output dimension along the ksize1 axis is 1,
+   i.e. processing one frame at a time. */
+static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
+{
+   int i;
+   int in_stride;
+   in_stride = height+kheight-1;
+   for (i=0;i<out_channels;i++) {
+      int m;
+      OPUS_CLEAR(&out[i*hstride], height);
+      for (m=0;m<in_channels;m++) {
+         int t;
+         for (t=0;t<ktime;t++) {
+            int h;
+            for (h=0;h<kheight;h++) {
+               int j;
+               for (j=0;j<height;j++) {
+                  out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + t*kheight + h] *
+                                        in[t*in_channels*in_stride + m*in_stride + j + h];
+               }
+            }
+         }
+      }
+   }
+}
+
+/* There's no intrinsics in this function (or the one above) because the gcc (and hopefully other compiler) auto-vectorizer is smart enough to
+   produce the right code by itself based on the compile flags. */
+static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
+{
+   int i;
+   int in_stride;
+   int kheight, ktime;
+   kheight = ktime = 3;
+   in_stride = height+kheight-1;
+   for (i=0;i<out_channels;i++) {
+      int m;
+      OPUS_CLEAR(&out[i*hstride], height);
+      for (m=0;m<in_channels;m++) {
+         int j;
+         for (j=0;j<height;j++) {
+            /* Unrolled version of previous function -- compiler will figure out the indexing simplifications. */
+            out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 0]*in[0*in_channels*in_stride + m*in_stride + j + 0]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 1]*in[0*in_channels*in_stride + m*in_stride + j + 1]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 2]*in[0*in_channels*in_stride + m*in_stride + j + 2]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 0]*in[1*in_channels*in_stride + m*in_stride + j + 0]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 1]*in[1*in_channels*in_stride + m*in_stride + j + 1]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 2]*in[1*in_channels*in_stride + m*in_stride + j + 2]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 0]*in[2*in_channels*in_stride + m*in_stride + j + 0]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 1]*in[2*in_channels*in_stride + m*in_stride + j + 1]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 2]*in[2*in_channels*in_stride + m*in_stride + j + 2];
+         }
+      }
+   }
+}
+
+#define MAX_CONV2D_INPUTS 8192
+
+void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation)
+{
+   int i;
+   const float *bias;
+   float in_buf[MAX_CONV2D_INPUTS];
+   int time_stride;
+   celt_assert(in != out);
+   time_stride = conv->in_channels*(height+conv->kheight-1);
+   celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
+   OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
+   OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
+   OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
+   bias = conv->bias;
+   if (conv->kheight == 3 && conv->ktime == 3)
+      conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, in_buf, height, hstride);
+   else
+      conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, height, hstride);
+   if (bias != NULL) {
+      for (i=0;i<conv->out_channels;i++) {
+         int j;
+         for (j=0;j<height;j++) out[i*hstride+j] += bias[i];
+      }
+   }
+   for (i=0;i<conv->out_channels;i++) {
+      RTCD_SUF(compute_activation_)(&out[i*hstride], &out[i*hstride], height, activation);
+   }
+}
+
 #endif
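The implementation lands in this per-architecture template header rather than staying in nnet.c because the file is compiled once per SIMD flavor, with RTCD_SUF() pasting an architecture suffix onto each function name. The actual RTCD_SUF/RTCD_ARCH definitions are outside this diff; a sketch of the assumed mechanism (illustrative, not the verbatim Opus macros):

/* Each arch-specific translation unit defines RTCD_ARCH (e.g. c, sse2,
   sse4_1, avx2) before including the template header. */
#define CAT_SUFFIX2(a, b) a ## b
#define CAT_SUFFIX(a, b) CAT_SUFFIX2(a, b)
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)

/* Compiled with RTCD_ARCH defined as sse4_1, the definition added above is
   emitted as compute_conv2d_sse4_1(); with RTCD_ARCH defined as c it becomes
   compute_conv2d_c(), matching the declarations in the headers below. */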
@@ -34,16 +34,19 @@
 #if defined(OPUS_X86_MAY_HAVE_SSE2)
 void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in);
 void compute_activation_sse2(float *output, const float *input, int N, int activation);
+void compute_conv2d_sse2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
 #endif
 
 #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
 void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in);
 void compute_activation_sse4_1(float *output, const float *input, int N, int activation);
+void compute_conv2d_sse4_1(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
 #endif
 
 #if defined(OPUS_X86_MAY_HAVE_AVX2)
 void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in);
 void compute_activation_avx2(float *output, const float *input, int N, int activation);
+void compute_conv2d_avx2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
 #endif
 
 

@@ -53,6 +56,8 @@ void compute_activation_avx2(float *output, const float *input, int N, int activ
 #define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_avx2(linear, out, in))
 #define OVERRIDE_COMPUTE_ACTIVATION
 #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_avx2(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_avx2(conv, out, mem, in, height, hstride, activation))
 
 #elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
 

@@ -60,6 +65,8 @@ void compute_activation_avx2(float *output, const float *input, int N, int activ
 #define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse4_1(linear, out, in))
 #define OVERRIDE_COMPUTE_ACTIVATION
 #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse4_1(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse4_1(conv, out, mem, in, height, hstride, activation))
 
 #elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
 

@@ -67,6 +74,8 @@ void compute_activation_avx2(float *output, const float *input, int N, int activ
 #define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse2(linear, out, in))
 #define OVERRIDE_COMPUTE_ACTIVATION
 #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse2(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse2(conv, out, mem, in, height, hstride, activation))
 
 #elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2))
 

@@ -91,6 +100,20 @@ extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
   ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
 
 
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+                const Conv2dLayer *conv,
+                float *out,
+                float *mem,
+                const float *in,
+                int height,
+                int hstride,
+                int activation
+                );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+  ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
 #endif
 
 
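These declarations follow the same pattern already used for compute_linear() and compute_activation(): when the build can only presume a given instruction set, compute_conv2d() maps directly to that implementation, and when run-time CPU detection (RTCD) is enabled it becomes a lookup into a function-pointer table indexed by (arch) & OPUS_ARCHMASK. A standalone sketch of that dispatch pattern, with hypothetical names and a made-up mask:

#include <stdio.h>

typedef void (*conv_fn)(int height);

static void conv_c(int height)    { printf("C path, height=%d\n", height); }
static void conv_sse2(int height) { printf("SSE2 path, height=%d\n", height); }

#define DEMO_ARCHMASK 3
static conv_fn const CONV_IMPL[DEMO_ARCHMASK + 1] = {
   conv_c,    /* arch 0: no SIMD            */
   conv_c,    /* arch 1: no dedicated kernel */
   conv_sse2, /* arch 2: SSE2               */
   conv_sse2  /* arch 3 and above           */
};

int main(void) {
   int arch = 2; /* in Opus this would come from runtime CPU detection */
   (*CONV_IMPL[arch & DEMO_ARCHMASK])(16);
   return 0;
}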
@@ -61,6 +61,22 @@ void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
   MAY_HAVE_AVX2(compute_activation) /* avx */
 };
 
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+                const Conv2dLayer *conv,
+                float *out,
+                float *mem,
+                const float *in,
+                int height,
+                int hstride,
+                int activation
+) = {
+  compute_conv2d_c,                /* non-sse */
+  compute_conv2d_c,
+  MAY_HAVE_SSE2(compute_conv2d),
+  MAY_HAVE_SSE4_1(compute_conv2d), /* sse4.1 */
+  MAY_HAVE_AVX2(compute_conv2d)    /* avx */
+};
+
 #endif
 
 
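In the table above, the MAY_HAVE_SSE2/MAY_HAVE_SSE4_1/MAY_HAVE_AVX2 wrappers come from the existing x86 CPU-support headers rather than from this diff; the assumption is that each one resolves to the suffixed implementation when that instruction set may be targeted by the build, and otherwise back to the C fallback, roughly like this sketch (illustrative, not copied verbatim):

/* The table always has OPUS_ARCHMASK+1 entries; arch levels with no dedicated
   conv2d kernel (the second compute_conv2d_c slot above) simply reuse the C
   implementation. */
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define MAY_HAVE_SSE4_1(name) name ## _sse4_1
#else
#define MAY_HAVE_SSE4_1(name) name ## _c
#endif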