Force vectorization for DNN primitives

Avoids having to write intrinsics for simple loops
This commit is contained in:
Jean-Marc Valin 2023-11-27 16:44:11 -05:00
parent d4506af5a9
commit 7cc30ec681
No known key found for this signature in database
GPG key ID: 531A52533318F00A

View file

@ -38,6 +38,13 @@
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
/* Force vectorization on for DNN code because some of the loops rely on
   compiler vectorization rather than explicitly using intrinsics.
   The current options are saved with push_options so that the change can be
   undone by the pop_options later in this file.
   Only real GCC is targeted: Clang also defines __GNUC__ but does not
   implement these GCC-specific pragmas and warns about them
   (-Wunknown-pragmas), which breaks -Werror builds. */
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC push_options
#pragma GCC optimize("tree-vectorize")
#endif
#define MAX_ACTIVATIONS (4096)
@ -216,4 +223,8 @@ void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem,
}
}
/* Restore the optimization options that were saved by the push_options
   earlier in this file, so forced vectorization does not leak past it.
   Only real GCC is targeted: Clang also defines __GNUC__ but does not
   implement this GCC-specific pragma and warns about it
   (-Wunknown-pragmas). */
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC pop_options
#endif
#endif