Force vectorization for DNN primitives

Avoids having to write intrinsics for simple loops
2023-11-27 16:44:11 -05:00 · 2023-11-27 16:44:11 -05:00 · 7cc30ec681
commit 7cc30ec681
parent d4506af5a9
1 changed files with 11 additions and 0 deletions
--- a/dnn/nnet_arch.h
+++ b/dnn/nnet_arch.h
@@ -38,6 +38,13 @@
 #define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
 /* Force vectorization on for DNN code because some of the loops rely on
   compiler vectorization rather than explicitly using intrinsics. */
 #ifdef __GNUC__
 #pragma GCC push_options
 #pragma GCC optimize("tree-vectorize")
 #endif
 #define MAX_ACTIVATIONS (4096)
@@ -216,4 +223,8 @@ void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem,
   }
 }
 #ifdef __GNUC__
 #pragma GCC pop_options
 #endif
 #endif