Force vectorization for DNN primitives
Avoids having to write intrinsics for simple loops
This commit is contained in:
parent
d4506af5a9
commit
7cc30ec681
1 changed file with 11 additions and 0 deletions
|
@@ -38,6 +38,13 @@
|
||||||
|
|
||||||
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
|
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
|
||||||
|
|
||||||
|
/* Force vectorization on for DNN code because some of the loops rely on
|
||||||
|
compiler vectorization rather than explicitly using intrinsics. */
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#pragma GCC push_options
|
||||||
|
#pragma GCC optimize("tree-vectorize")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define MAX_ACTIVATIONS (4096)
|
#define MAX_ACTIVATIONS (4096)
|
||||||
|
|
||||||
|
@@ -216,4 +223,8 @@ void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#pragma GCC pop_options
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue