Limit compiler hint to compilers that are known to benefit from it
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
This commit is contained in:
parent
075f8797ac
commit
00b530e395
1 changed file with 33 additions and 17 deletions
|
@@ -199,32 +199,42 @@ static inline void mbedtls_xor(unsigned char *r,
|
||||||
uint8x16_t x = veorq_u8(v1, v2);
|
uint8x16_t x = veorq_u8(v1, v2);
|
||||||
vst1q_u8(r + i, x);
|
vst1q_u8(r + i, x);
|
||||||
}
|
}
|
||||||
|
#if defined(__IAR_SYSTEMS_ICC__)
|
||||||
/* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
|
/* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
|
||||||
* where n is a constant multiple of 16.
|
* where n is a constant multiple of 16.
|
||||||
* It makes no difference for others (e.g. recent gcc and clang) if n is a compile-time
|
* For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
|
||||||
* constant, and very little difference if n is not a compile-time constant. */
|
* constant, and is a very small perf regression if n is not a compile-time constant. */
|
||||||
if (n % 16 != 0)
|
if (n % 16 == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64)
|
#elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64)
|
||||||
/* This codepath probably only makes sense on architectures with 64-bit registers */
|
/* This codepath probably only makes sense on architectures with 64-bit registers */
|
||||||
for (; (i + 8) <= n; i += 8) {
|
for (; (i + 8) <= n; i += 8) {
|
||||||
uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
|
uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
|
||||||
mbedtls_put_unaligned_uint64(r + i, x);
|
mbedtls_put_unaligned_uint64(r + i, x);
|
||||||
}
|
}
|
||||||
if (n % 8 != 0)
|
#if defined(__IAR_SYSTEMS_ICC__)
|
||||||
|
if (n % 8 == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
for (; (i + 4) <= n; i += 4) {
|
for (; (i + 4) <= n; i += 4) {
|
||||||
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
|
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
|
||||||
mbedtls_put_unaligned_uint32(r + i, x);
|
mbedtls_put_unaligned_uint32(r + i, x);
|
||||||
}
|
}
|
||||||
if (n % 4 != 0)
|
#if defined(__IAR_SYSTEMS_ICC__)
|
||||||
|
if (n % 4 == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
{
|
|
||||||
for (; i < n; i++) {
|
for (; i < n; i++) {
|
||||||
r[i] = a[i] ^ b[i];
|
r[i] = a[i] ^ b[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Always inline mbedtls_xor_no_simd() as we see significant perf regressions when it does not get
|
/* Always inline mbedtls_xor_no_simd() as we see significant perf regressions when it does not get
|
||||||
* inlined (e.g., observed about 3x perf difference in gcm_mult_largetable with gcc 7 - 12) */
|
* inlined (e.g., observed about 3x perf difference in gcm_mult_largetable with gcc 7 - 12) */
|
||||||
|
@@ -268,25 +278,31 @@ static inline void mbedtls_xor_no_simd(unsigned char *r,
|
||||||
uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
|
uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
|
||||||
mbedtls_put_unaligned_uint64(r + i, x);
|
mbedtls_put_unaligned_uint64(r + i, x);
|
||||||
}
|
}
|
||||||
|
#if defined(__IAR_SYSTEMS_ICC__)
|
||||||
/* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
|
/* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
|
||||||
* where n is a constant multiple of 16.
|
* where n is a constant multiple of 16.
|
||||||
* It makes no difference for others (e.g. recent gcc and clang) if n is a compile-time
|
* For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
|
||||||
* constant, and very little difference if n is not a compile-time constant. */
|
* constant, and is a very small perf regression if n is not a compile-time constant. */
|
||||||
if (n % 8 != 0)
|
if (n % 8 == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
for (; (i + 4) <= n; i += 4) {
|
for (; (i + 4) <= n; i += 4) {
|
||||||
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
|
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
|
||||||
mbedtls_put_unaligned_uint32(r + i, x);
|
mbedtls_put_unaligned_uint32(r + i, x);
|
||||||
}
|
}
|
||||||
if (n % 4 != 0)
|
#if defined(__IAR_SYSTEMS_ICC__)
|
||||||
|
if (n % 4 == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
{
|
|
||||||
for (; i < n; i++) {
|
for (; i < n; i++) {
|
||||||
r[i] = a[i] ^ b[i];
|
r[i] = a[i] ^ b[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Fix MSVC C99 compatible issue
|
/* Fix MSVC C99 compatible issue
|
||||||
* MSVC support __func__ from visual studio 2015( 1900 )
|
* MSVC support __func__ from visual studio 2015( 1900 )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue