diff --git a/library/aes.c b/library/aes.c index 0a61d1b07..d2687bcf3 100644 --- a/library/aes.c +++ b/library/aes.c @@ -1040,23 +1040,6 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, #if defined(MBEDTLS_CIPHER_MODE_CBC) -#if defined(__ARM_NEON) && defined(__aarch64__) -/* Avoid using the NEON implementation of mbedtls_xor. Because of the dependency on - * the result for the next block in CBC, and the cost of transferring that data from - * NEON registers, it is faster to use the following on aarch64. - * For 32-bit arm, NEON should be faster. */ -#define CBC_XOR_16(r, a, b) do { \ - mbedtls_put_unaligned_uint64(r, \ - mbedtls_get_unaligned_uint64(a) ^ \ - mbedtls_get_unaligned_uint64(b)); \ - mbedtls_put_unaligned_uint64(r + 8, \ - mbedtls_get_unaligned_uint64(a + 8) ^ \ - mbedtls_get_unaligned_uint64(b + 8)); \ -} while (0) -#else -#define CBC_XOR_16(r, a, b) mbedtls_xor(r, a, b, 16) -#endif - /* * AES-CBC buffer encryption/decryption */ @@ -1099,7 +1082,10 @@ int mbedtls_aes_crypt_cbc(mbedtls_aes_context *ctx, if (ret != 0) { goto exit; } - CBC_XOR_16(output, output, iv); + /* Avoid using the NEON implementation of mbedtls_xor. Because of the dependency on + * the result for the next block in CBC, and the cost of transferring that data from + * NEON registers, NEON is slower on aarch64. */ + mbedtls_xor_no_simd(output, output, iv, 16); memcpy(iv, temp, 16); @@ -1109,7 +1095,7 @@ int mbedtls_aes_crypt_cbc(mbedtls_aes_context *ctx, } } else { while (length > 0) { - CBC_XOR_16(output, input, ivp); + mbedtls_xor_no_simd(output, input, ivp, 16); ret = mbedtls_aes_crypt_ecb(ctx, mode, output, output); if (ret != 0) { diff --git a/library/cmac.c b/library/cmac.c index 48f51df41..2f19d1129 100644 --- a/library/cmac.c +++ b/library/cmac.c @@ -237,7 +237,7 @@ int mbedtls_cipher_cmac_update(mbedtls_cipher_context_t *ctx, input, block_size - cmac_ctx->unprocessed_len); - mbedtls_xor(state, cmac_ctx->unprocessed_block, state, block_size); + mbedtls_xor_no_simd(state, cmac_ctx->unprocessed_block, state, block_size); if ((ret = mbedtls_cipher_update(ctx, state, block_size, state, &olen)) != 0) { @@ -255,7 +255,7 @@ int mbedtls_cipher_cmac_update(mbedtls_cipher_context_t *ctx, /* Iterate across the input data in block sized chunks, excluding any * final partial or complete block */ for (j = 1; j < n; j++) { - mbedtls_xor(state, input, state, block_size); + mbedtls_xor_no_simd(state, input, state, block_size); if ((ret = mbedtls_cipher_update(ctx, state, block_size, state, &olen)) != 0) {