diff --git a/library/aes.c b/library/aes.c
index 0a61d1b07..d2687bcf3 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -1040,23 +1040,6 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_CIPHER_MODE_CBC)
 
-#if defined(__ARM_NEON) && defined(__aarch64__)
-/* Avoid using the NEON implementation of mbedtls_xor. Because of the dependency on
- * the result for the next block in CBC, and the cost of transferring that data from
- * NEON registers, it is faster to use the following on aarch64.
- * For 32-bit arm, NEON should be faster. */
-#define CBC_XOR_16(r, a, b) do {                                           \
-        mbedtls_put_unaligned_uint64(r,                                    \
-                                     mbedtls_get_unaligned_uint64(a) ^     \
-                                     mbedtls_get_unaligned_uint64(b));     \
-        mbedtls_put_unaligned_uint64(r + 8,                                \
-                                     mbedtls_get_unaligned_uint64(a + 8) ^ \
-                                     mbedtls_get_unaligned_uint64(b + 8)); \
-} while (0)
-#else
-#define CBC_XOR_16(r, a, b) mbedtls_xor(r, a, b, 16)
-#endif
-
 /*
  * AES-CBC buffer encryption/decryption
  */
@@ -1099,7 +1082,10 @@ int mbedtls_aes_crypt_cbc(mbedtls_aes_context *ctx,
             if (ret != 0) {
                 goto exit;
             }
-            CBC_XOR_16(output, output, iv);
+            /* Avoid using the NEON implementation of mbedtls_xor. Because of the dependency on
+            * the result for the next block in CBC, and the cost of transferring that data from
+            * NEON registers, NEON is slower on aarch64. */
+            mbedtls_xor_no_simd(output, output, iv, 16);
 
             memcpy(iv, temp, 16);
 
@@ -1109,7 +1095,7 @@ int mbedtls_aes_crypt_cbc(mbedtls_aes_context *ctx,
         }
     } else {
         while (length > 0) {
-            CBC_XOR_16(output, input, ivp);
+            mbedtls_xor_no_simd(output, input, ivp, 16);
 
             ret = mbedtls_aes_crypt_ecb(ctx, mode, output, output);
             if (ret != 0) {
diff --git a/library/cmac.c b/library/cmac.c
index 48f51df41..2f19d1129 100644
--- a/library/cmac.c
+++ b/library/cmac.c
@@ -237,7 +237,7 @@ int mbedtls_cipher_cmac_update(mbedtls_cipher_context_t *ctx,
                input,
                block_size - cmac_ctx->unprocessed_len);
 
-        mbedtls_xor(state, cmac_ctx->unprocessed_block, state, block_size);
+        mbedtls_xor_no_simd(state, cmac_ctx->unprocessed_block, state, block_size);
 
         if ((ret = mbedtls_cipher_update(ctx, state, block_size, state,
                                          &olen)) != 0) {
@@ -255,7 +255,7 @@ int mbedtls_cipher_cmac_update(mbedtls_cipher_context_t *ctx,
     /* Iterate across the input data in block sized chunks, excluding any
      * final partial or complete block */
     for (j = 1; j < n; j++) {
-        mbedtls_xor(state, input, state, block_size);
+        mbedtls_xor_no_simd(state, input, state, block_size);
 
         if ((ret = mbedtls_cipher_update(ctx, state, block_size, state,
                                          &olen)) != 0) {