From c3d8041fe7d5dd189ab0bd7bf96d8261b062a238 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 15:01:39 +0000
Subject: [PATCH 01/56] Introduce mbedtls_xor

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/library/common.h b/library/common.h
index a630fcc45..d1af48eb2 100644
--- a/library/common.h
+++ b/library/common.h
@@ -26,6 +26,7 @@
 #include "mbedtls/build_info.h"
 
 #include <stdint.h>
+#include <stddef.h>
 
 /** Helper to define a function as static except when building invasive tests.
  *
@@ -390,6 +391,32 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
 }
 #endif
 
+/**
+ * Perform a fast block XOR operation, such that
+ * r[i] = a[i] ^ b[i] where 0 <= i < n
+ *
+ * Buffers may have any alignment: 32-bit accesses are only used when
+ * \p r, \p a and \p b are all 4-byte aligned; otherwise bytes are used.
+ *
+ * \param   r Pointer to result (buffer of at least \p n bytes). \p r
+ *            may be equal to either \p a or \p b, but behaviour when
+ *            it overlaps in other ways is undefined.
+ * \param   a Pointer to input (buffer of at least \p n bytes)
+ * \param   b Pointer to input (buffer of at least \p n bytes)
+ * \param   n Number of bytes to process.
+ */
+static inline void mbedtls_xor( unsigned char* r, unsigned char const *a, unsigned char const *b, size_t n )
+{
+    size_t i = 0;
+    /* Word-wise path only when every pointer is 4-byte aligned: a misaligned
+     * uint32_t access is undefined behaviour and faults on some targets. */
+    if ( ( ( (uintptr_t) r | (uintptr_t) a | (uintptr_t) b ) & 3u ) == 0 )
+        for ( ; n - i >= 4; i += 4 )
+            *(uint32_t *) ( r + i ) = *(const uint32_t *) ( a + i ) ^ *(const uint32_t *) ( b + i );
+    /* Remaining tail bytes, or the whole buffer when unaligned. */
+    for ( ; i < n; i++ )
+        r[i] = a[i] ^ b[i];
+}
+
 /* Fix MSVC C99 compatible issue
  *      MSVC support __func__ from visual studio 2015( 1900 )
  *      Use MSVC predefine macro to avoid name check fail.

From 4413b6690f00621313cee70601e68be76ddebd13 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 17:31:50 +0000
Subject: [PATCH 02/56] Add tests for mbedtls_xor

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_common.data     | 20 +++++++
 tests/suites/test_suite_common.function | 76 +++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tests/suites/test_suite_common.data
 create mode 100644 tests/suites/test_suite_common.function

diff --git a/tests/suites/test_suite_common.data b/tests/suites/test_suite_common.data
new file mode 100644
index 000000000..500852d54
--- /dev/null
+++ b/tests/suites/test_suite_common.data
@@ -0,0 +1,20 @@
+Block xor, length 0
+mbedtls_xor:0
+
+Block xor, length 1
+mbedtls_xor:1
+
+Block xor, length 3
+mbedtls_xor:3
+
+Block xor, length 4
+mbedtls_xor:4
+
+Block xor, length 7
+mbedtls_xor:7
+
+Block xor, length 8
+mbedtls_xor:8
+
+Block xor, length 16
+mbedtls_xor:16
diff --git a/tests/suites/test_suite_common.function b/tests/suites/test_suite_common.function
new file mode 100644
index 000000000..17eae27c4
--- /dev/null
+++ b/tests/suites/test_suite_common.function
@@ -0,0 +1,76 @@
+/* BEGIN_HEADER */
+#include "../library/common.h"
+
+void fill_arrays( unsigned char *a, unsigned char *b, unsigned char *r1, unsigned char *r2, size_t n )
+{
+    /* a, b and r1 get distinct index-derived byte patterns; r2 mirrors r1 */
+    for ( size_t idx = 0; idx < n; idx++ )
+    {
+        a[idx]  = (unsigned char)( 3 * idx );
+        b[idx]  = (unsigned char)( 3 * idx + 1 );
+        r1[idx] = r2[idx] = (unsigned char)( 3 * idx + 2 );
+    }
+}
+/* END_HEADER */
+
+/* BEGIN_CASE */
+void mbedtls_xor( int len )
+{
+    size_t n = (size_t) len;
+    unsigned char *a = NULL, *b = NULL, *r1 = NULL, *r2 = NULL;
+    ASSERT_ALLOC( a, n );
+    ASSERT_ALLOC( b, n );
+    ASSERT_ALLOC( r1, n );
+    ASSERT_ALLOC( r2, n );
+
+    /* Test non-overlapping */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = a[i] ^ b[i];
+    }
+    mbedtls_xor( r2, a, b, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test r == a: the output buffer is also the first input */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = r1[i] ^ b[i];
+    }
+    mbedtls_xor( r2, r2, b, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test r == b: the output buffer is also the second input */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = a[i] ^ r1[i];
+    }
+    mbedtls_xor( r2, a, r2, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test a == b: both inputs are the same buffer, so the result is all-zero */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = a[i] ^ a[i];
+    }
+    mbedtls_xor( r2, a, a, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test a == b == r: one buffer is both inputs and the output (all-zero) */
+    fill_arrays( a, b, r1, r2, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i] = r1[i] ^ r1[i];
+    }
+    mbedtls_xor( r2, r2, r2, n );
+    ASSERT_COMPARE( r1, n, r2, n );
+exit:
+    if ( a != NULL ) mbedtls_free( a );
+    if ( a != NULL ) mbedtls_free( b );
+    if ( r1 != NULL ) mbedtls_free( r1 );
+    if ( r2 != NULL ) mbedtls_free( r2 );
+}
+/* END_CASE */

From a8cf607458dfda284ceaabdf033e5a7a99a45807 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 15:02:54 +0000
Subject: [PATCH 03/56] Use mbedtls_xor in AES

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/aes.c | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index 7d035246f..4f7c88859 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -978,7 +978,6 @@ int mbedtls_aes_crypt_cbc( mbedtls_aes_context *ctx,
                     const unsigned char *input,
                     unsigned char *output )
 {
-    int i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char temp[16];
 
@@ -1009,8 +1008,7 @@ int mbedtls_aes_crypt_cbc( mbedtls_aes_context *ctx,
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 16 );
 
             memcpy( iv, temp, 16 );
 
@@ -1023,8 +1021,7 @@ int mbedtls_aes_crypt_cbc( mbedtls_aes_context *ctx,
     {
         while( length > 0 )
         {
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 16 );
 
             ret = mbedtls_aes_crypt_ecb( ctx, mode, output, output );
             if( ret != 0 )
@@ -1106,8 +1103,6 @@ int mbedtls_aes_crypt_xts( mbedtls_aes_xts_context *ctx,
 
     while( blocks-- )
     {
-        size_t i;
-
         if( leftover && ( mode == MBEDTLS_AES_DECRYPT ) && blocks == 0 )
         {
             /* We are on the last block in a decrypt operation that has
@@ -1119,15 +1114,13 @@ int mbedtls_aes_crypt_xts( mbedtls_aes_xts_context *ctx,
             mbedtls_gf128mul_x_ble( tweak, tweak );
         }
 
-        for( i = 0; i < 16; i++ )
-            tmp[i] = input[i] ^ tweak[i];
+        mbedtls_xor( tmp, input, tweak, 16 );
 
         ret = mbedtls_aes_crypt_ecb( &ctx->crypt, mode, tmp, tmp );
         if( ret != 0 )
             return( ret );
 
-        for( i = 0; i < 16; i++ )
-            output[i] = tmp[i] ^ tweak[i];
+        mbedtls_xor( output, tmp, tweak, 16 );
 
         /* Update the tweak for the next block. */
         mbedtls_gf128mul_x_ble( tweak, tweak );
@@ -1147,20 +1140,19 @@ int mbedtls_aes_crypt_xts( mbedtls_aes_xts_context *ctx,
         size_t i;
         unsigned char *prev_output = output - 16;
 
-        /* Copy ciphertext bytes from the previous block to our output for each
-         * byte of ciphertext we won't steal. At the same time, copy the
-         * remainder of the input for this final round (since the loop bounds
-         * are the same). */
+        /* Copy ciphertext bytes from the previous block to our output. */
         for( i = 0; i < leftover; i++ )
         {
             output[i] = prev_output[i];
-            tmp[i] = input[i] ^ t[i];
         }
 
+        /* XOR the remainder of the input for this final round with t, writing
+         * the result to the first leftover bytes of tmp. */
+        mbedtls_xor( tmp, input, t, leftover );
+
         /* Copy ciphertext bytes from the previous block for input in this
          * round. */
-        for( ; i < 16; i++ )
-            tmp[i] = prev_output[i] ^ t[i];
+        mbedtls_xor( tmp + i, prev_output + i, t + i, 16 - i );
 
         ret = mbedtls_aes_crypt_ecb( &ctx->crypt, mode, tmp, tmp );
         if( ret != 0 )
@@ -1168,8 +1160,7 @@ int mbedtls_aes_crypt_xts( mbedtls_aes_xts_context *ctx,
 
         /* Write the result back to the previous block, overriding the previous
          * output we copied. */
-        for( i = 0; i < 16; i++ )
-            prev_output[i] = tmp[i] ^ t[i];
+        mbedtls_xor( prev_output, tmp, t, 16 );
     }
 
     return( 0 );

From 7bb6b84b293d7c621dc7a3f15bc1b13976bcf912 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:18:55 +0000
Subject: [PATCH 04/56] Use mbedtls_xor in ARIA

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/aria.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/library/aria.c b/library/aria.c
index 924f95283..a1876b776 100644
--- a/library/aria.c
+++ b/library/aria.c
@@ -588,7 +588,6 @@ int mbedtls_aria_crypt_cbc( mbedtls_aria_context *ctx,
                             const unsigned char *input,
                             unsigned char *output )
 {
-    int i;
     unsigned char temp[MBEDTLS_ARIA_BLOCKSIZE];
 
     ARIA_VALIDATE_RET( ctx != NULL );
@@ -608,8 +607,7 @@ int mbedtls_aria_crypt_cbc( mbedtls_aria_context *ctx,
             memcpy( temp, input, MBEDTLS_ARIA_BLOCKSIZE );
             mbedtls_aria_crypt_ecb( ctx, input, output );
 
-            for( i = 0; i < MBEDTLS_ARIA_BLOCKSIZE; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, MBEDTLS_ARIA_BLOCKSIZE );
 
             memcpy( iv, temp, MBEDTLS_ARIA_BLOCKSIZE );
 
@@ -622,8 +620,7 @@ int mbedtls_aria_crypt_cbc( mbedtls_aria_context *ctx,
     {
         while( length > 0 )
         {
-            for( i = 0; i < MBEDTLS_ARIA_BLOCKSIZE; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, MBEDTLS_ARIA_BLOCKSIZE );
 
             mbedtls_aria_crypt_ecb( ctx, output, output );
             memcpy( iv, output, MBEDTLS_ARIA_BLOCKSIZE );

From d23399eb699f2533f62779ac629b943c99a4c479 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:23:11 +0000
Subject: [PATCH 05/56] Use mbedtls_xor in Camellia

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/camellia.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/library/camellia.c b/library/camellia.c
index 5dd6c5615..6e781c72b 100644
--- a/library/camellia.c
+++ b/library/camellia.c
@@ -526,7 +526,6 @@ int mbedtls_camellia_crypt_cbc( mbedtls_camellia_context *ctx,
                                 const unsigned char *input,
                                 unsigned char *output )
 {
-    int i;
     unsigned char temp[16];
     if( mode != MBEDTLS_CAMELLIA_ENCRYPT && mode != MBEDTLS_CAMELLIA_DECRYPT )
         return MBEDTLS_ERR_CAMELLIA_BAD_INPUT_DATA;
@@ -541,8 +540,7 @@ int mbedtls_camellia_crypt_cbc( mbedtls_camellia_context *ctx,
             memcpy( temp, input, 16 );
             mbedtls_camellia_crypt_ecb( ctx, mode, input, output );
 
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 16 );
 
             memcpy( iv, temp, 16 );
 
@@ -555,8 +553,7 @@ int mbedtls_camellia_crypt_cbc( mbedtls_camellia_context *ctx,
     {
         while( length > 0 )
         {
-            for( i = 0; i < 16; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 16 );
 
             mbedtls_camellia_crypt_ecb( ctx, mode, output, output );
             memcpy( iv, output, 16 );

From 0d3b55bca8c3325f8c00ae558a0e8e63c34bd91d Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:30:35 +0000
Subject: [PATCH 06/56] Use mbedtls_xor in ccm

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/ccm.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/library/ccm.c b/library/ccm.c
index 3edfba366..7bf9e9f5f 100644
--- a/library/ccm.c
+++ b/library/ccm.c
@@ -112,7 +112,6 @@ static int mbedtls_ccm_crypt( mbedtls_ccm_context *ctx,
                               const unsigned char *input,
                               unsigned char *output )
 {
-    size_t i;
     size_t olen = 0;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char tmp_buf[16] = {0};
@@ -125,8 +124,7 @@ static int mbedtls_ccm_crypt( mbedtls_ccm_context *ctx,
         return ret;
     }
 
-    for( i = 0; i < use_len; i++ )
-        output[i] = input[i] ^ tmp_buf[offset + i];
+    mbedtls_xor( output, input, tmp_buf + offset, use_len );
 
     mbedtls_platform_zeroize(tmp_buf, sizeof(tmp_buf));
     return ret;
@@ -269,7 +267,6 @@ int mbedtls_ccm_update_ad( mbedtls_ccm_context *ctx,
                            size_t add_len )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    unsigned char i;
     size_t olen, use_len, offset;
 
     if( ctx->state & CCM_STATE__ERROR )
@@ -310,8 +307,7 @@ int mbedtls_ccm_update_ad( mbedtls_ccm_context *ctx,
             if( use_len > add_len )
                 use_len = add_len;
 
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i + offset] ^= add[i];
+            mbedtls_xor( ctx->y + offset, ctx->y + offset, add, use_len );
 
             ctx->processed += use_len;
             add_len -= use_len;
@@ -381,8 +377,7 @@ int mbedtls_ccm_update( mbedtls_ccm_context *ctx,
         if( ctx->mode == MBEDTLS_CCM_ENCRYPT || \
             ctx->mode == MBEDTLS_CCM_STAR_ENCRYPT )
         {
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i + offset] ^= input[i];
+            mbedtls_xor( ctx->y + offset, ctx->y + offset, input, use_len );
 
             if( use_len + offset == 16 || ctx->processed == ctx->plaintext_len )
             {
@@ -411,8 +406,7 @@ int mbedtls_ccm_update( mbedtls_ccm_context *ctx,
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i + offset] ^= local_output[i];
+            mbedtls_xor( ctx->y + offset, ctx->y + offset, local_output, use_len );
 
             memcpy( output, local_output, use_len );
             mbedtls_platform_zeroize( local_output, 16 );

From c1d9022bab918ff4b9791759e726a374ce802ea8 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:35:42 +0000
Subject: [PATCH 07/56] Use mbedtls_xor in ChaCha20

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/chacha20.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/library/chacha20.c b/library/chacha20.c
index e53eb82f5..28751043a 100644
--- a/library/chacha20.c
+++ b/library/chacha20.c
@@ -222,7 +222,6 @@ int mbedtls_chacha20_update( mbedtls_chacha20_context *ctx,
                               unsigned char *output )
 {
     size_t offset = 0U;
-    size_t i;
 
     /* Use leftover keystream bytes, if available */
     while( size > 0U && ctx->keystream_bytes_used < CHACHA20_BLOCK_SIZE_BYTES )
@@ -242,17 +241,7 @@ int mbedtls_chacha20_update( mbedtls_chacha20_context *ctx,
         chacha20_block( ctx->state, ctx->keystream8 );
         ctx->state[CHACHA20_CTR_INDEX]++;
 
-        for( i = 0U; i < 64U; i += 8U )
-        {
-            output[offset + i  ] = input[offset + i  ] ^ ctx->keystream8[i  ];
-            output[offset + i+1] = input[offset + i+1] ^ ctx->keystream8[i+1];
-            output[offset + i+2] = input[offset + i+2] ^ ctx->keystream8[i+2];
-            output[offset + i+3] = input[offset + i+3] ^ ctx->keystream8[i+3];
-            output[offset + i+4] = input[offset + i+4] ^ ctx->keystream8[i+4];
-            output[offset + i+5] = input[offset + i+5] ^ ctx->keystream8[i+5];
-            output[offset + i+6] = input[offset + i+6] ^ ctx->keystream8[i+6];
-            output[offset + i+7] = input[offset + i+7] ^ ctx->keystream8[i+7];
-        }
+        mbedtls_xor( output + offset, input + offset, ctx->keystream8, 64U );
 
         offset += CHACHA20_BLOCK_SIZE_BYTES;
         size   -= CHACHA20_BLOCK_SIZE_BYTES;
@@ -265,10 +254,7 @@ int mbedtls_chacha20_update( mbedtls_chacha20_context *ctx,
         chacha20_block( ctx->state, ctx->keystream8 );
         ctx->state[CHACHA20_CTR_INDEX]++;
 
-        for( i = 0U; i < size; i++)
-        {
-            output[offset + i] = input[offset + i] ^ ctx->keystream8[i];
-        }
+        mbedtls_xor( output + offset, input + offset, ctx->keystream8, size );
 
         ctx->keystream_bytes_used = size;
 

From 8c0ff81ce713f84f6c70c92195d0a06a8bea442b Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:38:44 +0000
Subject: [PATCH 08/56] Use mbedtls_xor in CMAC

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/cmac.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/library/cmac.c b/library/cmac.c
index 3cc49d10c..987085686 100644
--- a/library/cmac.c
+++ b/library/cmac.c
@@ -148,15 +148,6 @@ exit:
 #endif /* !defined(MBEDTLS_CMAC_ALT) || defined(MBEDTLS_SELF_TEST) */
 
 #if !defined(MBEDTLS_CMAC_ALT)
-static void cmac_xor_block( unsigned char *output, const unsigned char *input1,
-                            const unsigned char *input2,
-                            const size_t block_size )
-{
-    size_t idx;
-
-    for( idx = 0; idx < block_size; idx++ )
-        output[ idx ] = input1[ idx ] ^ input2[ idx ];
-}
 
 /*
  * Create padded last block from (partial) last block.
@@ -247,7 +238,7 @@ int mbedtls_cipher_cmac_update( mbedtls_cipher_context_t *ctx,
                 input,
                 block_size - cmac_ctx->unprocessed_len );
 
-        cmac_xor_block( state, cmac_ctx->unprocessed_block, state, block_size );
+        mbedtls_xor( state, cmac_ctx->unprocessed_block, state, block_size );
 
         if( ( ret = mbedtls_cipher_update( ctx, state, block_size, state,
                                            &olen ) ) != 0 )
@@ -267,7 +258,7 @@ int mbedtls_cipher_cmac_update( mbedtls_cipher_context_t *ctx,
      * final partial or complete block */
     for( j = 1; j < n; j++ )
     {
-        cmac_xor_block( state, input, state, block_size );
+        mbedtls_xor( state, input, state, block_size );
 
         if( ( ret = mbedtls_cipher_update( ctx, state, block_size, state,
                                            &olen ) ) != 0 )
@@ -319,16 +310,16 @@ int mbedtls_cipher_cmac_finish( mbedtls_cipher_context_t *ctx,
     if( cmac_ctx->unprocessed_len < block_size )
     {
         cmac_pad( M_last, block_size, last_block, cmac_ctx->unprocessed_len );
-        cmac_xor_block( M_last, M_last, K2, block_size );
+        mbedtls_xor( M_last, M_last, K2, block_size );
     }
     else
     {
         /* Last block is complete block */
-        cmac_xor_block( M_last, last_block, K1, block_size );
+        mbedtls_xor( M_last, last_block, K1, block_size );
     }
 
 
-    cmac_xor_block( state, M_last, state, block_size );
+    mbedtls_xor( state, M_last, state, block_size );
     if( ( ret = mbedtls_cipher_update( ctx, state, block_size, state,
                                        &olen ) ) != 0 )
     {

From ffb5499988f7a2cd1d908a7c542e3daad22d72c7 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:40:58 +0000
Subject: [PATCH 09/56] Use mbedtls_xor in CTR_DRBG

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/ctr_drbg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/library/ctr_drbg.c b/library/ctr_drbg.c
index 71c48afd2..f5c5e7b61 100644
--- a/library/ctr_drbg.c
+++ b/library/ctr_drbg.c
@@ -174,8 +174,7 @@ static int block_cipher_df( unsigned char *output,
 
         while( use_len > 0 )
         {
-            for( i = 0; i < MBEDTLS_CTR_DRBG_BLOCKSIZE; i++ )
-                chain[i] ^= p[i];
+            mbedtls_xor( chain, chain, p, MBEDTLS_CTR_DRBG_BLOCKSIZE );
             p += MBEDTLS_CTR_DRBG_BLOCKSIZE;
             use_len -= ( use_len >= MBEDTLS_CTR_DRBG_BLOCKSIZE ) ?
                        MBEDTLS_CTR_DRBG_BLOCKSIZE : use_len;

From 2e9db8e9bf54f734c441cd07b10b880cfcb2f7da Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:43:29 +0000
Subject: [PATCH 10/56] Use mbedtls_xor in DES

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/des.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/library/des.c b/library/des.c
index 65f5681cf..c56d4d4fe 100644
--- a/library/des.c
+++ b/library/des.c
@@ -635,7 +635,6 @@ int mbedtls_des_crypt_cbc( mbedtls_des_context *ctx,
                     const unsigned char *input,
                     unsigned char *output )
 {
-    int i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char temp[8];
 
@@ -646,8 +645,7 @@ int mbedtls_des_crypt_cbc( mbedtls_des_context *ctx,
     {
         while( length > 0 )
         {
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 8 );
 
             ret = mbedtls_des_crypt_ecb( ctx, output, output );
             if( ret != 0 )
@@ -668,8 +666,7 @@ int mbedtls_des_crypt_cbc( mbedtls_des_context *ctx,
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 8 );
 
             memcpy( iv, temp, 8 );
 
@@ -741,7 +738,6 @@ int mbedtls_des3_crypt_cbc( mbedtls_des3_context *ctx,
                      const unsigned char *input,
                      unsigned char *output )
 {
-    int i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char temp[8];
 
@@ -752,8 +748,7 @@ int mbedtls_des3_crypt_cbc( mbedtls_des3_context *ctx,
     {
         while( length > 0 )
         {
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( input[i] ^ iv[i] );
+            mbedtls_xor( output, input, iv, 8 );
 
             ret = mbedtls_des3_crypt_ecb( ctx, output, output );
             if( ret != 0 )
@@ -774,8 +769,7 @@ int mbedtls_des3_crypt_cbc( mbedtls_des3_context *ctx,
             if( ret != 0 )
                 goto exit;
 
-            for( i = 0; i < 8; i++ )
-                output[i] = (unsigned char)( output[i] ^ iv[i] );
+            mbedtls_xor( output, output, iv, 8 );
 
             memcpy( iv, temp, 8 );
 

From d22fb73e3ed52812e4c474df148064cd4abb9b71 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:53:25 +0000
Subject: [PATCH 11/56] Use mbedtls_xor in GCM

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/gcm.c | 40 ++++++++++++++--------------------------
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/library/gcm.c b/library/gcm.c
index f004a73c7..0178b5ba7 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -235,7 +235,6 @@ int mbedtls_gcm_starts( mbedtls_gcm_context *ctx,
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char work_buf[16];
-    size_t i;
     const unsigned char *p;
     size_t use_len, olen = 0;
     uint64_t iv_bits;
@@ -268,8 +267,7 @@ int mbedtls_gcm_starts( mbedtls_gcm_context *ctx,
         {
             use_len = ( iv_len < 16 ) ? iv_len : 16;
 
-            for( i = 0; i < use_len; i++ )
-                ctx->y[i] ^= p[i];
+            mbedtls_xor( ctx->y, ctx->y, p, use_len );
 
             gcm_mult( ctx, ctx->y, ctx->y );
 
@@ -277,8 +275,7 @@ int mbedtls_gcm_starts( mbedtls_gcm_context *ctx,
             p += use_len;
         }
 
-        for( i = 0; i < 16; i++ )
-            ctx->y[i] ^= work_buf[i];
+        mbedtls_xor( ctx->y, ctx->y, work_buf, 16);
 
         gcm_mult( ctx, ctx->y, ctx->y );
     }
@@ -313,7 +310,7 @@ int mbedtls_gcm_update_ad( mbedtls_gcm_context *ctx,
                            const unsigned char *add, size_t add_len )
 {
     const unsigned char *p;
-    size_t use_len, i, offset;
+    size_t use_len, offset;
 
     /* IV is limited to 2^64 bits, so 2^61 bytes */
     if( (uint64_t) add_len >> 61 != 0 )
@@ -328,8 +325,7 @@ int mbedtls_gcm_update_ad( mbedtls_gcm_context *ctx,
         if( use_len > add_len )
             use_len = add_len;
 
-        for( i = 0; i < use_len; i++ )
-            ctx->buf[i+offset] ^= p[i];
+        mbedtls_xor( ctx->buf + offset, ctx->buf + offset, p, use_len );
 
         if( offset + use_len == 16 )
             gcm_mult( ctx, ctx->buf, ctx->buf );
@@ -343,8 +339,7 @@ int mbedtls_gcm_update_ad( mbedtls_gcm_context *ctx,
 
     while( add_len >= 16 )
     {
-        for( i = 0; i < 16; i++ )
-            ctx->buf[i] ^= p[i];
+        mbedtls_xor( ctx->buf, ctx->buf, p, 16 );
 
         gcm_mult( ctx, ctx->buf, ctx->buf );
 
@@ -354,8 +349,7 @@ int mbedtls_gcm_update_ad( mbedtls_gcm_context *ctx,
 
     if( add_len > 0 )
     {
-        for( i = 0; i < add_len; i++ )
-            ctx->buf[i] ^= p[i];
+        mbedtls_xor( ctx->buf, ctx->buf, p, add_len );
     }
 
     return( 0 );
@@ -378,7 +372,6 @@ static int gcm_mask( mbedtls_gcm_context *ctx,
                      const unsigned char *input,
                      unsigned char *output )
 {
-    size_t i;
     size_t olen = 0;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
@@ -389,14 +382,12 @@ static int gcm_mask( mbedtls_gcm_context *ctx,
         return( ret );
     }
 
-    for( i = 0; i < use_len; i++ )
-    {
-        if( ctx->mode == MBEDTLS_GCM_DECRYPT )
-            ctx->buf[offset + i] ^= input[i];
-        output[i] = ectr[offset + i] ^ input[i];
-        if( ctx->mode == MBEDTLS_GCM_ENCRYPT )
-            ctx->buf[offset + i] ^= output[i];
-    }
+    if( ctx->mode == MBEDTLS_GCM_DECRYPT )
+        mbedtls_xor( ctx->buf + offset, ctx->buf + offset, input, use_len );
+    mbedtls_xor( output, ectr + offset, input, use_len );
+    if( ctx->mode == MBEDTLS_GCM_ENCRYPT )
+        mbedtls_xor( ctx->buf + offset, ctx->buf + offset, output, use_len );
+
     return( 0 );
 }
 
@@ -489,7 +480,6 @@ int mbedtls_gcm_finish( mbedtls_gcm_context *ctx,
                         unsigned char *tag, size_t tag_len )
 {
     unsigned char work_buf[16];
-    size_t i;
     uint64_t orig_len;
     uint64_t orig_add_len;
 
@@ -524,13 +514,11 @@ int mbedtls_gcm_finish( mbedtls_gcm_context *ctx,
         MBEDTLS_PUT_UINT32_BE( ( orig_len     >> 32 ), work_buf, 8  );
         MBEDTLS_PUT_UINT32_BE( ( orig_len           ), work_buf, 12 );
 
-        for( i = 0; i < 16; i++ )
-            ctx->buf[i] ^= work_buf[i];
+        mbedtls_xor( ctx->buf, ctx->buf, work_buf, 16 );
 
         gcm_mult( ctx, ctx->buf, ctx->buf );
 
-        for( i = 0; i < tag_len; i++ )
-            tag[i] ^= ctx->buf[i];
+        mbedtls_xor( tag, tag, ctx->buf, tag_len );
     }
 
     return( 0 );

From 99a507ee55e0969ee49c33584f0a6eeaf68c2f4c Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:54:54 +0000
Subject: [PATCH 12/56] Use mbedtls_xor in md

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/md.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/library/md.c b/library/md.c
index 8efcf105b..9c161a53f 100644
--- a/library/md.c
+++ b/library/md.c
@@ -633,7 +633,6 @@ int mbedtls_md_hmac_starts( mbedtls_md_context_t *ctx, const unsigned char *key,
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char sum[MBEDTLS_MD_MAX_SIZE];
     unsigned char *ipad, *opad;
-    size_t i;
 
     if( ctx == NULL || ctx->md_info == NULL || ctx->hmac_ctx == NULL )
         return( MBEDTLS_ERR_MD_BAD_INPUT_DATA );
@@ -657,11 +656,8 @@ int mbedtls_md_hmac_starts( mbedtls_md_context_t *ctx, const unsigned char *key,
     memset( ipad, 0x36, ctx->md_info->block_size );
     memset( opad, 0x5C, ctx->md_info->block_size );
 
-    for( i = 0; i < keylen; i++ )
-    {
-        ipad[i] = (unsigned char)( ipad[i] ^ key[i] );
-        opad[i] = (unsigned char)( opad[i] ^ key[i] );
-    }
+    mbedtls_xor( ipad, ipad, key, keylen );
+    mbedtls_xor( opad, opad, key, keylen );
 
     if( ( ret = mbedtls_md_starts( ctx ) ) != 0 )
         goto cleanup;

From 74b345f2821231b6a979aa3c19c0d80d377d5bb9 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 16:56:59 +0000
Subject: [PATCH 13/56] Use mbedtls_xor in PKCS #5

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/pkcs5.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/library/pkcs5.c b/library/pkcs5.c
index ac5945a11..a6bc1b55c 100644
--- a/library/pkcs5.c
+++ b/library/pkcs5.c
@@ -211,7 +211,6 @@ static int pkcs5_pbkdf2_hmac( mbedtls_md_context_t *ctx,
                               uint32_t key_length, unsigned char *output )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    int j;
     unsigned int i;
     unsigned char md1[MBEDTLS_MD_MAX_SIZE];
     unsigned char work[MBEDTLS_MD_MAX_SIZE];
@@ -263,8 +262,7 @@ static int pkcs5_pbkdf2_hmac( mbedtls_md_context_t *ctx,
 
             // U1 xor U2
             //
-            for( j = 0; j < md_size; j++ )
-                work[j] ^= md1[j];
+            mbedtls_xor( work, work, md1, md_size );
         }
 
         use_len = ( key_length < md_size ) ? key_length : md_size;
@@ -396,8 +394,7 @@ exit:
 
             // U1 xor U2
             //
-            for( j = 0; j < md_size; j++ )
-                work[j] ^= md1[j];
+            mbedtls_xor( work, work, md1, md_size );
         }
 
         use_len = ( key_length < md_size ) ? key_length : md_size;

From c36a56e8907f6295515f7374dd0a1666085ebdad Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 17:00:26 +0000
Subject: [PATCH 14/56] Use mbedtls_xor in TLS messaging layer

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/ssl_msg.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/library/ssl_msg.c b/library/ssl_msg.c
index dbc639188..085233c15 100644
--- a/library/ssl_msg.c
+++ b/library/ssl_msg.c
@@ -511,15 +511,12 @@ static void ssl_build_record_nonce( unsigned char *dst_iv,
                                     unsigned char const *dynamic_iv,
                                     size_t dynamic_iv_len )
 {
-    size_t i;
-
     /* Start with Fixed IV || 0 */
     memset( dst_iv, 0, dst_iv_len );
     memcpy( dst_iv, fixed_iv, fixed_iv_len );
 
     dst_iv += dst_iv_len - dynamic_iv_len;
-    for( i = 0; i < dynamic_iv_len; i++ )
-        dst_iv[i] ^= dynamic_iv[i];
+    mbedtls_xor( dst_iv, dst_iv, dynamic_iv, dynamic_iv_len );
 }
 #endif /* MBEDTLS_GCM_C || MBEDTLS_CCM_C || MBEDTLS_CHACHAPOLY_C */
 

From 8c18baf8c1ef8c2ed24981e2298d0e730eaa1e1b Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 18:15:55 +0000
Subject: [PATCH 15/56] Add test for unaligned buffers

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_common.function | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/suites/test_suite_common.function b/tests/suites/test_suite_common.function
index 17eae27c4..8412e60f3 100644
--- a/tests/suites/test_suite_common.function
+++ b/tests/suites/test_suite_common.function
@@ -18,10 +18,10 @@ void mbedtls_xor( int len )
 {
     size_t n = (size_t) len;
     unsigned char *a = NULL, *b = NULL, *r1 = NULL, *r2 = NULL;
-    ASSERT_ALLOC( a, n );
-    ASSERT_ALLOC( b, n );
-    ASSERT_ALLOC( r1, n );
-    ASSERT_ALLOC( r2, n );
+    ASSERT_ALLOC( a, n + 1 );
+    ASSERT_ALLOC( b, n + 1 );
+    ASSERT_ALLOC( r1, n + 1 );
+    ASSERT_ALLOC( r2, n + 1 );
 
     /* Test non-overlapping */
     fill_arrays( a, b, r1, r2, n );
@@ -67,6 +67,15 @@ void mbedtls_xor( int len )
     }
     mbedtls_xor( r2, r2, r2, n );
     ASSERT_COMPARE( r1, n, r2, n );
+
+    /* Test non-word-aligned buffers */
+    fill_arrays( a + 1, b + 1, r1 + 1, r2 + 1, n );
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r1[i + 1] = a[i + 1] ^ b[i + 1];
+    }
+    mbedtls_xor( r2 + 1, a + 1, b + 1, n );
+    ASSERT_COMPARE( r1 + 1, n, r2 + 1, n );
 exit:
     if ( a != NULL ) mbedtls_free( a );
     if ( a != NULL ) mbedtls_free( b );

From fdd967ebdc53c1d3947f3fc145087579e32011ff Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Tue, 22 Nov 2022 18:55:17 +0000
Subject: [PATCH 16/56] Detect support for unaligned memory access

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/library/common.h b/library/common.h
index d1af48eb2..bcb99f752 100644
--- a/library/common.h
+++ b/library/common.h
@@ -69,6 +69,18 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  */
 #define MBEDTLS_ALLOW_PRIVATE_ACCESS
 
+/** Detect architectures where unaligned memory accesses are safe and performant.
+ *
+ * This list is incomplete.
+ */
+#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__aarch64__) \
+    || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
+    || defined(__ARM_ARCH_7A__)
+#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
+#endif
+
 /** Byte Reading Macros
  *
  * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
@@ -404,6 +416,7 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  */
 static inline void mbedtls_xor( unsigned char* r, unsigned char const *a, unsigned char const *b, size_t n )
 {
+#if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
     uint32_t *a32 = (uint32_t*)a;
     uint32_t *b32 = (uint32_t*)b;
     uint32_t *r32 = (uint32_t*)r;
@@ -415,6 +428,12 @@ static inline void mbedtls_xor( unsigned char* r, unsigned char const *a, unsign
     {
         r[i] = a[i] ^ b[i];
     }
+#else
+    for ( size_t i = 0; i < n; i++ )
+    {
+        r[i] = a[i] ^ b[i];
+    }
+#endif
 }
 
 /* Fix MSVC C99 compatible issue

From f9a1c37bc8dd54d90ea706ed11a73e4b32a3e974 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 14:02:00 +0000
Subject: [PATCH 17/56] Whitespace cleanup

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/library/common.h b/library/common.h
index bcb99f752..94bf7b9d6 100644
--- a/library/common.h
+++ b/library/common.h
@@ -417,14 +417,14 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
 static inline void mbedtls_xor( unsigned char* r, unsigned char const *a, unsigned char const *b, size_t n )
 {
 #if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
-    uint32_t *a32 = (uint32_t*)a;
-    uint32_t *b32 = (uint32_t*)b;
-    uint32_t *r32 = (uint32_t*)r;
-    for ( size_t i = 0; i < (n >> 2); i++ )
+    uint32_t *a32 = (uint32_t *)a;
+    uint32_t *b32 = (uint32_t *)b;
+    uint32_t *r32 = (uint32_t *)r;
+    for ( size_t i = 0; i < ( n >> 2 ); i++ )
     {
         r32[i] = a32[i] ^ b32[i];
     }
-    for ( size_t i = n - (n % 4) ; i < n; i++ )
+    for ( size_t i = n - ( n % 4 ) ; i < n; i++ )
     {
         r[i] = a[i] ^ b[i];
     }

From 63d114305f1c46fd9cdf1903d7b520fb77a4b655 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 14:03:30 +0000
Subject: [PATCH 18/56] Whitespace cleanup

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/common.h b/library/common.h
index 94bf7b9d6..c3847cdbf 100644
--- a/library/common.h
+++ b/library/common.h
@@ -414,7 +414,7 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  * \param   b Pointer to input (buffer of at least \p n bytes)
  * \param   n Number of bytes to process.
  */
-static inline void mbedtls_xor( unsigned char* r, unsigned char const *a, unsigned char const *b, size_t n )
+static inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
 #if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
     uint32_t *a32 = (uint32_t *)a;

From 3c8eb7e9909dcfb05523d3c1c21f59687588bbfa Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 14:50:03 +0000
Subject: [PATCH 19/56] Provide external definition of mbedtls_xor

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h        | 2 +-
 library/platform_util.c | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/library/common.h b/library/common.h
index c3847cdbf..e502b5de1 100644
--- a/library/common.h
+++ b/library/common.h
@@ -414,7 +414,7 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  * \param   b Pointer to input (buffer of at least \p n bytes)
  * \param   n Number of bytes to process.
  */
-static inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
+inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
 #if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
     uint32_t *a32 = (uint32_t *)a;
diff --git a/library/platform_util.c b/library/platform_util.c
index 916a7f444..8fba664d2 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -143,3 +143,8 @@ struct tm *mbedtls_platform_gmtime_r( const mbedtls_time_t *tt,
 void (*mbedtls_test_hook_test_fail)( const char *, int, const char *);
 #endif /* MBEDTLS_TEST_HOOKS */
 
+/*
+ * Provide an external definition of mbedtls_xor so that the compiler
+ * has the option to not inline it
+ */
+extern inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n );

From 15531bc6d2e3c5e68229cd38baf59f382d238c89 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 16:11:04 +0000
Subject: [PATCH 20/56] Test for all alignment combinations

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_common.function | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tests/suites/test_suite_common.function b/tests/suites/test_suite_common.function
index 8412e60f3..6c0d4afc4 100644
--- a/tests/suites/test_suite_common.function
+++ b/tests/suites/test_suite_common.function
@@ -68,14 +68,19 @@ void mbedtls_xor( int len )
     mbedtls_xor( r2, r2, r2, n );
     ASSERT_COMPARE( r1, n, r2, n );
 
-    /* Test non-word-aligned buffers */
-    fill_arrays( a + 1, b + 1, r1 + 1, r2 + 1, n );
-    for ( size_t i = 0; i < n; i++ )
+    /* Test all 8 combinations of 0/1-byte offsets for r, a and b */
+    for ( int i = 0; i < 8; i++ )
     {
-        r1[i + 1] = a[i + 1] ^ b[i + 1];
+        int r_off = i & 1, a_off = (i & 2) >> 1, b_off = (i & 4) >> 2;
+        fill_arrays( a, b, r1, r2, n + 1 );
+
+        for ( size_t i = 0; i < n; i++ )
+        {
+            r1[i + r_off] = a[i + a_off] ^ b[i + b_off];
+        }
+        mbedtls_xor( r2 + r_off, a + a_off, b + b_off, n );
+        ASSERT_COMPARE( r1 + r_off, n, r2 + r_off, n );
     }
-    mbedtls_xor( r2 + 1, a + 1, b + 1, n );
-    ASSERT_COMPARE( r1 + 1, n, r2 + 1, n );
 exit:
     if ( a != NULL ) mbedtls_free( a );
     if ( a != NULL ) mbedtls_free( b );

From 1bab27f9831c3258716d09ef9702dabcb6dca703 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 16:51:59 +0000
Subject: [PATCH 21/56] Prevent unaligned access under UBSan builds

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/library/common.h b/library/common.h
index e502b5de1..3925a79ce 100644
--- a/library/common.h
+++ b/library/common.h
@@ -73,11 +73,12 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  *
  * This list is incomplete.
  */
-#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
+#if (defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
     || defined(__ARM_FEATURE_UNALIGNED) \
     || defined(__aarch64__) \
     || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
-    || defined(__ARM_ARCH_7A__)
+    || defined(__ARM_ARCH_7A__)) \
+    && (!(defined(__has_feature) && __has_feature(undefined_behavior_sanitizer)))
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
 #endif
 

From 468df317bfea56a90eaf983b26fa8e9541c04aff Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 16:56:35 +0000
Subject: [PATCH 22/56] Fix MSVC support for inline keyword

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/library/common.h b/library/common.h
index 3925a79ce..808d13de2 100644
--- a/library/common.h
+++ b/library/common.h
@@ -28,6 +28,11 @@
 #include <stdint.h>
 #include <stddef.h>
 
+#if ( defined(__ARMCC_VERSION) || defined(_MSC_VER) ) && \
+     !defined(inline) && !defined(__cplusplus)
+#define inline __inline
+#endif
+
 /** Helper to define a function as static except when building invasive tests.
  *
  * If a function is only used inside its own source file and should be

From a6778013b4dd7bc3d60d57e0b87aa674d0b0ea76 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 17:16:57 +0000
Subject: [PATCH 23/56] Tidy up UBSan detection

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/library/common.h b/library/common.h
index 808d13de2..49c5826aa 100644
--- a/library/common.h
+++ b/library/common.h
@@ -78,12 +78,17 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  *
  * This list is incomplete.
  */
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+#define MBEDTLS_HAVE_UBSAN
+#endif
+#endif
+
 #if (defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
     || defined(__ARM_FEATURE_UNALIGNED) \
     || defined(__aarch64__) \
     || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
-    || defined(__ARM_ARCH_7A__)) \
-    && (!(defined(__has_feature) && __has_feature(undefined_behavior_sanitizer)))
+    || defined(__ARM_ARCH_7A__)) && !defined(MBEDTLS_HAVE_UBSAN)
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
 #endif
 

From e7cd137606437755acf6e2236d1913bf0efaee35 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 19:14:26 +0000
Subject: [PATCH 24/56] Define UNALIGNED_UINT32_PTR for unaligned access

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/library/common.h b/library/common.h
index 49c5826aa..75d28ff1f 100644
--- a/library/common.h
+++ b/library/common.h
@@ -74,22 +74,29 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  */
 #define MBEDTLS_ALLOW_PRIVATE_ACCESS
 
-/** Detect architectures where unaligned memory accesses are safe and performant.
+/** MBEDTLS_ALLOW_UNALIGNED_ACCESS is defined for architectures where unaligned
+ * memory accesses are safe and performant.
+ *
+ * Unaligned accesses must be made via the UNALIGNED_UINT32_PTR type
+ * defined here.
  *
  * This list is incomplete.
  */
-#if defined(__has_feature)
-#if __has_feature(undefined_behavior_sanitizer)
-#define MBEDTLS_HAVE_UBSAN
-#endif
-#endif
-
-#if (defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
+#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
     || defined(__ARM_FEATURE_UNALIGNED) \
     || defined(__aarch64__) \
     || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
-    || defined(__ARM_ARCH_7A__)) && !defined(MBEDTLS_HAVE_UBSAN)
+    || defined(__ARM_ARCH_7A__)
+#if (defined(__GNUC__) && __GNUC__ >= 4) \
+    || (defined(__clang__) && __has_attribute(aligned)) \
+    || (defined(__ARMCC_VERSION) && __ARMCC_VERSION >= 5000000 )
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
+__attribute__((aligned(1))) typedef uint32_t unaligned_uint32_t;
+#define UNALIGNED_UINT32_PTR unaligned_uint32_t
+#elif defined(_MSC_VER)
+#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
+#define UNALIGNED_UINT32_PTR __declspec(align(1)) uint32_t
+#endif
 #endif
 
 /** Byte Reading Macros
@@ -428,9 +435,9 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
 inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
 #if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
-    uint32_t *a32 = (uint32_t *)a;
-    uint32_t *b32 = (uint32_t *)b;
-    uint32_t *r32 = (uint32_t *)r;
+     UNALIGNED_UINT32_PTR *a32 = (uint32_t *)a;
+     UNALIGNED_UINT32_PTR *b32 = (uint32_t *)b;
+     UNALIGNED_UINT32_PTR *r32 = (uint32_t *)r;
     for ( size_t i = 0; i < ( n >> 2 ); i++ )
     {
         r32[i] = a32[i] ^ b32[i];

From 3172e4e1c992ebdab7552fe822b2f35c6b5a83b8 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 19:38:13 +0000
Subject: [PATCH 25/56] Fix typo in test_suite_common

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_common.function | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/suites/test_suite_common.function b/tests/suites/test_suite_common.function
index 6c0d4afc4..c8a7a52ab 100644
--- a/tests/suites/test_suite_common.function
+++ b/tests/suites/test_suite_common.function
@@ -74,9 +74,9 @@ void mbedtls_xor( int len )
         int r_off = i & 1, a_off = (i & 2) >> 1, b_off = (i & 4) >> 2;
         fill_arrays( a, b, r1, r2, n + 1 );
 
-        for ( size_t i = 0; i < n; i++ )
+        for ( size_t j = 0; j < n; j++ )
         {
-            r1[i + r_off] = a[i + a_off] ^ b[i + b_off];
+            r1[j + r_off] = a[j + a_off] ^ b[j + b_off];
         }
         mbedtls_xor( r2 + r_off, a + a_off, b + b_off, n );
         ASSERT_COMPARE( r1 + r_off, n, r2 + r_off, n );

From dd3103e9e7ece8173e8a8212e1c00b8078a99963 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 19:42:13 +0000
Subject: [PATCH 26/56] Tidy up UNALIGNED_UINT32_T macro

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/library/common.h b/library/common.h
index 75d28ff1f..affec10bf 100644
--- a/library/common.h
+++ b/library/common.h
@@ -77,7 +77,7 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
 /** MBEDTLS_ALLOW_UNALIGNED_ACCESS is defined for architectures where unaligned
  * memory accesses are safe and performant.
  *
- * Unaligned accesses must be made via the UNALIGNED_UINT32_PTR type
+ * Unaligned accesses must be made via the UNALIGNED_UINT32_T type
  * defined here.
  *
  * This list is incomplete.
@@ -92,10 +92,10 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
     || (defined(__ARMCC_VERSION) && __ARMCC_VERSION >= 5000000 )
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
 __attribute__((aligned(1))) typedef uint32_t unaligned_uint32_t;
-#define UNALIGNED_UINT32_PTR unaligned_uint32_t
+#define UNALIGNED_UINT32_T unaligned_uint32_t
 #elif defined(_MSC_VER)
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
-#define UNALIGNED_UINT32_PTR __declspec(align(1)) uint32_t
+#define UNALIGNED_UINT32_T __declspec(align(1)) uint32_t
 #endif
 #endif
 
@@ -435,9 +435,9 @@ __attribute__((aligned(1))) typedef uint32_t unaligned_uint32_t;
 inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
 #if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
-     UNALIGNED_UINT32_PTR *a32 = (uint32_t *)a;
-     UNALIGNED_UINT32_PTR *b32 = (uint32_t *)b;
-     UNALIGNED_UINT32_PTR *r32 = (uint32_t *)r;
+     UNALIGNED_UINT32_T *a32 = (uint32_t *)a;
+     UNALIGNED_UINT32_T *b32 = (uint32_t *)b;
+     UNALIGNED_UINT32_T *r32 = (uint32_t *)r;
     for ( size_t i = 0; i < ( n >> 2 ); i++ )
     {
         r32[i] = a32[i] ^ b32[i];

From 358c7d6eb07906368a960f8aa47639bbe0560237 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 23 Nov 2022 20:29:03 +0000
Subject: [PATCH 27/56] Fix naming inconsistency

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/library/common.h b/library/common.h
index affec10bf..6ea89789b 100644
--- a/library/common.h
+++ b/library/common.h
@@ -91,8 +91,8 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
     || (defined(__clang__) && __has_attribute(aligned)) \
     || (defined(__ARMCC_VERSION) && __ARMCC_VERSION >= 5000000 )
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
-__attribute__((aligned(1))) typedef uint32_t unaligned_uint32_t;
-#define UNALIGNED_UINT32_T unaligned_uint32_t
+__attribute__((aligned(1))) typedef uint32_t mbedtls_unaligned_uint32_t;
+#define UNALIGNED_UINT32_T mbedtls_unaligned_uint32_t
 #elif defined(_MSC_VER)
 #define MBEDTLS_ALLOW_UNALIGNED_ACCESS
 #define UNALIGNED_UINT32_T __declspec(align(1)) uint32_t

From 6921959b83abb43003bdc99603783a6a7cc9f88f Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 09:27:15 +0000
Subject: [PATCH 28/56] Remove unused variable

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/pkcs5.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/library/pkcs5.c b/library/pkcs5.c
index a6bc1b55c..1e3b17e64 100644
--- a/library/pkcs5.c
+++ b/library/pkcs5.c
@@ -322,7 +322,6 @@ exit:
     mbedtls_md_free( &md_ctx );
     return( ret );
 #else
-    int j;
     unsigned int i;
     unsigned char md1[PSA_HASH_MAX_SIZE];
     unsigned char work[PSA_HASH_MAX_SIZE];

From fbc23225d62acd5443e9c616871e611d1800f3e5 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 18:07:37 +0000
Subject: [PATCH 29/56] Tidy up alignment-related code into separate header

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 382 ++++++++++++++++++++++++++++++++++++++++++++
 library/common.h    | 348 +---------------------------------------
 2 files changed, 383 insertions(+), 347 deletions(-)
 create mode 100644 library/alignment.h

diff --git a/library/alignment.h b/library/alignment.h
new file mode 100644
index 000000000..60e5dba13
--- /dev/null
+++ b/library/alignment.h
@@ -0,0 +1,382 @@
+/**
+ * \file alignment.h
+ *
+ * \brief Utility code for dealing with unaligned memory accesses
+ */
+/*
+ *  Copyright The Mbed TLS Contributors
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#ifndef MBEDTLS_LIBRARY_ALIGNMENT_H
+#define MBEDTLS_LIBRARY_ALIGNMENT_H
+
+#include <stdint.h>
+
+/** MBEDTLS_ALLOW_UNALIGNED_ACCESS is defined for architectures where unaligned
+ * memory accesses are safe and performant.
+ *
+ * Unaligned accesses must be made via the UNALIGNED_UINT32_T type
+ * defined here.
+ *
+ * This list is incomplete.
+ */
+#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__aarch64__) \
+    || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
+    || defined(__ARM_ARCH_7A__)
+#if (defined(__GNUC__) && __GNUC__ >= 4) \
+    || (defined(__clang__) && __has_attribute(aligned)) \
+    || (defined(__ARMCC_VERSION) && __ARMCC_VERSION >= 5000000 )
+    /* GCC, Clang and armcc */
+#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
+__attribute__((aligned(1))) typedef uint32_t mbedtls_unaligned_uint32_t;
+#define UNALIGNED_UINT32_T mbedtls_unaligned_uint32_t
+#elif defined(_MSC_VER)
+    /* MSVC */
+#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
+#define UNALIGNED_UINT32_T __declspec(align(1)) uint32_t
+#elif (defined __ICCARM__)
+    /* IAR - this is disabled until we have the opportunity to test it */
+#undef MBEDTLS_ALLOW_UNALIGNED_ACCESS
+#define UNALIGNED_UINT32_T _Pragma("data_alignment = 1") uint32_t
+#endif
+#endif
+
+
+/** Byte Reading Macros
+ *
+ * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
+ * byte from x, where byte 0 is the least significant byte.
+ */
+#define MBEDTLS_BYTE_0( x ) ( (uint8_t) (   ( x )         & 0xff ) )
+#define MBEDTLS_BYTE_1( x ) ( (uint8_t) ( ( ( x ) >> 8  ) & 0xff ) )
+#define MBEDTLS_BYTE_2( x ) ( (uint8_t) ( ( ( x ) >> 16 ) & 0xff ) )
+#define MBEDTLS_BYTE_3( x ) ( (uint8_t) ( ( ( x ) >> 24 ) & 0xff ) )
+#define MBEDTLS_BYTE_4( x ) ( (uint8_t) ( ( ( x ) >> 32 ) & 0xff ) )
+#define MBEDTLS_BYTE_5( x ) ( (uint8_t) ( ( ( x ) >> 40 ) & 0xff ) )
+#define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
+#define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
+
+/**
+ * Get the unsigned 32 bits integer corresponding to four bytes in
+ * big-endian order (MSB first).
+ *
+ * \param   data    Base address of the memory to get the four bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the four bytes to build the 32 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT32_BE
+#define MBEDTLS_GET_UINT32_BE( data , offset )                  \
+    (                                                           \
+          ( (uint32_t) ( data )[( offset )    ] << 24 )         \
+        | ( (uint32_t) ( data )[( offset ) + 1] << 16 )         \
+        | ( (uint32_t) ( data )[( offset ) + 2] <<  8 )         \
+        | ( (uint32_t) ( data )[( offset ) + 3]       )         \
+    )
+#endif
+
+/**
+ * Put in memory a 32 bits unsigned integer in big-endian order.
+ *
+ * \param   n       32 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 32
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 32 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT32_BE
+#define MBEDTLS_PUT_UINT32_BE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_3( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_2( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 3] = MBEDTLS_BYTE_0( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 32 bits integer corresponding to four bytes in
+ * little-endian order (LSB first).
+ *
+ * \param   data    Base address of the memory to get the four bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the four bytes to build the 32 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT32_LE
+#define MBEDTLS_GET_UINT32_LE( data, offset )                   \
+    (                                                           \
+          ( (uint32_t) ( data )[( offset )    ]       )         \
+        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
+        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
+        | ( (uint32_t) ( data )[( offset ) + 3] << 24 )         \
+    )
+#endif
+
+/**
+ * Put in memory a 32 bits unsigned integer in little-endian order.
+ *
+ * \param   n       32 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 32
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 32 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT32_LE
+#define MBEDTLS_PUT_UINT32_LE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
+    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 16 bits integer corresponding to two bytes in
+ * little-endian order (LSB first).
+ *
+ * \param   data    Base address of the memory to get the two bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the two bytes to build the 16 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT16_LE
+#define MBEDTLS_GET_UINT16_LE( data, offset )                   \
+    (                                                           \
+          ( (uint16_t) ( data )[( offset )    ]       )         \
+        | ( (uint16_t) ( data )[( offset ) + 1] <<  8 )         \
+    )
+#endif
+
+/**
+ * Put in memory a 16 bits unsigned integer in little-endian order.
+ *
+ * \param   n       16 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 16
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 16 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT16_LE
+#define MBEDTLS_PUT_UINT16_LE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 16 bits integer corresponding to two bytes in
+ * big-endian order (MSB first).
+ *
+ * \param   data    Base address of the memory to get the two bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the two bytes to build the 16 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT16_BE
+#define MBEDTLS_GET_UINT16_BE( data, offset )                   \
+    (                                                           \
+          ( (uint16_t) ( data )[( offset )    ] << 8 )          \
+        | ( (uint16_t) ( data )[( offset ) + 1]      )          \
+    )
+#endif
+
+/**
+ * Put in memory a 16 bits unsigned integer in big-endian order.
+ *
+ * \param   n       16 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 16
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 16 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT16_BE
+#define MBEDTLS_PUT_UINT16_BE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_0( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 24 bits integer corresponding to three bytes in
+ * big-endian order (MSB first).
+ *
+ * \param   data    Base address of the memory to get the three bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the three bytes to build the 24 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT24_BE
+#define MBEDTLS_GET_UINT24_BE( data , offset )                  \
+    (                                                           \
+          ( (uint32_t) ( data )[( offset )    ] << 16 )         \
+        | ( (uint32_t) ( data )[( offset ) + 1] << 8  )         \
+        | ( (uint32_t) ( data )[( offset ) + 2]       )         \
+    )
+#endif
+
+/**
+ * Put in memory a 24 bits unsigned integer in big-endian order.
+ *
+ * \param   n       24 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 24
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 24 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT24_BE
+#define MBEDTLS_PUT_UINT24_BE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_2( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_0( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 24 bits integer corresponding to three bytes in
+ * little-endian order (LSB first).
+ *
+ * \param   data    Base address of the memory to get the three bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the three bytes to build the 24 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT24_LE
+#define MBEDTLS_GET_UINT24_LE( data, offset )                   \
+    (                                                           \
+          ( (uint32_t) ( data )[( offset )    ]       )         \
+        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
+        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
+    )
+#endif
+
+/**
+ * Put in memory a 24 bits unsigned integer in little-endian order.
+ *
+ * \param   n       24 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 24
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 24 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT24_LE
+#define MBEDTLS_PUT_UINT24_LE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 64 bits integer corresponding to eight bytes in
+ * big-endian order (MSB first).
+ *
+ * \param   data    Base address of the memory to get the eight bytes from.
+ * \param   offset  Offset from \p data of the first and most significant
+ *                  byte of the eight bytes to build the 64 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT64_BE
+#define MBEDTLS_GET_UINT64_BE( data, offset )                   \
+    (                                                           \
+          ( (uint64_t) ( data )[( offset )    ] << 56 )         \
+        | ( (uint64_t) ( data )[( offset ) + 1] << 48 )         \
+        | ( (uint64_t) ( data )[( offset ) + 2] << 40 )         \
+        | ( (uint64_t) ( data )[( offset ) + 3] << 32 )         \
+        | ( (uint64_t) ( data )[( offset ) + 4] << 24 )         \
+        | ( (uint64_t) ( data )[( offset ) + 5] << 16 )         \
+        | ( (uint64_t) ( data )[( offset ) + 6] <<  8 )         \
+        | ( (uint64_t) ( data )[( offset ) + 7]       )         \
+    )
+#endif
+
+/**
+ * Put in memory a 64 bits unsigned integer in big-endian order.
+ *
+ * \param   n       64 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 64
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the most significant
+ *                  byte of the 64 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT64_BE
+#define MBEDTLS_PUT_UINT64_BE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_7( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_6( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_5( n );             \
+    ( data )[( offset ) + 3] = MBEDTLS_BYTE_4( n );             \
+    ( data )[( offset ) + 4] = MBEDTLS_BYTE_3( n );             \
+    ( data )[( offset ) + 5] = MBEDTLS_BYTE_2( n );             \
+    ( data )[( offset ) + 6] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 7] = MBEDTLS_BYTE_0( n );             \
+}
+#endif
+
+/**
+ * Get the unsigned 64 bits integer corresponding to eight bytes in
+ * little-endian order (LSB first).
+ *
+ * \param   data    Base address of the memory to get the eight bytes from.
+ * \param   offset  Offset from \p data of the first and least significant
+ *                  byte of the eight bytes to build the 64 bits unsigned
+ *                  integer from.
+ */
+#ifndef MBEDTLS_GET_UINT64_LE
+#define MBEDTLS_GET_UINT64_LE( data, offset )                   \
+    (                                                           \
+          ( (uint64_t) ( data )[( offset ) + 7] << 56 )         \
+        | ( (uint64_t) ( data )[( offset ) + 6] << 48 )         \
+        | ( (uint64_t) ( data )[( offset ) + 5] << 40 )         \
+        | ( (uint64_t) ( data )[( offset ) + 4] << 32 )         \
+        | ( (uint64_t) ( data )[( offset ) + 3] << 24 )         \
+        | ( (uint64_t) ( data )[( offset ) + 2] << 16 )         \
+        | ( (uint64_t) ( data )[( offset ) + 1] <<  8 )         \
+        | ( (uint64_t) ( data )[( offset )    ]       )         \
+    )
+#endif
+
+/**
+ * Put in memory a 64 bits unsigned integer in little-endian order.
+ *
+ * \param   n       64 bits unsigned integer to put in memory.
+ * \param   data    Base address of the memory where to put the 64
+ *                  bits unsigned integer in.
+ * \param   offset  Offset from \p data where to put the least significant
+ *                  byte of the 64 bits unsigned integer \p n.
+ */
+#ifndef MBEDTLS_PUT_UINT64_LE
+#define MBEDTLS_PUT_UINT64_LE( n, data, offset )                \
+{                                                               \
+    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
+    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
+    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
+    ( data )[( offset ) + 4] = MBEDTLS_BYTE_4( n );             \
+    ( data )[( offset ) + 5] = MBEDTLS_BYTE_5( n );             \
+    ( data )[( offset ) + 6] = MBEDTLS_BYTE_6( n );             \
+    ( data )[( offset ) + 7] = MBEDTLS_BYTE_7( n );             \
+}
+#endif
+
+#endif /* MBEDTLS_LIBRARY_ALIGNMENT_H */
diff --git a/library/common.h b/library/common.h
index 6ea89789b..63c936fe5 100644
--- a/library/common.h
+++ b/library/common.h
@@ -24,6 +24,7 @@
 #define MBEDTLS_LIBRARY_COMMON_H
 
 #include "mbedtls/build_info.h"
+#include "alignment.h"
 
 #include <stdint.h>
 #include <stddef.h>
@@ -74,353 +75,6 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  */
 #define MBEDTLS_ALLOW_PRIVATE_ACCESS
 
-/** MBEDTLS_ALLOW_UNALIGNED_ACCESS is defined for architectures where unaligned
- * memory accesses are safe and performant.
- *
- * Unaligned accesses must be made via the UNALIGNED_UINT32_T type
- * defined here.
- *
- * This list is incomplete.
- */
-#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
-    || defined(__ARM_FEATURE_UNALIGNED) \
-    || defined(__aarch64__) \
-    || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
-    || defined(__ARM_ARCH_7A__)
-#if (defined(__GNUC__) && __GNUC__ >= 4) \
-    || (defined(__clang__) && __has_attribute(aligned)) \
-    || (defined(__ARMCC_VERSION) && __ARMCC_VERSION >= 5000000 )
-#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
-__attribute__((aligned(1))) typedef uint32_t mbedtls_unaligned_uint32_t;
-#define UNALIGNED_UINT32_T mbedtls_unaligned_uint32_t
-#elif defined(_MSC_VER)
-#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
-#define UNALIGNED_UINT32_T __declspec(align(1)) uint32_t
-#endif
-#endif
-
-/** Byte Reading Macros
- *
- * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
- * byte from x, where byte 0 is the least significant byte.
- */
-#define MBEDTLS_BYTE_0( x ) ( (uint8_t) (   ( x )         & 0xff ) )
-#define MBEDTLS_BYTE_1( x ) ( (uint8_t) ( ( ( x ) >> 8  ) & 0xff ) )
-#define MBEDTLS_BYTE_2( x ) ( (uint8_t) ( ( ( x ) >> 16 ) & 0xff ) )
-#define MBEDTLS_BYTE_3( x ) ( (uint8_t) ( ( ( x ) >> 24 ) & 0xff ) )
-#define MBEDTLS_BYTE_4( x ) ( (uint8_t) ( ( ( x ) >> 32 ) & 0xff ) )
-#define MBEDTLS_BYTE_5( x ) ( (uint8_t) ( ( ( x ) >> 40 ) & 0xff ) )
-#define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
-#define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
-
-/**
- * Get the unsigned 32 bits integer corresponding to four bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the four bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the four bytes to build the 32 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT32_BE
-#define MBEDTLS_GET_UINT32_BE( data , offset )                  \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ] << 24 )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 3]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 32 bits unsigned integer in big-endian order.
- *
- * \param   n       32 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 32
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 32 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT32_BE
-#define MBEDTLS_PUT_UINT32_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 32 bits integer corresponding to four bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the four bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the four bytes to build the 32 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT32_LE
-#define MBEDTLS_GET_UINT32_LE( data, offset )                   \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ]       )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 3] << 24 )         \
-    )
-#endif
-
-/**
- * Put in memory a 32 bits unsigned integer in little-endian order.
- *
- * \param   n       32 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 32
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 32 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT32_LE
-#define MBEDTLS_PUT_UINT32_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 16 bits integer corresponding to two bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the two bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the two bytes to build the 16 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT16_LE
-#define MBEDTLS_GET_UINT16_LE( data, offset )                   \
-    (                                                           \
-          ( (uint16_t) ( data )[( offset )    ]       )         \
-        | ( (uint16_t) ( data )[( offset ) + 1] <<  8 )         \
-    )
-#endif
-
-/**
- * Put in memory a 16 bits unsigned integer in little-endian order.
- *
- * \param   n       16 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 16
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 16 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT16_LE
-#define MBEDTLS_PUT_UINT16_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 16 bits integer corresponding to two bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the two bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the two bytes to build the 16 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT16_BE
-#define MBEDTLS_GET_UINT16_BE( data, offset )                   \
-    (                                                           \
-          ( (uint16_t) ( data )[( offset )    ] << 8 )          \
-        | ( (uint16_t) ( data )[( offset ) + 1]      )          \
-    )
-#endif
-
-/**
- * Put in memory a 16 bits unsigned integer in big-endian order.
- *
- * \param   n       16 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 16
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 16 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT16_BE
-#define MBEDTLS_PUT_UINT16_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 24 bits integer corresponding to three bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the three bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the three bytes to build the 24 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT24_BE
-#define MBEDTLS_GET_UINT24_BE( data , offset )                  \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] << 8  )         \
-        | ( (uint32_t) ( data )[( offset ) + 2]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 24 bits unsigned integer in big-endian order.
- *
- * \param   n       24 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 24
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 24 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT24_BE
-#define MBEDTLS_PUT_UINT24_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 24 bits integer corresponding to three bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the three bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the three bytes to build the 24 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT24_LE
-#define MBEDTLS_GET_UINT24_LE( data, offset )                   \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ]       )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
-    )
-#endif
-
-/**
- * Put in memory a 24 bits unsigned integer in little-endian order.
- *
- * \param   n       24 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 24
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 24 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT24_LE
-#define MBEDTLS_PUT_UINT24_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 64 bits integer corresponding to eight bytes in
- * big-endian order (MSB first).
- *
- * \param   data    Base address of the memory to get the eight bytes from.
- * \param   offset  Offset from \p data of the first and most significant
- *                  byte of the eight bytes to build the 64 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT64_BE
-#define MBEDTLS_GET_UINT64_BE( data, offset )                   \
-    (                                                           \
-          ( (uint64_t) ( data )[( offset )    ] << 56 )         \
-        | ( (uint64_t) ( data )[( offset ) + 1] << 48 )         \
-        | ( (uint64_t) ( data )[( offset ) + 2] << 40 )         \
-        | ( (uint64_t) ( data )[( offset ) + 3] << 32 )         \
-        | ( (uint64_t) ( data )[( offset ) + 4] << 24 )         \
-        | ( (uint64_t) ( data )[( offset ) + 5] << 16 )         \
-        | ( (uint64_t) ( data )[( offset ) + 6] <<  8 )         \
-        | ( (uint64_t) ( data )[( offset ) + 7]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 64 bits unsigned integer in big-endian order.
- *
- * \param   n       64 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 64
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the most significant
- *                  byte of the 64 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT64_BE
-#define MBEDTLS_PUT_UINT64_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_7( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_6( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_5( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_4( n );             \
-    ( data )[( offset ) + 4] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 5] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 6] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 7] = MBEDTLS_BYTE_0( n );             \
-}
-#endif
-
-/**
- * Get the unsigned 64 bits integer corresponding to eight bytes in
- * little-endian order (LSB first).
- *
- * \param   data    Base address of the memory to get the eight bytes from.
- * \param   offset  Offset from \p data of the first and least significant
- *                  byte of the eight bytes to build the 64 bits unsigned
- *                  integer from.
- */
-#ifndef MBEDTLS_GET_UINT64_LE
-#define MBEDTLS_GET_UINT64_LE( data, offset )                   \
-    (                                                           \
-          ( (uint64_t) ( data )[( offset ) + 7] << 56 )         \
-        | ( (uint64_t) ( data )[( offset ) + 6] << 48 )         \
-        | ( (uint64_t) ( data )[( offset ) + 5] << 40 )         \
-        | ( (uint64_t) ( data )[( offset ) + 4] << 32 )         \
-        | ( (uint64_t) ( data )[( offset ) + 3] << 24 )         \
-        | ( (uint64_t) ( data )[( offset ) + 2] << 16 )         \
-        | ( (uint64_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint64_t) ( data )[( offset )    ]       )         \
-    )
-#endif
-
-/**
- * Put in memory a 64 bits unsigned integer in little-endian order.
- *
- * \param   n       64 bits unsigned integer to put in memory.
- * \param   data    Base address of the memory where to put the 64
- *                  bits unsigned integer in.
- * \param   offset  Offset from \p data where to put the least significant
- *                  byte of the 64 bits unsigned integer \p n.
- */
-#ifndef MBEDTLS_PUT_UINT64_LE
-#define MBEDTLS_PUT_UINT64_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 4] = MBEDTLS_BYTE_4( n );             \
-    ( data )[( offset ) + 5] = MBEDTLS_BYTE_5( n );             \
-    ( data )[( offset ) + 6] = MBEDTLS_BYTE_6( n );             \
-    ( data )[( offset ) + 7] = MBEDTLS_BYTE_7( n );             \
-}
-#endif
-
 /**
  * Perform a fast block XOR operation, such that
  * r[i] = a[i] ^ b[i] where 0 <= i < n

From 96d61d14d85fa2b7492b2738ee417a5e8fce0961 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 19:33:22 +0000
Subject: [PATCH 30/56] Use memcpy for unaligned accesses

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 50 ++++++++++++++++++++-------------------------
 library/common.h    | 18 +++++-----------
 2 files changed, 27 insertions(+), 41 deletions(-)

diff --git a/library/alignment.h b/library/alignment.h
index 60e5dba13..6394be2f3 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -24,38 +24,32 @@
 #define MBEDTLS_LIBRARY_ALIGNMENT_H
 
 #include <stdint.h>
+#include <string.h>
 
-/** MBEDTLS_ALLOW_UNALIGNED_ACCESS is defined for architectures where unaligned
- * memory accesses are safe and performant.
+/**
+ * Read the unsigned 32 bits integer from the given address, which need not
+ * be aligned.
  *
- * Unaligned accesses must be made via the UNALIGNED_UINT32_T type
- * defined here.
- *
- * This list is incomplete.
+ * \param   p pointer to 4 bytes of data
  */
-#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__) \
-    || defined(__ARM_FEATURE_UNALIGNED) \
-    || defined(__aarch64__) \
-    || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M__) \
-    || defined(__ARM_ARCH_7A__)
-#if (defined(__GNUC__) && __GNUC__ >= 4) \
-    || (defined(__clang__) && __has_attribute(aligned)) \
-    || (defined(__ARMCC_VERSION) && __ARMCC_VERSION >= 5000000 )
-    /* GCC, Clang and armcc */
-#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
-__attribute__((aligned(1))) typedef uint32_t mbedtls_unaligned_uint32_t;
-#define UNALIGNED_UINT32_T mbedtls_unaligned_uint32_t
-#elif defined(_MSC_VER)
-    /* MSVC */
-#define MBEDTLS_ALLOW_UNALIGNED_ACCESS
-#define UNALIGNED_UINT32_T __declspec(align(1)) uint32_t
-#elif (defined __ICCARM__)
-    /* IAR - this is disabled until we have the opportunity to test it */
-#undef MBEDTLS_ALLOW_UNALIGNED_ACCESS
-#define UNALIGNED_UINT32_T _Pragma("data_alignment = 1") uint32_t
-#endif
-#endif
+static inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
+{
+    uint32_t r;
+    memcpy( &r, p, 4 );
+    return r;
+}
 
+/**
+ * Write the unsigned 32 bits integer to the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 4 bytes of data
+ * \param   x data to write
+ */
+static inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
+{
+    memcpy(p, &x, 4);
+}
 
 /** Byte Reading Macros
  *
diff --git a/library/common.h b/library/common.h
index 63c936fe5..9dbe21b88 100644
--- a/library/common.h
+++ b/library/common.h
@@ -88,24 +88,16 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  */
 inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
-#if defined(MBEDTLS_ALLOW_UNALIGNED_ACCESS)
-     UNALIGNED_UINT32_T *a32 = (uint32_t *)a;
-     UNALIGNED_UINT32_T *b32 = (uint32_t *)b;
-     UNALIGNED_UINT32_T *r32 = (uint32_t *)r;
-    for ( size_t i = 0; i < ( n >> 2 ); i++ )
+    size_t i;
+    for ( i = 0; (i + 4) < n; i+= 4 )
     {
-        r32[i] = a32[i] ^ b32[i];
+        uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
+        mbedtls_put_unaligned_uint32(r + i, x);
     }
-    for ( size_t i = n - ( n % 4 ) ; i < n; i++ )
+    for ( ; i < n; i++ )
     {
         r[i] = a[i] ^ b[i];
     }
-#else
-    for ( size_t i = 0; i < n; i++ )
-    {
-        r[i] = a[i] ^ b[i];
-    }
-#endif
 }
 
 /* Fix MSVC C99 compatible issue

From 069e7f462a426795301aaaa79e811721427447df Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 19:37:26 +0000
Subject: [PATCH 31/56] Correct mixed up comments

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/aes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index 4f7c88859..6e1bcd2be 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -1140,14 +1140,14 @@ int mbedtls_aes_crypt_xts( mbedtls_aes_xts_context *ctx,
         size_t i;
         unsigned char *prev_output = output - 16;
 
-        /* Copy the remainder of the input for this final round. */
+        /* Copy ciphertext bytes from the previous block to our output for each
+         * byte of ciphertext we won't steal. */
         for( i = 0; i < leftover; i++ )
         {
             output[i] = prev_output[i];
         }
 
-        /* Copy ciphertext bytes from the previous block to our output for each
-         * byte of ciphertext we won't steal. */
+        /* Copy the rest of the input, XORed with t, for this final round. */
         mbedtls_xor( tmp, input, t, leftover );
 
         /* Copy ciphertext bytes from the previous block for input in this

From 4b910c1ed1313678ab894222cd2a4e05a5641b5e Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 19:44:52 +0000
Subject: [PATCH 32/56] Fix whitespace

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 2 +-
 library/common.h    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/library/alignment.h b/library/alignment.h
index 6394be2f3..a5b71d306 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -48,7 +48,7 @@ static inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
  */
 static inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
 {
-    memcpy(p, &x, 4);
+    memcpy( p, &x, 4 );
 }
 
 /** Byte Reading Macros
diff --git a/library/common.h b/library/common.h
index 9dbe21b88..943f84c48 100644
--- a/library/common.h
+++ b/library/common.h
@@ -89,10 +89,10 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
 inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
     size_t i;
-    for ( i = 0; (i + 4) < n; i+= 4 )
+    for ( i = 0; ( i + 4 ) < n; i+= 4 )
     {
-        uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
-        mbedtls_put_unaligned_uint32(r + i, x);
+        uint32_t x = mbedtls_get_unaligned_uint32( a + i ) ^ mbedtls_get_unaligned_uint32( b + i );
+        mbedtls_put_unaligned_uint32( r + i, x );
     }
     for ( ; i < n; i++ )
     {

From 66433444fcd6fcd45b4d96c9a95e0a97e0ca3135 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 20:07:39 +0000
Subject: [PATCH 33/56] Fix static inline linker issues

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h     | 4 ++--
 library/platform_util.c | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/library/alignment.h b/library/alignment.h
index a5b71d306..755cde384 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -32,7 +32,7 @@
  *
  * \param   p pointer to 4 bytes of data
  */
-static inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
+inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
 {
     uint32_t r;
     memcpy( &r, p, 4 );
@@ -46,7 +46,7 @@ static inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
  * \param   p pointer to 4 bytes of data
  * \param   x data to write
  */
-static inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
+inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
 {
     memcpy( p, &x, 4 );
 }
diff --git a/library/platform_util.c b/library/platform_util.c
index 8fba664d2..7843609e1 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -144,7 +144,11 @@ void (*mbedtls_test_hook_test_fail)( const char *, int, const char *);
 #endif /* MBEDTLS_TEST_HOOKS */
 
 /*
- * Provide an external definition of mbedtls_xor so that the compiler
- * has the option to not inline it
+ * Provide external definitions of some inline functions so that the compiler
+ * has the option to not inline them
  */
 extern inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n );
+
+extern inline uint32_t mbedtls_get_unaligned_uint32( void const *p );
+
+extern inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x );
\ No newline at end of file

From c58858865b1b706ff2c1699b215d76bf827a78a1 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 20:35:04 +0000
Subject: [PATCH 34/56] Fix off-by-one error

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/common.h b/library/common.h
index 943f84c48..c17506416 100644
--- a/library/common.h
+++ b/library/common.h
@@ -89,7 +89,7 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
 inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
 {
     size_t i;
-    for ( i = 0; ( i + 4 ) < n; i+= 4 )
+    for ( i = 0; ( i + 4 ) <= n; i += 4 )
     {
         uint32_t x = mbedtls_get_unaligned_uint32( a + i ) ^ mbedtls_get_unaligned_uint32( b + i );
         mbedtls_put_unaligned_uint32( r + i, x );

From aaf69fd6826d9c5c6ffa05a220ad1674602fcee2 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 20:40:28 +0000
Subject: [PATCH 35/56] Fix missing newline

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/platform_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/platform_util.c b/library/platform_util.c
index 7843609e1..9d7323b3a 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -151,4 +151,4 @@ extern inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsign
 
 extern inline uint32_t mbedtls_get_unaligned_uint32( void const *p );
 
-extern inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x );
\ No newline at end of file
+extern inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x );

From 875d2383d026f8c6adc8a00c0fdfc6f327ea29e3 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 20:43:15 +0000
Subject: [PATCH 36/56] Improve documentation

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/library/alignment.h b/library/alignment.h
index 755cde384..dedfdc9e7 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -31,6 +31,7 @@
  * be aligned.
  *
  * \param   p pointer to 4 bytes of data
+ * \return  Data at the given address
  */
 inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
 {

From 7a910a8be0b4ea105223cd51a6e701d496d0756e Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 21:17:40 +0000
Subject: [PATCH 37/56] Minor formatting tweaks

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h                     | 6 +++---
 library/common.h                        | 2 +-
 tests/suites/test_suite_common.function | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/library/alignment.h b/library/alignment.h
index dedfdc9e7..e7f11da40 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -33,10 +33,10 @@
  * \param   p pointer to 4 bytes of data
  * \return  Data at the given address
  */
-inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
+inline uint32_t mbedtls_get_unaligned_uint32( const void *p )
 {
     uint32_t r;
-    memcpy( &r, p, 4 );
+    memcpy( &r, p, sizeof( r ) );
     return r;
 }
 
@@ -49,7 +49,7 @@ inline uint32_t mbedtls_get_unaligned_uint32( void const *p )
  */
 inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
 {
-    memcpy( p, &x, 4 );
+    memcpy( p, &x, sizeof( x ) );
 }
 
 /** Byte Reading Macros
diff --git a/library/common.h b/library/common.h
index c17506416..2cc44fb71 100644
--- a/library/common.h
+++ b/library/common.h
@@ -86,7 +86,7 @@ extern void (*mbedtls_test_hook_test_fail)( const char * test, int line, const c
  * \param   b Pointer to input (buffer of at least \p n bytes)
  * \param   n Number of bytes to process.
  */
-inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n )
+inline void mbedtls_xor( unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n )
 {
     size_t i;
     for ( i = 0; ( i + 4 ) <= n; i += 4 )
diff --git a/tests/suites/test_suite_common.function b/tests/suites/test_suite_common.function
index c8a7a52ab..4444a52ce 100644
--- a/tests/suites/test_suite_common.function
+++ b/tests/suites/test_suite_common.function
@@ -82,9 +82,9 @@ void mbedtls_xor( int len )
         ASSERT_COMPARE( r1 + r_off, n, r2 + r_off, n );
     }
 exit:
-    if ( a != NULL ) mbedtls_free( a );
-    if ( a != NULL ) mbedtls_free( b );
-    if ( r1 != NULL ) mbedtls_free( r1 );
-    if ( r2 != NULL ) mbedtls_free( r2 );
+    mbedtls_free( a );
+    mbedtls_free( b );
+    mbedtls_free( r1 );
+    mbedtls_free( r2 );
 }
 /* END_CASE */

From b8c4a0d940421c08253234e0207721076fe50bb4 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 24 Nov 2022 21:18:55 +0000
Subject: [PATCH 38/56] Minor formatting tweaks

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/platform_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/library/platform_util.c b/library/platform_util.c
index 9d7323b3a..9c18dd502 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -147,8 +147,8 @@ void (*mbedtls_test_hook_test_fail)( const char *, int, const char *);
  * Provide external definitions of some inline functions so that the compiler
  * has the option to not inline them
  */
-extern inline void mbedtls_xor( unsigned char *r, unsigned char const *a, unsigned char const *b, size_t n );
+extern inline void mbedtls_xor( unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n );
 
-extern inline uint32_t mbedtls_get_unaligned_uint32( void const *p );
+extern inline uint32_t mbedtls_get_unaligned_uint32( const void *p );
 
 extern inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x );

From 8f6583d8363086433cb1d8a86f5683589e9f9a74 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Fri, 25 Nov 2022 09:16:41 +0000
Subject: [PATCH 39/56] Fix for MSVC unsupported #inline keyword

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/library/alignment.h b/library/alignment.h
index e7f11da40..986fdaa95 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -26,6 +26,11 @@
 #include <stdint.h>
 #include <string.h>
 
+#if ( defined(__ARMCC_VERSION) || defined(_MSC_VER) ) && \
+     !defined(inline) && !defined(__cplusplus)
+#define inline __inline
+#endif
+
 /**
  * Read the unsigned 32 bits integer from the given address, which need not
  * be aligned.

From a616afeae41225b7e79ae8f872f2fda8ab880ab8 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Fri, 25 Nov 2022 17:11:45 +0000
Subject: [PATCH 40/56] Remove redundant inline workarounds

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 5 +----
 library/common.h    | 5 -----
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/library/alignment.h b/library/alignment.h
index 986fdaa95..de1ab9148 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -26,10 +26,7 @@
 #include <stdint.h>
 #include <string.h>
 
-#if ( defined(__ARMCC_VERSION) || defined(_MSC_VER) ) && \
-     !defined(inline) && !defined(__cplusplus)
-#define inline __inline
-#endif
+#include "mbedtls/build_info.h"
 
 /**
  * Read the unsigned 32 bits integer from the given address, which need not
diff --git a/library/common.h b/library/common.h
index 97d180aef..9d3b8fe93 100644
--- a/library/common.h
+++ b/library/common.h
@@ -30,11 +30,6 @@
 #include <stdint.h>
 #include <stddef.h>
 
-#if ( defined(__ARMCC_VERSION) || defined(_MSC_VER) ) && \
-     !defined(inline) && !defined(__cplusplus)
-#define inline __inline
-#endif
-
 /** Helper to define a function as static except when building invasive tests.
  *
  * If a function is only used inside its own source file and should be

From a360e1987aa369ff6f58b534fcbd09c932c07907 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Mon, 28 Nov 2022 14:44:05 +0000
Subject: [PATCH 41/56] Add efficient unaligned get/put functions

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h     | 52 +++++++++++++++++++++++++++++++++++++++++
 library/platform_util.c |  8 +++++++
 2 files changed, 60 insertions(+)

diff --git a/library/alignment.h b/library/alignment.h
index de1ab9148..3539c9175 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -28,6 +28,32 @@
 
 #include "mbedtls/build_info.h"
 
+/**
+ * Read the unsigned 16 bits integer from the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 2 bytes of data
+ * \return  Data at the given address
+ */
+inline uint16_t mbedtls_get_unaligned_uint16( const void *p )
+{
+    uint16_t r;
+    memcpy( &r, p, sizeof( r ) );
+    return r;
+}
+
+/**
+ * Write the unsigned 16 bits integer to the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 2 bytes of data
+ * \param   x data to write
+ */
+inline void mbedtls_put_unaligned_uint16( void *p, uint16_t x )
+{
+    memcpy( p, &x, sizeof( x ) );
+}
+
 /**
  * Read the unsigned 32 bits integer from the given address, which need not
  * be aligned.
@@ -54,6 +80,32 @@ inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x )
     memcpy( p, &x, sizeof( x ) );
 }
 
+/**
+ * Read the unsigned 64 bits integer from the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 8 bytes of data
+ * \return  Data at the given address
+ */
+inline uint64_t mbedtls_get_unaligned_uint64( const void *p )
+{
+    uint64_t r;
+    memcpy( &r, p, sizeof( r ) );
+    return r;
+}
+
+/**
+ * Write the unsigned 64 bits integer to the given address, which need not
+ * be aligned.
+ *
+ * \param   p pointer to 8 bytes of data
+ * \param   x data to write
+ */
+inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x )
+{
+    memcpy( p, &x, sizeof( x ) );
+}
+
 /** Byte Reading Macros
  *
  * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
diff --git a/library/platform_util.c b/library/platform_util.c
index 9c18dd502..2b674f62e 100644
--- a/library/platform_util.c
+++ b/library/platform_util.c
@@ -149,6 +149,14 @@ void (*mbedtls_test_hook_test_fail)( const char *, int, const char *);
  */
 extern inline void mbedtls_xor( unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n );
 
+extern inline uint16_t mbedtls_get_unaligned_uint16( const void *p );
+
+extern inline void mbedtls_put_unaligned_uint16( void *p, uint16_t x );
+
 extern inline uint32_t mbedtls_get_unaligned_uint32( const void *p );
 
 extern inline void mbedtls_put_unaligned_uint32( void *p, uint32_t x );
+
+extern inline uint64_t mbedtls_get_unaligned_uint64( const void *p );
+
+extern inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x );

From e5c42594e552173692ca0f4fb3abab821e5e7434 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Mon, 28 Nov 2022 14:47:46 +0000
Subject: [PATCH 42/56] Add byte order detection macro

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/library/alignment.h b/library/alignment.h
index 3539c9175..3f8926e29 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -120,6 +120,13 @@ inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x )
 #define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
 #define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
 
+#if !defined(__BYTE_ORDER__)
+static const uint16_t mbedtls_byte_order_detector = { 0x100 };
+#define MBEDTLS_IS_BIG_ENDIAN (*((unsigned char *) (&mbedtls_byte_order_detector)) == 0x01)
+#else
+#define MBEDTLS_IS_BIG_ENDIAN ((__BYTE_ORDER__) == (__ORDER_BIG_ENDIAN__))
+#endif /* !defined(__BYTE_ORDER__) */
+
 /**
  * Get the unsigned 32 bits integer corresponding to four bytes in
  * big-endian order (MSB first).

From 6298b24127cf4df20ff24890b9ab0d0a6253e70c Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Mon, 28 Nov 2022 14:51:49 +0000
Subject: [PATCH 43/56] Add byteswap routines

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/library/alignment.h b/library/alignment.h
index 3f8926e29..3d38946d4 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -120,6 +120,35 @@ inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x )
 #define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
 #define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
 
+static inline uint16_t mbedtls_bswap16( uint16_t x ) {
+    return
+         ( x & 0x00ff ) << 8 |
+         ( x & 0xff00 ) >> 8;
+}
+#define MBEDTLS_BSWAP16 mbedtls_bswap16
+
+static inline uint32_t mbedtls_bswap32( uint32_t x ) {
+    return
+         ( x & 0x000000ff ) << 24 |
+         ( x & 0x0000ff00 ) <<  8 |
+         ( x & 0x00ff0000 ) >>  8 |
+         ( x & 0xff000000 ) >> 24;
+}
+#define MBEDTLS_BSWAP32 mbedtls_bswap32
+
+static inline uint64_t mbedtls_bswap64( uint64_t x ) {
+    return
+         ( x & 0x00000000000000ff ) << 56 |
+         ( x & 0x000000000000ff00 ) << 40 |
+         ( x & 0x0000000000ff0000 ) << 24 |
+         ( x & 0x00000000ff000000 ) <<  8 |
+         ( x & 0x000000ff00000000 ) >>  8 |
+         ( x & 0x0000ff0000000000 ) >> 24 |
+         ( x & 0x00ff000000000000 ) >> 40 |
+         ( x & 0xff00000000000000 ) >> 56;
+}
+#define MBEDTLS_BSWAP64 mbedtls_bswap64
+
 #if !defined(__BYTE_ORDER__)
 static const uint16_t mbedtls_byte_order_detector = { 0x100 };
 #define MBEDTLS_IS_BIG_ENDIAN (*((unsigned char *) (&mbedtls_byte_order_detector)) == 0x01)

From a5110b0d79c094b2a1e99e298930a9d8df22c874 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Mon, 28 Nov 2022 14:48:45 +0000
Subject: [PATCH 44/56] Make use of efficient unaligned access functions

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 197 +++++++++++++++++++-------------------------
 1 file changed, 85 insertions(+), 112 deletions(-)

diff --git a/library/alignment.h b/library/alignment.h
index 3d38946d4..7638f9438 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -165,15 +165,11 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the four bytes to build the 32 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT32_BE
-#define MBEDTLS_GET_UINT32_BE( data , offset )                  \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ] << 24 )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 3]       )         \
+#define MBEDTLS_GET_UINT32_BE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? mbedtls_get_unaligned_uint32((data) + (offset))                  \
+        : MBEDTLS_BSWAP32(mbedtls_get_unaligned_uint32((data) + (offset))) \
     )
-#endif
 
 /**
  * Put in memory a 32 bits unsigned integer in big-endian order.
@@ -184,15 +180,17 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the most significant
  *                  byte of the 32 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT32_BE
-#define MBEDTLS_PUT_UINT32_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_0( n );             \
+#define MBEDTLS_PUT_UINT32_BE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), (uint32_t)(n));      \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), MBEDTLS_BSWAP32((uint32_t)(n))); \
+    }                                                                        \
 }
-#endif
 
 /**
  * Get the unsigned 32 bits integer corresponding to four bytes in
@@ -203,15 +201,12 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the four bytes to build the 32 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT32_LE
-#define MBEDTLS_GET_UINT32_LE( data, offset )                   \
-    (                                                           \
-          ( (uint32_t) ( data )[( offset )    ]       )         \
-        | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
-        | ( (uint32_t) ( data )[( offset ) + 3] << 24 )         \
+#define MBEDTLS_GET_UINT32_LE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? MBEDTLS_BSWAP32(mbedtls_get_unaligned_uint32((data) + (offset))) \
+        : mbedtls_get_unaligned_uint32((data) + (offset))                  \
     )
-#endif
+
 
 /**
  * Put in memory a 32 bits unsigned integer in little-endian order.
@@ -222,15 +217,17 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the least significant
  *                  byte of the 32 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT32_LE
-#define MBEDTLS_PUT_UINT32_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
+#define MBEDTLS_PUT_UINT32_LE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), MBEDTLS_BSWAP32((uint32_t)(n))); \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint32((data) + (offset), ((uint32_t)(n)));      \
+    }                                                                        \
 }
-#endif
 
 /**
  * Get the unsigned 16 bits integer corresponding to two bytes in
@@ -241,13 +238,11 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the two bytes to build the 16 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT16_LE
-#define MBEDTLS_GET_UINT16_LE( data, offset )                   \
-    (                                                           \
-          ( (uint16_t) ( data )[( offset )    ]       )         \
-        | ( (uint16_t) ( data )[( offset ) + 1] <<  8 )         \
+#define MBEDTLS_GET_UINT16_LE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? MBEDTLS_BSWAP16(mbedtls_get_unaligned_uint16((data) + (offset))) \
+        : mbedtls_get_unaligned_uint16((data) + (offset))                  \
     )
-#endif
 
 /**
  * Put in memory a 16 bits unsigned integer in little-endian order.
@@ -258,13 +253,17 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the least significant
  *                  byte of the 16 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT16_LE
-#define MBEDTLS_PUT_UINT16_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
+#define MBEDTLS_PUT_UINT16_LE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), MBEDTLS_BSWAP16((uint16_t)(n))); \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), (uint16_t)(n));      \
+    }                                                                        \
 }
-#endif
 
 /**
  * Get the unsigned 16 bits integer corresponding to two bytes in
@@ -275,13 +274,11 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the two bytes to build the 16 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT16_BE
-#define MBEDTLS_GET_UINT16_BE( data, offset )                   \
-    (                                                           \
-          ( (uint16_t) ( data )[( offset )    ] << 8 )          \
-        | ( (uint16_t) ( data )[( offset ) + 1]      )          \
+#define MBEDTLS_GET_UINT16_BE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? mbedtls_get_unaligned_uint16((data) + (offset))                  \
+        : MBEDTLS_BSWAP16(mbedtls_get_unaligned_uint16((data) + (offset))) \
     )
-#endif
 
 /**
  * Put in memory a 16 bits unsigned integer in big-endian order.
@@ -292,13 +289,17 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the most significant
  *                  byte of the 16 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT16_BE
-#define MBEDTLS_PUT_UINT16_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_0( n );             \
+#define MBEDTLS_PUT_UINT16_BE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), (uint16_t)(n));      \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint16((data) + (offset), MBEDTLS_BSWAP16((uint16_t)(n))); \
+    }                                                                        \
 }
-#endif
 
 /**
  * Get the unsigned 24 bits integer corresponding to three bytes in
@@ -309,14 +310,12 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the three bytes to build the 24 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT24_BE
 #define MBEDTLS_GET_UINT24_BE( data , offset )                  \
     (                                                           \
           ( (uint32_t) ( data )[( offset )    ] << 16 )         \
         | ( (uint32_t) ( data )[( offset ) + 1] << 8  )         \
         | ( (uint32_t) ( data )[( offset ) + 2]       )         \
     )
-#endif
 
 /**
  * Put in memory a 24 bits unsigned integer in big-endian order.
@@ -327,14 +326,12 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the most significant
  *                  byte of the 24 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT24_BE
 #define MBEDTLS_PUT_UINT24_BE( n, data, offset )                \
 {                                                               \
     ( data )[( offset )    ] = MBEDTLS_BYTE_2( n );             \
     ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
     ( data )[( offset ) + 2] = MBEDTLS_BYTE_0( n );             \
 }
-#endif
 
 /**
  * Get the unsigned 24 bits integer corresponding to three bytes in
@@ -345,14 +342,12 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the three bytes to build the 24 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT24_LE
 #define MBEDTLS_GET_UINT24_LE( data, offset )                   \
     (                                                           \
           ( (uint32_t) ( data )[( offset )    ]       )         \
         | ( (uint32_t) ( data )[( offset ) + 1] <<  8 )         \
         | ( (uint32_t) ( data )[( offset ) + 2] << 16 )         \
     )
-#endif
 
 /**
  * Put in memory a 24 bits unsigned integer in little-endian order.
@@ -363,14 +358,12 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the least significant
  *                  byte of the 24 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT24_LE
 #define MBEDTLS_PUT_UINT24_LE( n, data, offset )                \
 {                                                               \
     ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
     ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
     ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
 }
-#endif
 
 /**
  * Get the unsigned 64 bits integer corresponding to eight bytes in
@@ -381,19 +374,11 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the eight bytes to build the 64 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT64_BE
-#define MBEDTLS_GET_UINT64_BE( data, offset )                   \
-    (                                                           \
-          ( (uint64_t) ( data )[( offset )    ] << 56 )         \
-        | ( (uint64_t) ( data )[( offset ) + 1] << 48 )         \
-        | ( (uint64_t) ( data )[( offset ) + 2] << 40 )         \
-        | ( (uint64_t) ( data )[( offset ) + 3] << 32 )         \
-        | ( (uint64_t) ( data )[( offset ) + 4] << 24 )         \
-        | ( (uint64_t) ( data )[( offset ) + 5] << 16 )         \
-        | ( (uint64_t) ( data )[( offset ) + 6] <<  8 )         \
-        | ( (uint64_t) ( data )[( offset ) + 7]       )         \
+#define MBEDTLS_GET_UINT64_BE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? mbedtls_get_unaligned_uint64((data) + (offset))                  \
+        : MBEDTLS_BSWAP64(mbedtls_get_unaligned_uint64((data) + (offset))) \
     )
-#endif
 
 /**
  * Put in memory a 64 bits unsigned integer in big-endian order.
@@ -404,19 +389,17 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the most significant
  *                  byte of the 64 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT64_BE
-#define MBEDTLS_PUT_UINT64_BE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_7( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_6( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_5( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_4( n );             \
-    ( data )[( offset ) + 4] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 5] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 6] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 7] = MBEDTLS_BYTE_0( n );             \
+#define MBEDTLS_PUT_UINT64_BE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), (uint64_t)(n));      \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), MBEDTLS_BSWAP64((uint64_t)(n))); \
+    }                                                                        \
 }
-#endif
 
 /**
  * Get the unsigned 64 bits integer corresponding to eight bytes in
@@ -427,19 +410,11 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  *                  byte of the eight bytes to build the 64 bits unsigned
  *                  integer from.
  */
-#ifndef MBEDTLS_GET_UINT64_LE
-#define MBEDTLS_GET_UINT64_LE( data, offset )                   \
-    (                                                           \
-          ( (uint64_t) ( data )[( offset ) + 7] << 56 )         \
-        | ( (uint64_t) ( data )[( offset ) + 6] << 48 )         \
-        | ( (uint64_t) ( data )[( offset ) + 5] << 40 )         \
-        | ( (uint64_t) ( data )[( offset ) + 4] << 32 )         \
-        | ( (uint64_t) ( data )[( offset ) + 3] << 24 )         \
-        | ( (uint64_t) ( data )[( offset ) + 2] << 16 )         \
-        | ( (uint64_t) ( data )[( offset ) + 1] <<  8 )         \
-        | ( (uint64_t) ( data )[( offset )    ]       )         \
+#define MBEDTLS_GET_UINT64_LE( data, offset )                              \
+    ( ( MBEDTLS_IS_BIG_ENDIAN )                                            \
+        ? MBEDTLS_BSWAP64(mbedtls_get_unaligned_uint64((data) + (offset))) \
+        : mbedtls_get_unaligned_uint64((data) + (offset))                  \
     )
-#endif
 
 /**
  * Put in memory a 64 bits unsigned integer in little-endian order.
@@ -450,18 +425,16 @@ static const uint16_t mbedtls_byte_order_detector = { 0x100 };
  * \param   offset  Offset from \p data where to put the least significant
  *                  byte of the 64 bits unsigned integer \p n.
  */
-#ifndef MBEDTLS_PUT_UINT64_LE
-#define MBEDTLS_PUT_UINT64_LE( n, data, offset )                \
-{                                                               \
-    ( data )[( offset )    ] = MBEDTLS_BYTE_0( n );             \
-    ( data )[( offset ) + 1] = MBEDTLS_BYTE_1( n );             \
-    ( data )[( offset ) + 2] = MBEDTLS_BYTE_2( n );             \
-    ( data )[( offset ) + 3] = MBEDTLS_BYTE_3( n );             \
-    ( data )[( offset ) + 4] = MBEDTLS_BYTE_4( n );             \
-    ( data )[( offset ) + 5] = MBEDTLS_BYTE_5( n );             \
-    ( data )[( offset ) + 6] = MBEDTLS_BYTE_6( n );             \
-    ( data )[( offset ) + 7] = MBEDTLS_BYTE_7( n );             \
+#define MBEDTLS_PUT_UINT64_LE( n, data, offset )                             \
+{                                                                            \
+    if ( MBEDTLS_IS_BIG_ENDIAN )                                             \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), MBEDTLS_BSWAP64((uint64_t)(n))); \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        mbedtls_put_unaligned_uint64((data) + (offset), (uint64_t)(n));      \
+    }                                                                        \
 }
-#endif
 
 #endif /* MBEDTLS_LIBRARY_ALIGNMENT_H */

From f7f1f748e3059a0c5e23acab891b08ab0e78771a Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Mon, 28 Nov 2022 14:52:45 +0000
Subject: [PATCH 45/56] Support built-in byteswap routines from clang, gcc,
 MSVC

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 49 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/library/alignment.h b/library/alignment.h
index 7638f9438..86e78217b 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -25,6 +25,7 @@
 
 #include <stdint.h>
 #include <string.h>
+#include <stdlib.h>
 
 #include "mbedtls/build_info.h"
 
@@ -120,13 +121,58 @@ inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x )
 #define MBEDTLS_BYTE_6( x ) ( (uint8_t) ( ( ( x ) >> 48 ) & 0xff ) )
 #define MBEDTLS_BYTE_7( x ) ( (uint8_t) ( ( ( x ) >> 56 ) & 0xff ) )
 
+/*
+ * Detect GCC built-in byteswap routines
+ */
+#if defined(__GNUC__) && defined(__GNUC_PREREQ)
+#if __GNUC_PREREQ(4,8)
+#define MBEDTLS_BSWAP16 __builtin_bswap16
+#endif /* __GNUC_PREREQ(4,8) */
+#if __GNUC_PREREQ(4,3)
+#define MBEDTLS_BSWAP32 __builtin_bswap32
+#define MBEDTLS_BSWAP64 __builtin_bswap64
+#endif /* __GNUC_PREREQ(4,3) */
+#endif /* defined(__GNUC__) && defined(__GNUC_PREREQ) */
+
+/*
+ * Detect Clang built-in byteswap routines
+ */
+#if defined(__clang__) && defined(__has_builtin)
+#if __has_builtin(__builtin_bswap16)
+#define MBEDTLS_BSWAP16 __builtin_bswap16
+#endif /* __has_builtin(__builtin_bswap16) */
+#if __has_builtin(__builtin_bswap32)
+#define MBEDTLS_BSWAP32 __builtin_bswap32
+#endif /* __has_builtin(__builtin_bswap32) */
+#if __has_builtin(__builtin_bswap64)
+#define MBEDTLS_BSWAP64 __builtin_bswap64
+#endif /* __has_builtin(__builtin_bswap64) */
+#endif /* defined(__clang__) && defined(__has_builtin) */
+
+/*
+ * Detect MSVC built-in byteswap routines
+ */
+#if defined(_MSC_VER)
+#define MBEDTLS_BSWAP16 _byteswap_ushort
+#define MBEDTLS_BSWAP32 _byteswap_ulong
+#define MBEDTLS_BSWAP64 _byteswap_uint64
+#endif /* defined(_MSC_VER) */
+
+/*
+ * Where compiler built-ins are not present, fall back to C code that the
+ * compiler may be able to detect and transform into the relevant bswap or
+ * similar instruction.
+ */
+#if !defined(MBEDTLS_BSWAP16)
 static inline uint16_t mbedtls_bswap16( uint16_t x ) {
     return
          ( x & 0x00ff ) << 8 |
          ( x & 0xff00 ) >> 8;
 }
 #define MBEDTLS_BSWAP16 mbedtls_bswap16
+#endif /* !defined(MBEDTLS_BSWAP16) */
 
+#if !defined(MBEDTLS_BSWAP32)
 static inline uint32_t mbedtls_bswap32( uint32_t x ) {
     return
          ( x & 0x000000ff ) << 24 |
@@ -135,7 +181,9 @@ static inline uint32_t mbedtls_bswap32( uint32_t x ) {
          ( x & 0xff000000 ) >> 24;
 }
 #define MBEDTLS_BSWAP32 mbedtls_bswap32
+#endif /* !defined(MBEDTLS_BSWAP32) */
 
+#if !defined(MBEDTLS_BSWAP64)
 static inline uint64_t mbedtls_bswap64( uint64_t x ) {
     return
          ( x & 0x00000000000000ff ) << 56 |
@@ -148,6 +196,7 @@ static inline uint64_t mbedtls_bswap64( uint64_t x ) {
          ( x & 0xff00000000000000 ) >> 56;
 }
 #define MBEDTLS_BSWAP64 mbedtls_bswap64
+#endif /* !defined(MBEDTLS_BSWAP64) */
 
 #if !defined(__BYTE_ORDER__)
 static const uint16_t mbedtls_byte_order_detector = { 0x100 };

From 6d23ff60dd2b16ac18bd8020006b7d1c1708604c Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Mon, 28 Nov 2022 14:38:53 +0000
Subject: [PATCH 46/56] Make use of optimised bswap from bignum

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/bignum_core.c | 52 +++++++++++++------------------------------
 1 file changed, 16 insertions(+), 36 deletions(-)

diff --git a/library/bignum_core.c b/library/bignum_core.c
index 41d323968..e3451aeda 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -83,45 +83,25 @@ static mbedtls_mpi_uint mpi_bigendian_to_host_c( mbedtls_mpi_uint a )
 
 static mbedtls_mpi_uint mpi_bigendian_to_host( mbedtls_mpi_uint a )
 {
-#if defined(__BYTE_ORDER__)
-
-/* Nothing to do on bigendian systems. */
-#if ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
-    return( a );
-#endif /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
-
-#if ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
-
-/* For GCC and Clang, have builtins for byte swapping. */
-#if defined(__GNUC__) && defined(__GNUC_PREREQ)
-#if __GNUC_PREREQ(4,3)
-#define have_bswap
-#endif
-#endif
-
-#if defined(__clang__) && defined(__has_builtin)
-#if __has_builtin(__builtin_bswap32)  &&                 \
-    __has_builtin(__builtin_bswap64)
-#define have_bswap
-#endif
-#endif
-
-#if defined(have_bswap)
-    /* The compiler is hopefully able to statically evaluate this! */
-    switch( sizeof(mbedtls_mpi_uint) )
+    if ( MBEDTLS_IS_BIG_ENDIAN )
     {
-        case 4:
-            return( __builtin_bswap32(a) );
-        case 8:
-            return( __builtin_bswap64(a) );
+        /* Nothing to do on bigendian systems. */
+        return( a );
     }
-#endif
-#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
-#endif /* __BYTE_ORDER__ */
+    else
+    {
+        switch( sizeof(mbedtls_mpi_uint) )
+        {
+            case 4:
+                return (mbedtls_mpi_uint) MBEDTLS_BSWAP32( (uint32_t)a );
+            case 8:
+                return (mbedtls_mpi_uint) MBEDTLS_BSWAP64( (uint64_t)a );
+        }
 
-    /* Fall back to C-based reordering if we don't know the byte order
-     * or we couldn't use a compiler-specific builtin. */
-    return( mpi_bigendian_to_host_c( a ) );
+        /* Fall back to C-based reordering if mbedtls_mpi_uint is neither
+         * 4 nor 8 bytes wide, so no fixed-width byteswap applies. */
+        return( mpi_bigendian_to_host_c( a ) );
+    }
 }
 
 void mbedtls_mpi_core_bigendian_to_host( mbedtls_mpi_uint *A,

From 2d0f27d0fcbc0f95bf7b7ed8810db178ab32a02b Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 30 Nov 2022 11:54:34 +0000
Subject: [PATCH 47/56] Make use of optimised bswap from ARIA

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/aria.c | 41 +----------------------------------------
 1 file changed, 1 insertion(+), 40 deletions(-)

diff --git a/library/aria.c b/library/aria.c
index 682d4ec41..517e10a77 100644
--- a/library/aria.c
+++ b/library/aria.c
@@ -98,47 +98,8 @@ static inline uint32_t aria_p1( uint32_t x )
  * modify byte order: ( A B C D ) -> ( D C B A ), i.e. change endianness
  *
  * This is submatrix P3 in [1] Appendix B.1
- *
- * Some compilers fail to translate this to a single instruction,
- * so let's provide asm versions for common platforms with C fallback.
  */
-#if defined(MBEDTLS_HAVE_ASM)
-#if defined(__arm__) /* rev available from v6 up */
-/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
-#if defined(__GNUC__) && \
-    ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 ) && \
-    __ARM_ARCH >= 6
-static inline uint32_t aria_p3( uint32_t x )
-{
-    uint32_t r;
-    __asm( "rev %0, %1" : "=l" (r) : "l" (x) );
-    return( r );
-}
-#define ARIA_P3 aria_p3
-#elif defined(__ARMCC_VERSION) && __ARMCC_VERSION < 6000000 && \
-    ( __TARGET_ARCH_ARM >= 6 || __TARGET_ARCH_THUMB >= 3 )
-static inline uint32_t aria_p3( uint32_t x )
-{
-    uint32_t r;
-    __asm( "rev r, x" );
-    return( r );
-}
-#define ARIA_P3 aria_p3
-#endif
-#endif /* arm */
-#if defined(__GNUC__) && \
-    defined(__i386__) || defined(__amd64__) || defined( __x86_64__)
-static inline uint32_t aria_p3( uint32_t x )
-{
-    __asm( "bswap %0" : "=r" (x) : "0" (x) );
-    return( x );
-}
-#define ARIA_P3 aria_p3
-#endif /* x86 gnuc */
-#endif /* MBEDTLS_HAVE_ASM && GNUC */
-#if !defined(ARIA_P3)
-#define ARIA_P3(x) ARIA_P2( ARIA_P1 ( x ) )
-#endif
+#define ARIA_P3(x) MBEDTLS_BSWAP32(x)
 
 /*
  * ARIA Affine Transform

From 2dae4b3ef609865f0b19b39cc4d44995af3d4833 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 30 Nov 2022 12:07:36 +0000
Subject: [PATCH 48/56] Support armcc builtin byteswap routine

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 library/alignment.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/library/alignment.h b/library/alignment.h
index 86e78217b..3c5fa2360 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -158,6 +158,11 @@ inline void mbedtls_put_unaligned_uint64( void *p, uint64_t x )
 #define MBEDTLS_BSWAP64 _byteswap_uint64
 #endif /* defined(_MSC_VER) */
 
+/* Detect armcc built-in byteswap routine, unless a built-in was already chosen */
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 410000) && !defined(MBEDTLS_BSWAP32)
+#define MBEDTLS_BSWAP32 __rev
+#endif
+
 /*
  * Where compiler built-ins are not present, fall back to C code that the
  * compiler may be able to detect and transform into the relevant bswap or

From fb5fedcd00726555def8b1d47293e87562db0f86 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Wed, 30 Nov 2022 15:20:33 +0000
Subject: [PATCH 49/56] Add tests for alignment.h

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_alignment.data     | 113 ++++++
 tests/suites/test_suite_alignment.function | 377 +++++++++++++++++++++
 2 files changed, 490 insertions(+)
 create mode 100644 tests/suites/test_suite_alignment.data
 create mode 100644 tests/suites/test_suite_alignment.function

diff --git a/tests/suites/test_suite_alignment.data b/tests/suites/test_suite_alignment.data
new file mode 100644
index 000000000..84b568b74
--- /dev/null
+++ b/tests/suites/test_suite_alignment.data
@@ -0,0 +1,113 @@
+Aligned 16-bit access
+mbedtls_unaligned_access:16:0
+
+Aligned 32-bit access
+mbedtls_unaligned_access:32:0
+
+Aligned 64-bit access
+mbedtls_unaligned_access:64:0
+
+Unaligned 16-bit access offset=1
+mbedtls_unaligned_access:16:1
+
+Unaligned 32-bit access offset=1
+mbedtls_unaligned_access:32:1
+
+Unaligned 64-bit access offset=1
+mbedtls_unaligned_access:64:1
+
+Unaligned 16-bit access offset=4
+mbedtls_unaligned_access:16:4
+
+Unaligned 32-bit access offset=4
+mbedtls_unaligned_access:32:4
+
+Unaligned 64-bit access offset=4
+mbedtls_unaligned_access:64:4
+
+Unaligned 16-bit access offset=7
+mbedtls_unaligned_access:16:7
+
+Unaligned 32-bit access offset=7
+mbedtls_unaligned_access:32:7
+
+Unaligned 64-bit access offset=7
+mbedtls_unaligned_access:64:7
+
+Unaligned 16-bit access offset=8
+mbedtls_unaligned_access:16:8
+
+Unaligned 32-bit access offset=8
+mbedtls_unaligned_access:32:8
+
+Unaligned 64-bit access offset=8
+mbedtls_unaligned_access:64:8
+
+Byteswap 16
+mbedtls_byteswap:0x07060504:0x03020100:16:0x00:0x01
+
+Byteswap 16 all-zero
+mbedtls_byteswap:0x0:0x0:16:0x00:0x0
+
+Byteswap 16 all-ones
+mbedtls_byteswap:0xffffffff:0xffffffff:16:0x00:0xffff
+
+Byteswap 32
+mbedtls_byteswap:0x07060504:0x03020100:32:0x00:0x010203
+
+Byteswap 32 all-zero
+mbedtls_byteswap:0x0:0x0:32:0x00:0x0
+
+Byteswap 32 all-ones
+mbedtls_byteswap:0xffffffff:0xffffffff:32:0x00:0xffffffff
+
+Byteswap 64
+mbedtls_byteswap:0x07060504:0x03020100:64:0x010203:0x04050607
+
+Byteswap 64 all-zero
+mbedtls_byteswap:0x0:0x0:64:0x00:0x0
+
+Byteswap 64 all-ones
+mbedtls_byteswap:0xffffffff:0xffffffff:64:0xffffffff:0xffffffff
+
+Get individual bytes
+get_byte
+
+Endian-aware unaligned 16-bit BE offset=0
+unaligned_access_endian_aware:16:0:1
+
+Endian-aware unaligned 16-bit BE offset=3
+unaligned_access_endian_aware:16:3:1
+
+Endian-aware unaligned 16-bit LE offset=0
+unaligned_access_endian_aware:16:0:0
+
+Endian-aware unaligned 16-bit LE offset=3
+unaligned_access_endian_aware:16:3:0
+
+Endian-aware unaligned 32-bit BE offset=0
+unaligned_access_endian_aware:32:0:1
+
+Endian-aware unaligned 32-bit BE offset=3
+unaligned_access_endian_aware:32:3:1
+
+Endian-aware unaligned 32-bit LE offset=0
+unaligned_access_endian_aware:32:0:0
+
+Endian-aware unaligned 32-bit LE offset=3
+unaligned_access_endian_aware:32:3:0
+
+Endian-aware unaligned 64-bit BE offset=0
+unaligned_access_endian_aware:64:0:1
+
+Endian-aware unaligned 64-bit BE offset=3
+unaligned_access_endian_aware:64:3:1
+
+Endian-aware unaligned 64-bit LE offset=0
+unaligned_access_endian_aware:64:0:0
+
+Endian-aware unaligned 64-bit LE offset=3
+unaligned_access_endian_aware:64:3:0
+
+Big-endian check
+mbedtls_is_big_endian
diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function
new file mode 100644
index 000000000..31d877cdf
--- /dev/null
+++ b/tests/suites/test_suite_alignment.function
@@ -0,0 +1,377 @@
+/* BEGIN_HEADER */
+#include "../library/alignment.h"
+
+#include <stdint.h>
+/* END_HEADER */
+
+/* BEGIN_CASE */
+void mbedtls_unaligned_access( int size, int offset )
+{
+    /* Define 64-bit aligned raw byte array */
+    uint64_t raw[2];
+
+    /* Populate with known data: x == { 0, 1, 2, ... } */
+    uint8_t *x = (uint8_t *) raw;
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+        x[i] = (uint8_t)i;
+
+    TEST_ASSERT( size == 16 || size == 32 || size == 64 );
+    /* Read size bits at x + offset with the unaligned getter under test */
+    uint64_t r = 0;
+    switch ( size )
+    {
+        case 16:
+            r = mbedtls_get_unaligned_uint16( x + offset );
+            break;
+        case 32:
+            r = mbedtls_get_unaligned_uint32( x + offset );
+            break;
+        case 64:
+            r = mbedtls_get_unaligned_uint64( x + offset );
+            break;
+    }
+
+    /* Generate expected result; the byte at x[offset + i] holds offset + i */
+    uint64_t expected = 0;
+    for ( uint8_t i = 0; i < 8; i++ )
+    {
+        uint8_t shift;
+        if ( MBEDTLS_IS_BIG_ENDIAN )
+        {
+            /*
+            * Similar to little-endian case described below, but the byte at
+            * the lowest address is the most significant one
+            */
+            shift = ( 7 - i ) * 8;
+        } else {
+            /* example for offset == 1:
+            * expected = (( 1 + 0 ) << (0 * 8)) | (( 1 + 1 ) << (1 * 8)) | (( 1 + 2 ) << (2 * 8)))
+            *          = (1 << 0) | (2 << 8) | (3 << 16) ...
+            *          = 0x0807060504030201
+            * x = { 0, 1, 2, 3, ... }
+            * ie expected is the value that would be read from x on a LE system, when
+            * byte swapping is not performed
+            */
+            shift = i * 8;
+        }
+        uint64_t b = offset + i;
+        expected |= b << shift;
+    }
+
+    /* Keep only the bytes actually read: the top ones on BE, the low ones on LE */
+    switch ( size )
+    {
+        case 16:
+            expected = MBEDTLS_IS_BIG_ENDIAN ? expected >> 48 : expected & 0xffff;
+            break;
+        case 32:
+            expected = MBEDTLS_IS_BIG_ENDIAN ? expected >> 32 : expected & 0xffffffff;
+            break;
+    }
+
+    TEST_EQUAL( r, expected );
+
+    /* Write sentinel to the part of the array we will test writing to */
+    for ( size_t i = 0; i < (size_t) ( size / 8 ); i++ )
+    {
+        x[i + offset] = 0xff;
+    }
+    /*
+     * Write r back with the matching mbedtls_put_unaligned_uintNN and validate
+     * that the whole array (not just sizeof(x) bytes) is restored as a result.
+     */
+    switch ( size )
+    {
+        case 16:
+            mbedtls_put_unaligned_uint16( x + offset, r );
+            break;
+        case 32:
+            mbedtls_put_unaligned_uint32( x + offset, r );
+            break;
+        case 64:
+            mbedtls_put_unaligned_uint64( x + offset, r );
+            break;
+    }
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+    {
+        TEST_EQUAL( x[i], (uint8_t)i );
+    }
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void mbedtls_byteswap( unsigned int input_h, unsigned int input_l, int size,
+    unsigned int expected_h, unsigned int expected_l )
+{
+    uint64_t input    = ( ((uint64_t)input_h   ) << 32 ) | ( (uint64_t)input_l    );
+    uint64_t expected = ( ((uint64_t)expected_h) << 32 ) | ( (uint64_t)expected_l );
+
+    /* Check against expected */
+    uint64_t r;
+    switch ( size )
+    {
+        case 16:
+            r = MBEDTLS_BSWAP16( input );
+            break;
+        case 32:
+            r = MBEDTLS_BSWAP32( input );
+            break;
+        case 64:
+            r = MBEDTLS_BSWAP64( input );
+            break;
+    }
+    TEST_EQUAL( r, expected );
+
+    /*
+     * Check byte by byte: byte i of input, counting from the least significant
+     * end, must equal byte ( size / 8 - 1 - i ) of r.
+     */
+    for ( size_t i = 0; i < (size_t)( size / 8 ); i++ )
+    {
+        size_t s1 = i * 8;
+        size_t s2 = ( ( size / 8 - 1 ) - i ) * 8;
+        uint64_t a = ( input & ( (uint64_t)0xff << s1 ) ) >> s1;
+        uint64_t b = ( r & ( (uint64_t)0xff << s2 ) ) >> s2;
+        TEST_EQUAL( a, b );
+    }
+
+    /* Check BSWAP(BSWAP(x)) == x, where x is input truncated to size bits */
+    switch ( size )
+    {
+        case 16:
+            r = MBEDTLS_BSWAP16( r );
+            TEST_EQUAL( r, input & 0xffff );
+            break;
+        case 32:
+            r = MBEDTLS_BSWAP32( r );
+            TEST_EQUAL( r, input & 0xffffffff );
+            break;
+        case 64:
+            r = MBEDTLS_BSWAP64( r );
+            TEST_EQUAL( r, input );
+            break;
+    }
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void get_byte()
+{
+    uint8_t data[16];
+    /* NOTE(review): data is filled in below but never read by this test */
+    for ( size_t i = 0; i < sizeof(data); i++ )
+        data[i] = (uint8_t) i;
+    /* MBEDTLS_BYTE_k( v ) must return k when byte k of v (byte 0 = LSB) is k */
+    uint64_t u64 = 0x0706050403020100;
+    for ( size_t b = 0; b < 8 ; b++ )
+    {
+        uint8_t actual;
+        switch ( b )
+        {
+            case 0:
+                actual = MBEDTLS_BYTE_0( u64 );
+                break;
+            case 1:
+                actual = MBEDTLS_BYTE_1( u64 );
+                break;
+            case 2:
+                actual = MBEDTLS_BYTE_2( u64 );
+                break;
+            case 3:
+                actual = MBEDTLS_BYTE_3( u64 );
+                break;
+            case 4:
+                actual = MBEDTLS_BYTE_4( u64 );
+                break;
+            case 5:
+                actual = MBEDTLS_BYTE_5( u64 );
+                break;
+            case 6:
+                actual = MBEDTLS_BYTE_6( u64 );
+                break;
+            case 7:
+                actual = MBEDTLS_BYTE_7( u64 );
+                break;
+        }
+        uint8_t expected = b;
+        TEST_EQUAL( actual, expected );
+    }
+
+    uint32_t u32 = 0x03020100;
+    for ( size_t b = 0; b < 4 ; b++ )
+    {
+        uint8_t actual;
+        switch ( b )
+        {
+            case 0:
+                actual = MBEDTLS_BYTE_0( u32 );
+                break;
+            case 1:
+                actual = MBEDTLS_BYTE_1( u32 );
+                break;
+            case 2:
+                actual = MBEDTLS_BYTE_2( u32 );
+                break;
+            case 3:
+                actual = MBEDTLS_BYTE_3( u32 );
+                break;
+        }
+        uint8_t expected = b;
+        TEST_EQUAL( actual, expected );
+    }
+
+    uint16_t u16 = 0x0100;
+    for ( size_t b = 0; b < 2 ; b++ )
+    {
+        uint8_t actual;
+        switch ( b )
+        {
+            case 0:
+                actual = MBEDTLS_BYTE_0( u16 );
+                break;
+            case 1:
+                actual = MBEDTLS_BYTE_1( u16 );
+                break;
+        }
+        uint8_t expected = b;
+        TEST_EQUAL( actual, expected );
+    }
+
+    uint8_t u8 = 0x01;
+    uint8_t actual = MBEDTLS_BYTE_0( u8 );
+    TEST_EQUAL( actual, u8 );
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void unaligned_access_endian_aware(int size, int offset, int big_endian )
+{
+    TEST_ASSERT( size == 16 || size == 24 || size == 32 || size == 64 );
+    TEST_ASSERT( offset >= 0 && offset < 8 );
+
+    /* Define 64-bit aligned raw byte array */
+    uint64_t raw[2];
+    /* Populate with known data: x == { 0, 1, 2, ... } */
+    uint8_t *x = (uint8_t *) raw;
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+        x[i] = (uint8_t) i;
+
+    uint64_t read;
+    if ( big_endian )
+    {
+        switch ( size )
+        {
+            case 16:
+                read = MBEDTLS_GET_UINT16_BE( x, offset );
+                break;
+            case 24:
+                read = MBEDTLS_GET_UINT24_BE( x, offset );
+                break;
+            case 32:
+                read = MBEDTLS_GET_UINT32_BE( x, offset );
+                break;
+            case 64:
+                read = MBEDTLS_GET_UINT64_BE( x, offset );
+                break;
+        }
+    }
+    else
+    {
+        switch ( size )
+        {
+            case 16:
+                read = MBEDTLS_GET_UINT16_LE( x, offset );
+                break;
+            case 24:
+                read = MBEDTLS_GET_UINT24_LE( x, offset );
+                break;
+            case 32:
+                read = MBEDTLS_GET_UINT32_LE( x, offset );
+                break;
+            case 64:
+                read = MBEDTLS_GET_UINT64_LE( x, offset );
+                break;
+        }
+    }
+
+    /* Build up expected value byte by byte, in either big or little endian format */
+    uint64_t expected = 0;
+    for ( size_t i = 0; i < (size_t)(size / 8); i++ )
+    {
+        uint64_t b = x[i + offset];
+        uint8_t shift = (big_endian) ? (8 * ((size / 8 - 1) - i)) : (8 * i);
+        expected |= b << shift;
+    }
+
+    /* Verify read: the value must follow the requested byte order, not the host's */
+    TEST_EQUAL( read, expected );
+
+    /* Test writing back to memory. First write a sentinel over the bytes read */
+    for ( size_t i = 0; i < (size_t)(size / 8); i++ )
+    {
+        x[i + offset] = 0xff;
+    }
+    /* Overwrite sentinel with endian-aware write macro */
+    if ( big_endian )
+    {
+        switch ( size )
+        {
+            case 16:
+                MBEDTLS_PUT_UINT16_BE( read, x, offset );
+                break;
+            case 24:
+                MBEDTLS_PUT_UINT24_BE( read, x, offset );
+                break;
+            case 32:
+                MBEDTLS_PUT_UINT32_BE( read, x, offset );
+                break;
+            case 64:
+                MBEDTLS_PUT_UINT64_BE( read, x, offset );
+                break;
+        }
+    }
+    else
+    {
+        switch ( size )
+        {
+            case 16:
+                MBEDTLS_PUT_UINT16_LE( read, x, offset );
+                break;
+                case 24:
+                MBEDTLS_PUT_UINT24_LE( read, x, offset );
+                break;
+            case 32:
+                MBEDTLS_PUT_UINT32_LE( read, x, offset );
+                break;
+            case 64:
+                MBEDTLS_PUT_UINT64_LE( read, x, offset );
+                break;
+        }
+    }
+
+    /* Verify write - the whole array must hold its original contents again */
+    for ( size_t i = 0; i < sizeof(raw); i++ )
+        TEST_EQUAL( x[i], (uint8_t) i );
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void mbedtls_is_big_endian()
+{
+    /* Examine how a known 16-bit value is laid out in memory */
+    uint16_t probe = 0x1234;
+    uint8_t *p = (uint8_t *) &probe;
+    if ( ! MBEDTLS_IS_BIG_ENDIAN )
+    {
+        /* Little-endian: the LSB comes first, i.e. p == { 0x34, 0x12 } */
+        TEST_EQUAL( p[0], 0x34 );
+        TEST_EQUAL( p[1], 0x12 );
+    }
+    else
+    {
+        /* Big-endian: the MSB comes first, i.e. p == { 0x12, 0x34 } */
+        TEST_EQUAL( p[0], 0x12 );
+        TEST_EQUAL( p[1], 0x34 );
+    }
+}
+/* END_CASE */

From 7f62f36f820f368e27424a7721415b181432a76d Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 1 Dec 2022 09:44:31 +0000
Subject: [PATCH 50/56] Add changelog entry

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 ChangeLog.d/alignment-perf.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 ChangeLog.d/alignment-perf.txt

diff --git a/ChangeLog.d/alignment-perf.txt b/ChangeLog.d/alignment-perf.txt
new file mode 100644
index 000000000..8ab649bbc
--- /dev/null
+++ b/ChangeLog.d/alignment-perf.txt
@@ -0,0 +1,4 @@
+Features
+   * Improvements to the code used to access unaligned and byte-swapped memory
+     reduce code size and improve performance (depending on compiler and target
+     architecture).

From 28f424f2380eac2e4ea6a53eb48adea4f31e440c Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 1 Dec 2022 09:49:44 +0000
Subject: [PATCH 51/56] Clarify support for mixed-endian platforms

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 1a4edb05c..8a23bd28c 100644
--- a/README.md
+++ b/README.md
@@ -261,6 +261,7 @@ Mbed TLS is mostly written in portable C99; however, it has a few platform requi
 - Signed integers must be represented using two's complement.
 - `int` and `size_t` must be at least 32 bits wide.
 - The types `uint8_t`, `uint16_t`, `uint32_t` and their signed equivalents must be available.
+- Mixed-endian platforms are not supported.
 
 PSA cryptography API
 --------------------

From 63e6a88874bcdb4a52738ba5e8d8e39a00ed6855 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 1 Dec 2022 10:01:08 +0000
Subject: [PATCH 52/56] Suppress over-eager compiler warning in tests

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_alignment.function | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function
index 31d877cdf..39bccb0c4 100644
--- a/tests/suites/test_suite_alignment.function
+++ b/tests/suites/test_suite_alignment.function
@@ -2,6 +2,8 @@
 #include "../library/alignment.h"
 
 #include <stdint.h>
+
+#pragma clang diagnostic ignored "-Wunreachable-code"
 /* END_HEADER */
 
 /* BEGIN_CASE */

From 9dc55ba9323f96ea8fc436853727ff4340a5d822 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 1 Dec 2022 10:49:53 +0000
Subject: [PATCH 53/56] Suppress over-eager compiler warnings in test code

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_alignment.function | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function
index 39bccb0c4..3014b436d 100644
--- a/tests/suites/test_suite_alignment.function
+++ b/tests/suites/test_suite_alignment.function
@@ -3,7 +3,9 @@
 
 #include <stdint.h>
 
+#if defined(__clang__)
 #pragma clang diagnostic ignored "-Wunreachable-code"
+#endif
 /* END_HEADER */
 
 /* BEGIN_CASE */
@@ -109,7 +111,7 @@ void mbedtls_byteswap( unsigned int input_h, unsigned int input_l, int size,
     uint64_t expected = ( ((uint64_t)expected_h) << 32 ) | ( (uint64_t)expected_l );
 
     /* Check against expected */
-    uint64_t r;
+    uint64_t r = 0;
     switch ( size )
     {
         case 16:
@@ -121,6 +123,8 @@ void mbedtls_byteswap( unsigned int input_h, unsigned int input_l, int size,
         case 64:
             r = MBEDTLS_BSWAP64( input );
             break;
+        default:
+            TEST_ASSERT( ! "size must be 16, 32 or 64" );
     }
     TEST_EQUAL( r, expected );
 
@@ -258,7 +262,7 @@ void unaligned_access_endian_aware(int size, int offset, int big_endian )
     for ( size_t i = 0; i < sizeof(raw); i++ )
         x[i] = (uint8_t) i;
 
-    uint64_t read;
+    uint64_t read = 0;
     if ( big_endian )
     {
         switch ( size )

From 7fc53dd83dcff40256c22ab964d63a41203d4301 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 1 Dec 2022 11:42:29 +0000
Subject: [PATCH 54/56] Suppress over-eager compiler warnings in tests

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_alignment.function | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function
index 3014b436d..fe576cf67 100644
--- a/tests/suites/test_suite_alignment.function
+++ b/tests/suites/test_suite_alignment.function
@@ -171,7 +171,8 @@ void get_byte()
     uint64_t u64 = 0x0706050403020100;
     for ( size_t b = 0; b < 8 ; b++ )
     {
-        uint8_t actual;
+        uint8_t expected = b;
+        uint8_t actual = b + 1;
         switch ( b )
         {
             case 0:
@@ -199,14 +200,14 @@ void get_byte()
                 actual = MBEDTLS_BYTE_7( u64 );
                 break;
         }
-        uint8_t expected = b;
         TEST_EQUAL( actual, expected );
     }
 
     uint32_t u32 = 0x03020100;
     for ( size_t b = 0; b < 4 ; b++ )
     {
-        uint8_t actual;
+        uint8_t expected = b;
+        uint8_t actual = b + 1;
         switch ( b )
         {
             case 0:
@@ -222,14 +223,14 @@ void get_byte()
                 actual = MBEDTLS_BYTE_3( u32 );
                 break;
         }
-        uint8_t expected = b;
         TEST_EQUAL( actual, expected );
     }
 
     uint16_t u16 = 0x0100;
     for ( size_t b = 0; b < 2 ; b++ )
     {
-        uint8_t actual;
+        uint8_t expected = b;
+        uint8_t actual = b + 1;
         switch ( b )
         {
             case 0:
@@ -239,7 +240,6 @@ void get_byte()
                 actual = MBEDTLS_BYTE_1( u16 );
                 break;
         }
-        uint8_t expected = b;
         TEST_EQUAL( actual, expected );
     }
 

From 481a5e427bb6efcde178bc38d666c2e5100f9dd8 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 1 Dec 2022 13:31:20 +0000
Subject: [PATCH 55/56] Improve parsing of test data

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 tests/suites/test_suite_alignment.data     | 24 +++++++++------
 tests/suites/test_suite_alignment.function | 34 ++++++++++++++++++----
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/tests/suites/test_suite_alignment.data b/tests/suites/test_suite_alignment.data
index 84b568b74..8c0c21d72 100644
--- a/tests/suites/test_suite_alignment.data
+++ b/tests/suites/test_suite_alignment.data
@@ -44,31 +44,37 @@ Unaligned 64-bit access offset=8
 mbedtls_unaligned_access:64:8
 
 Byteswap 16
-mbedtls_byteswap:0x07060504:0x03020100:16:0x00:0x01
+mbedtls_byteswap:"0100":16:"0001"
+
+Byteswap 16 with truncation
+mbedtls_byteswap:"0706050403020100":16:"0001"
 
 Byteswap 16 all-zero
-mbedtls_byteswap:0x0:0x0:16:0x00:0x0
+mbedtls_byteswap:"0000":16:"0000"
 
 Byteswap 16 all-ones
-mbedtls_byteswap:0xffffffff:0xffffffff:16:0x00:0xffff
+mbedtls_byteswap:"ffffffffffffffff":16:"ffff"
 
 Byteswap 32
-mbedtls_byteswap:0x07060504:0x03020100:32:0x00:0x010203
+mbedtls_byteswap:"03020100":32:"00010203"
+
+Byteswap 32 with truncation
+mbedtls_byteswap:"0706050403020100":32:"00010203"
 
 Byteswap 32 all-zero
-mbedtls_byteswap:0x0:0x0:32:0x00:0x0
+mbedtls_byteswap:"00000000":32:"00000000"
 
 Byteswap 32 all-ones
-mbedtls_byteswap:0xffffffff:0xffffffff:32:0x00:0xffffffff
+mbedtls_byteswap:"ffffffffffffffff":32:"ffffffff"
 
 Byteswap 64
-mbedtls_byteswap:0x07060504:0x03020100:64:0x010203:0x04050607
+mbedtls_byteswap:"0706050403020100":64:"0001020304050607"
 
 Byteswap 64 all-zero
-mbedtls_byteswap:0x0:0x0:64:0x00:0x0
+mbedtls_byteswap:"0000000000000000":64:"0000000000000000"
 
 Byteswap 64 all-ones
-mbedtls_byteswap:0xffffffff:0xffffffff:64:0xffffffff:0xffffffff
+mbedtls_byteswap:"ffffffffffffffff":64:"ffffffffffffffff"
 
 Get individual bytes
 get_byte
diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function
index fe576cf67..06c566888 100644
--- a/tests/suites/test_suite_alignment.function
+++ b/tests/suites/test_suite_alignment.function
@@ -6,6 +6,30 @@
 #if defined(__clang__)
 #pragma clang diagnostic ignored "-Wunreachable-code"
 #endif
+#include <stdio.h>
+
+/*
+ * Convert a hex string of the form "abcd" (case-insensitive) to a uint64_t.
+ * The leftmost digits are the most significant, whatever the host byte order.
+ * Returns 1 on success, 0 if mbedtls_test_unhexify() reports an error.
+ */
+int parse_hex_string( char* hex_string, uint64_t *result )
+{
+    uint8_t raw[8];
+    size_t olen;
+    if ( mbedtls_test_unhexify( raw, sizeof(raw), hex_string, &olen ) != 0 )
+        return 0;
+
+    /*
+     * Accumulate arithmetically: shifts act on the value rather than on its
+     * memory layout, so no host-endianness special case is needed.
+     */
+    *result = 0;
+    for ( size_t i = 0; i < olen; i++ )
+        *result = ( *result << 8 ) | raw[i];
+    return 1;
+}
+
 /* END_HEADER */
 
 /* BEGIN_CASE */
@@ -104,13 +128,13 @@ void mbedtls_unaligned_access( int size, int offset )
 /* END_CASE */
 
 /* BEGIN_CASE */
-void mbedtls_byteswap( unsigned int input_h, unsigned int input_l, int size,
-    unsigned int expected_h, unsigned int expected_l )
+void mbedtls_byteswap( char* input_str, int size, char *expected_str )
 {
-    uint64_t input    = ( ((uint64_t)input_h   ) << 32 ) | ( (uint64_t)input_l    );
-    uint64_t expected = ( ((uint64_t)expected_h) << 32 ) | ( (uint64_t)expected_l );
+    uint64_t input, expected;
+    TEST_ASSERT( parse_hex_string( input_str, &input ) );
+    TEST_ASSERT( parse_hex_string( expected_str, &expected ) );
 
-    /* Check against expected */
+    /* Check against expected result */
     uint64_t r = 0;
     switch ( size )
     {

From 852191e0b5231a359906e3414c9bb7f924e07917 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Fri, 9 Dec 2022 14:24:33 +0000
Subject: [PATCH 56/56] Improve Changelog

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
---
 ChangeLog.d/alignment-perf.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ChangeLog.d/alignment-perf.txt b/ChangeLog.d/alignment-perf.txt
index 8ab649bbc..7a8e6fb4a 100644
--- a/ChangeLog.d/alignment-perf.txt
+++ b/ChangeLog.d/alignment-perf.txt
@@ -1,4 +1,8 @@
 Features
-   * Improvements to the code used to access unaligned and byte-swapped memory
-     reduce code size and improve performance (depending on compiler and target
+   * General performance improvements by accessing multiple bytes at a time.
+     Fixes #1666.
+   * Improvements to use of unaligned and byte-swapped memory, reducing code
+     size and improving performance (depending on compiler and target
      architecture).
+Changes
+   * Mixed-endian systems are explicitly not supported any more.