diff --git a/library/aesce.c b/library/aesce.c
index 6b493a272..600326a08 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -101,59 +101,36 @@ int mbedtls_aesce_has_support(void)
 #endif
 }
 
+/* Single round of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND                   \
+    block = vaeseq_u8(block, vld1q_u8(keys)); \
+    block = vaesmcq_u8(block);                \
+    keys += 16
+/* Two rounds of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND_X2 AESCE_ENCRYPT_ROUND; AESCE_ENCRYPT_ROUND
+
 MBEDTLS_OPTIMIZE_ALWAYS
 static uint8x16_t aesce_encrypt_block(uint8x16_t block,
                                       unsigned char *keys,
                                       int rounds)
 {
-    /* Assume either 10, 12 or 14 rounds */
+    /* Assume either 10, 12 or 14 rounds.
+     * Skip 4 or 2 rounds, if doing 10 or 12 rounds */
     if (rounds == 10) {
         goto rounds_10;
     }
     if (rounds == 12) {
         goto rounds_12;
     }
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
 rounds_12:
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
 rounds_10:
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND;
 
     /* AES AddRoundKey for the previous round.
      * SubBytes, ShiftRows for the final round. */
@@ -168,74 +145,56 @@ rounds_10:
     return block;
 }
 
+/* Single round of AESCE decryption
+ *
+ * AES AddRoundKey, SubBytes, ShiftRows
+ *
+ *      block = vaesdq_u8(block, vld1q_u8(keys));
+ *
+ * AES inverse MixColumns for the next round.
+ *
+ * This means that we switch the order of the inverse AddRoundKey and
+ * inverse MixColumns operations. We have to do this as AddRoundKey is
+ * done in an atomic instruction together with the inverses of SubBytes
+ * and ShiftRows.
+ *
+ * It works because MixColumns is a linear operation over GF(2^8) and
+ * AddRoundKey is an exclusive or, which is equivalent to addition over
+ * GF(2^8). (The inverse of MixColumns needs to be applied to the
+ * affected round keys separately which has been done when the
+ * decryption round keys were calculated.)
+ *
+ *      block = vaesimcq_u8(block);
+ */
+#define AESCE_DECRYPT_ROUND                   \
+    block = vaesdq_u8(block, vld1q_u8(keys)); \
+    block = vaesimcq_u8(block);               \
+    keys += 16
+/* Two rounds of AESCE decryption */
+#define AESCE_DECRYPT_ROUND_X2 AESCE_DECRYPT_ROUND; AESCE_DECRYPT_ROUND
+
 MBEDTLS_OPTIMIZE_ALWAYS
 static uint8x16_t aesce_decrypt_block(uint8x16_t block,
                                       unsigned char *keys,
                                       int rounds)
 {
-    /* Assume either 10, 12 or 14 rounds */
+    /* Assume either 10, 12 or 14 rounds.
+     * Skip 4 or 2 rounds, if doing 10 or 12 rounds */
     if (rounds == 10) {
         goto rounds_10;
     }
     if (rounds == 12) {
         goto rounds_12;
     }
-
-    /* AES AddRoundKey, SubBytes, ShiftRows */
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    /* AES inverse MixColumns for the next round.
-     *
-     * This means that we switch the order of the inverse AddRoundKey and
-     * inverse MixColumns operations. We have to do this as AddRoundKey is
-     * done in an atomic instruction together with the inverses of SubBytes
-     * and ShiftRows.
-     *
-     * It works because MixColumns is a linear operation over GF(2^8) and
-     * AddRoundKey is an exclusive or, which is equivalent to addition over
-     * GF(2^8). (The inverse of MixColumns needs to be applied to the
-     * affected round keys separately which has been done when the
-     * decryption round keys were calculated.) */
-    block = vaesimcq_u8(block);
-    keys += 16;
-
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
 rounds_12:
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
 rounds_10:
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND;
 
     /* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the
      * last full round. */
diff --git a/library/aesce.h b/library/aesce.h
index 61e73bfdd..b12bf76ba 100644
--- a/library/aesce.h
+++ b/library/aesce.h
@@ -52,8 +52,8 @@ int mbedtls_aesce_has_support(void);
 /**
  * \brief Internal AES-ECB block encryption and decryption
  *
- * Note: this assumes that the context specifies either 10, 12 or 14 rounds
- * and will behave incorrectly if this is not the case.
+ * \warning This assumes that the context specifies either 10, 12 or 14
+ *          rounds and will behave incorrectly if this is not the case.
  *
  * \param ctx AES context
  * \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
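The comment kept with AESCE_DECRYPT_ROUND notes that the inverse MixColumns must be applied to the affected round keys separately, when the decryption round keys are calculated. As a rough illustration of that key-schedule step, here is a hypothetical sketch: it is not part of this patch and not the library's implementation, and the function name and key layout are assumptions. It builds an equivalent-inverse-cipher key schedule by reversing the encryption round keys and passing the middle ones through vaesimcq_u8, which is what lets the decryption round loop above apply the inverse MixColumns before AddRoundKey.

/* Hypothetical sketch (not from this patch): derive the AES decryption
 * round keys from the encryption key schedule.  The first and last round
 * keys are reused as-is, in reverse order; every middle round key is
 * passed through the inverse MixColumns so that the decryption round
 * loop can apply vaesimcq_u8 before the AddRoundKey.  Requires an
 * AES-capable build, e.g. -march=armv8-a+crypto. */
#include <arm_neon.h>

static void derive_decryption_round_keys(unsigned char *inv_keys,
                                         const unsigned char *fwd_keys,
                                         int rounds)
{
    /* Last encryption round key is used first when decrypting, unchanged. */
    vst1q_u8(inv_keys, vld1q_u8(fwd_keys + 16 * rounds));

    /* Middle round keys, in reverse order, through inverse MixColumns. */
    for (int i = 1; i < rounds; i++) {
        vst1q_u8(inv_keys + 16 * i,
                 vaesimcq_u8(vld1q_u8(fwd_keys + 16 * (rounds - i))));
    }

    /* First encryption round key is used last, again unchanged. */
    vst1q_u8(inv_keys + 16 * rounds, vld1q_u8(fwd_keys));
}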