From 4489c8dcefb764ecee779d838fc4eb9850a7a5fb Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 12:06:48 +0100 Subject: [PATCH 01/24] Disable bignum assembly for certain Arm M-class CPUs Signed-off-by: Dave Rodgman --- library/bn_mul.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/library/bn_mul.h b/library/bn_mul.h index ab59fbd64..c5124702b 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -673,6 +673,22 @@ #define MULADDC_CANNOT_USE_R7 #endif +/* + * Similarly, we need to disable the assembly below if: + * - compiler is armclang + * - optimisation is not -O0 + * - target is Thumb + * - target cpu is one of cortex-m0, cortex-m0plus, cortex-m1, cortex-m23, sc000 + * + * Checking for __ARM_ARCH_6M__ or __ARM_ARCH_8M_BASE__ seems to identify exactly these + * cpus and no others (tested against all values for -mcpu known to armclang 6.20). + */ +#if defined(__ARMCC_VERSION) && defined(__OPTIMIZE__) && defined(__thumb__) +#if defined(__ARM_ARCH_8M_BASE__) || defined(__ARM_ARCH_6M__) +#define MULADDC_CANNOT_USE_R7 +#endif +#endif + #if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7) #if defined(__thumb__) && !defined(__thumb2__) From a55e12c525d868c887590f9ca12cef67da60a497 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 12:14:13 +0100 Subject: [PATCH 02/24] Add Changelog Signed-off-by: Dave Rodgman --- ChangeLog.d/armclang-compile-fix.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 ChangeLog.d/armclang-compile-fix.txt diff --git a/ChangeLog.d/armclang-compile-fix.txt b/ChangeLog.d/armclang-compile-fix.txt new file mode 100644 index 000000000..428de020f --- /dev/null +++ b/ChangeLog.d/armclang-compile-fix.txt @@ -0,0 +1,3 @@ +Bugfix + * Fix armclang compilation error when targetting certain Arm M-class CPUs (Cortex-M0, + Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). Fixes #1077. From 99318e6138c257e6741343d6efda2ffd5ed08efe Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 12:27:42 +0100 Subject: [PATCH 03/24] Add build test for armclang / Cortex-M0 / -Os Signed-off-by: Dave Rodgman --- tests/scripts/all.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index 78666b41f..f34d0eb86 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -3894,6 +3894,9 @@ component_build_armcc () { # ARM Compiler 6 - Target ARMv8.2-A - AArch64 armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto" + + # ARM Compiler 6 - Target Cortex-M0 + armc6_build_test "-Os --target=arm-arm-none-eabi -mcpu=cortex-m0" } support_build_armcc () { armc5_cc="$ARMC5_BIN_DIR/armcc" From a1f51c213cc612b1bd587a4a0c145b22e197cba8 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 13:02:18 +0100 Subject: [PATCH 04/24] Fix Changelog formatting Signed-off-by: Dave Rodgman --- ChangeLog.d/armclang-compile-fix.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog.d/armclang-compile-fix.txt b/ChangeLog.d/armclang-compile-fix.txt index 428de020f..b1f070d43 100644 --- a/ChangeLog.d/armclang-compile-fix.txt +++ b/ChangeLog.d/armclang-compile-fix.txt @@ -1,3 +1,4 @@ Bugfix - * Fix armclang compilation error when targetting certain Arm M-class CPUs (Cortex-M0, - Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). Fixes #1077. + * Fix armclang compilation error when targetting certain Arm M-class CPUs + (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). + Fixes #1077. From 0ffc6f48fadf129413b070459e8d98a1cee3158d Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 17:19:10 +0100 Subject: [PATCH 05/24] First draft at fixing the choice of asm Signed-off-by: Dave Rodgman --- library/bn_mul.h | 58 ++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index c5124702b..92a926630 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -659,39 +659,46 @@ #endif /* TriCore */ /* + * There is a fairly complex matrix of supported options for Thumb / Thumb2 / Arm + * assembly. Choosing the correct code path depends on the target, the compiler, + * and the optimisation level. + * * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about * our use of r7 below, unless -fomit-frame-pointer is passed. * * On the other hand, -fomit-frame-pointer is implied by any -Ox options with * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by * clang and armcc5 under the same conditions). - * - * So, only use the optimized assembly below for optimized build, which avoids - * the build error and is pretty reasonable anyway. */ -#if defined(__GNUC__) && !defined(__OPTIMIZE__) -#define MULADDC_CANNOT_USE_R7 + + +#if defined(__thumb__) && !defined(__thumb2__) // Thumb1 (not Thumb 2) ISA +// Only supported by gcc, when optimisation is enabled; only option A works +#if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) +#define ARM_OPTION_A #endif -/* - * Similarly, we need to disable the assembly below if: - * - compiler is armclang - * - optimisation is not -O0 - * - target is Thumb - * - target cpu is one of cortex-m0, cortex-m0plus, cortex-m1, cortex-m23, sc000 - * - * Checking for __ARM_ARCH_6M__ or __ARM_ARCH_8M_BASE__ seems to identify exactly these - * cpus and no others (tested against all values for -mcpu known to armclang 6.20). - */ -#if defined(__ARMCC_VERSION) && defined(__OPTIMIZE__) && defined(__thumb__) -#if defined(__ARM_ARCH_8M_BASE__) || defined(__ARM_ARCH_6M__) -#define MULADDC_CANNOT_USE_R7 -#endif +#elif defined(__thumb2__) // Thumb 2 ISA + +#if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) +// gcc -O0 +// only option B builds +#define ARM_OPTION_B +#elif !defined(__ARMCC_VERSION) +// gcc with optimisation - any option builds +#define ARM_OPTION_A +#else +// armclang +// options A or C build +#define ARM_OPTION_A #endif -#if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7) +#elif defined(__arm__) // Arm ISA +// any option builds. A does not seem to work; B is about 2x faster than C (under emulation). +#define ARM_OPTION_B +#endif -#if defined(__thumb__) && !defined(__thumb2__) +#if defined(ARM_OPTION_A) #define MULADDC_X1_INIT \ asm( \ @@ -746,8 +753,7 @@ "r6", "r7", "r8", "r9", "cc" \ ); -#elif (__ARM_ARCH >= 6) && \ - defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) +#elif defined(ARM_OPTION_B) #define MULADDC_X1_INIT \ { \ @@ -812,7 +818,7 @@ ); \ } -#else +#elif defined(ARM_OPTION_C) #define MULADDC_X1_INIT \ asm( \ @@ -840,9 +846,7 @@ "r6", "r7", "cc" \ ); -#endif /* Thumb */ - -#endif /* ARMv3 */ +#endif /* Arm */ #if defined(__alpha__) From 1416cba81f009414914b3b13a512cb32781eb992 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 18:07:48 +0100 Subject: [PATCH 06/24] Gate all arm asm on Armv6 or better architecture Signed-off-by: Dave Rodgman --- library/bn_mul.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/library/bn_mul.h b/library/bn_mul.h index 92a926630..475180252 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -671,6 +671,8 @@ * clang and armcc5 under the same conditions). */ +#if defined(__ARM_ARCH) +#if __ARM_ARCH >= 6 #if defined(__thumb__) && !defined(__thumb2__) // Thumb1 (not Thumb 2) ISA // Only supported by gcc, when optimisation is enabled; only option A works @@ -698,6 +700,9 @@ #define ARM_OPTION_B #endif +#endif +#endif + #if defined(ARM_OPTION_A) #define MULADDC_X1_INIT \ From ffbb7c5edc779df135f477bcf0a4e700184aa850 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 18:28:46 +0100 Subject: [PATCH 07/24] Tidy-up macros and fix guards around option B Signed-off-by: Dave Rodgman --- library/bn_mul.h | 50 ++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 475180252..e76a8fd91 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -671,37 +671,45 @@ * clang and armcc5 under the same conditions). */ -#if defined(__ARM_ARCH) -#if __ARM_ARCH >= 6 +//#if defined(__ARM_ARCH) +//#if __ARM_ARCH >= 6 #if defined(__thumb__) && !defined(__thumb2__) // Thumb1 (not Thumb 2) ISA -// Only supported by gcc, when optimisation is enabled; only option A works -#if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) -#define ARM_OPTION_A -#endif + + // Only supported by gcc, when optimisation is enabled; only option A works + #if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) + #define ARM_OPTION_A + #endif #elif defined(__thumb2__) // Thumb 2 ISA -#if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) -// gcc -O0 -// only option B builds -#define ARM_OPTION_B -#elif !defined(__ARMCC_VERSION) -// gcc with optimisation - any option builds -#define ARM_OPTION_A -#else -// armclang -// options A or C build -#define ARM_OPTION_A -#endif + #if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) + // gcc -O0: only option B builds + #if defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) + #define ARM_OPTION_B + #endif + #else + // gcc with optimisation, or armclang: any option builds + #define ARM_OPTION_B_OR_C + #endif #elif defined(__arm__) // Arm ISA -// any option builds. A does not seem to work; B is about 2x faster than C (under emulation). -#define ARM_OPTION_B -#endif + + // any option builds. A does not seem to work; B is about 2x faster than C (under emulation). + #define ARM_OPTION_B_OR_C #endif + +#if defined(ARM_OPTION_B_OR_C) +#if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) +#define ARM_OPTION_B +#else +#define ARM_OPTION_C #endif +#endif + +//#endif +//#endif #if defined(ARM_OPTION_A) From 6adaca60626f53b1388a99fd05db23ba350ea4ac Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 19:43:13 +0100 Subject: [PATCH 08/24] Minor tidy-up Signed-off-by: Dave Rodgman --- library/bn_mul.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index e76a8fd91..c6e9fab46 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -670,11 +670,7 @@ * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by * clang and armcc5 under the same conditions). */ - -//#if defined(__ARM_ARCH) -//#if __ARM_ARCH >= 6 - -#if defined(__thumb__) && !defined(__thumb2__) // Thumb1 (not Thumb 2) ISA +#if defined(__thumb__) && !defined(__thumb2__) // Thumb 1 (not Thumb 2) ISA // Only supported by gcc, when optimisation is enabled; only option A works #if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) @@ -685,7 +681,7 @@ #if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) // gcc -O0: only option B builds - #if defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) + #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) #define ARM_OPTION_B #endif #else @@ -698,18 +694,16 @@ // any option builds. A does not seem to work; B is about 2x faster than C (under emulation). #define ARM_OPTION_B_OR_C -#endif +#endif /* Arm ISA selection */ #if defined(ARM_OPTION_B_OR_C) +// Prefer B, if we have the right features for it #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) #define ARM_OPTION_B #else #define ARM_OPTION_C #endif -#endif - -//#endif -//#endif +#endif /* defined(ARM_OPTION_B_OR_C) */ #if defined(ARM_OPTION_A) From 5c5a6dece6ec23d2374cd862c79387a9e1f5d7b1 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Wed, 24 May 2023 23:24:16 +0100 Subject: [PATCH 09/24] Give options clearer names Signed-off-by: Dave Rodgman --- library/bn_mul.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index c6e9fab46..614937506 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -674,7 +674,7 @@ // Only supported by gcc, when optimisation is enabled; only option A works #if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) - #define ARM_OPTION_A + #define ARM_THUMB_1 #endif #elif defined(__thumb2__) // Thumb 2 ISA @@ -682,30 +682,30 @@ #if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) // gcc -O0: only option B builds #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) - #define ARM_OPTION_B + #define ARM_V6_DSP #endif #else // gcc with optimisation, or armclang: any option builds - #define ARM_OPTION_B_OR_C + #define ARM_V6_DSP_OR_THUMB_2 #endif #elif defined(__arm__) // Arm ISA // any option builds. A does not seem to work; B is about 2x faster than C (under emulation). - #define ARM_OPTION_B_OR_C + #define ARM_V6_DSP_OR_THUMB_2 #endif /* Arm ISA selection */ -#if defined(ARM_OPTION_B_OR_C) +#if defined(ARM_V6_DSP_OR_THUMB_2) // Prefer B, if we have the right features for it #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) -#define ARM_OPTION_B +#define ARM_V6_DSP #else -#define ARM_OPTION_C +#define ARM_THUMB_2 #endif -#endif /* defined(ARM_OPTION_B_OR_C) */ +#endif /* defined(ARM_V6_DSP_OR_THUMB_2) */ -#if defined(ARM_OPTION_A) +#if defined(ARM_THUMB_1) #define MULADDC_X1_INIT \ asm( \ @@ -760,7 +760,7 @@ "r6", "r7", "r8", "r9", "cc" \ ); -#elif defined(ARM_OPTION_B) +#elif defined(ARM_V6_DSP) #define MULADDC_X1_INIT \ { \ @@ -825,7 +825,7 @@ ); \ } -#elif defined(ARM_OPTION_C) +#elif defined(ARM_THUMB_2) #define MULADDC_X1_INIT \ asm( \ From 92e8a88390d89ff2b1633783506793f405ea56de Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 08:10:33 +0100 Subject: [PATCH 10/24] Improve comments Signed-off-by: Dave Rodgman --- library/bn_mul.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 614937506..51c0a3148 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -660,7 +660,7 @@ /* * There is a fairly complex matrix of supported options for Thumb / Thumb2 / Arm - * assembly. Choosing the correct code path depends on the target, the compiler, + * assembly. Choosing the correct codepath depends on the target, the compiler, * and the optimisation level. * * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about @@ -672,7 +672,7 @@ */ #if defined(__thumb__) && !defined(__thumb2__) // Thumb 1 (not Thumb 2) ISA - // Only supported by gcc, when optimisation is enabled; only option A works + // Only supported by gcc, when optimisation is enabled; only Thumb 1 codepath works #if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) #define ARM_THUMB_1 #endif @@ -680,24 +680,26 @@ #elif defined(__thumb2__) // Thumb 2 ISA #if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) - // gcc -O0: only option B builds + // gcc -O0: only V6+DSP codepath builds #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) #define ARM_V6_DSP #endif #else - // gcc with optimisation, or armclang: any option builds + // gcc with optimisation, or armclang: any codepath builds #define ARM_V6_DSP_OR_THUMB_2 #endif #elif defined(__arm__) // Arm ISA - // any option builds. A does not seem to work; B is about 2x faster than C (under emulation). + // any option builds. Thumb 1 codepath does not seem to work. #define ARM_V6_DSP_OR_THUMB_2 #endif /* Arm ISA selection */ #if defined(ARM_V6_DSP_OR_THUMB_2) -// Prefer B, if we have the right features for it +// Prefer V6+DSP codepath, if we have the right features for it; otherwise +// fall back to generic Thumb 2 / Arm codepath +// V6+DSP codepath is about 2x faster than Thumb 2 (under emulation). #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) #define ARM_V6_DSP #else From 7d6ec95517d3ca56b93c1503924ffde9298df410 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 09:41:42 +0100 Subject: [PATCH 11/24] Revert to detecting __GNUCC__ instead of armclang Signed-off-by: Dave Rodgman --- library/bn_mul.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 51c0a3148..936810e4e 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -673,13 +673,13 @@ #if defined(__thumb__) && !defined(__thumb2__) // Thumb 1 (not Thumb 2) ISA // Only supported by gcc, when optimisation is enabled; only Thumb 1 codepath works - #if defined(__OPTIMIZE__) && !defined(__ARMCC_VERSION) + #if defined(__OPTIMIZE__) && defined(__GNUC__) #define ARM_THUMB_1 #endif #elif defined(__thumb2__) // Thumb 2 ISA - #if !defined(__ARMCC_VERSION) && !defined(__OPTIMIZE__) + #if defined(__GNUC__) && !defined(__OPTIMIZE__) // gcc -O0: only V6+DSP codepath builds #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) #define ARM_V6_DSP From 1ae50aebb9811409963dc4ed71e63e03637d6ab7 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 09:46:34 +0100 Subject: [PATCH 12/24] Update Changelog Signed-off-by: Dave Rodgman --- ChangeLog.d/armclang-compile-fix.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog.d/armclang-compile-fix.txt b/ChangeLog.d/armclang-compile-fix.txt index b1f070d43..7ef6a9711 100644 --- a/ChangeLog.d/armclang-compile-fix.txt +++ b/ChangeLog.d/armclang-compile-fix.txt @@ -1,4 +1,5 @@ Bugfix * Fix armclang compilation error when targetting certain Arm M-class CPUs - (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). - Fixes #1077. + (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). Enable + bignum optimisations for most Arm platforms when compiling with -O0, + (previously optimisations were not available in this case). Fixes #1077. From cee166e3f5f073860f2d3787ae905edb7f3a32e2 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 11:00:05 +0100 Subject: [PATCH 13/24] Don't use r7 in generic codepath Signed-off-by: Dave Rodgman --- library/bn_mul.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 936810e4e..c91743ac4 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -679,15 +679,8 @@ #elif defined(__thumb2__) // Thumb 2 ISA - #if defined(__GNUC__) && !defined(__OPTIMIZE__) - // gcc -O0: only V6+DSP codepath builds - #if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) - #define ARM_V6_DSP - #endif - #else - // gcc with optimisation, or armclang: any codepath builds - #define ARM_V6_DSP_OR_THUMB_2 - #endif + // Any codepath builds. + #define ARM_V6_DSP_OR_THUMB_2 #elif defined(__arm__) // Arm ISA @@ -841,9 +834,9 @@ "mov r5, #0 \n\t" \ "ldr r6, [r1] \n\t" \ "umlal r2, r5, r3, r4 \n\t" \ - "adds r7, r6, r2 \n\t" \ + "adds r4, r6, r2 \n\t" \ "adc r2, r5, #0 \n\t" \ - "str r7, [r1], #4 \n\t" + "str r4, [r1], #4 \n\t" #define MULADDC_X1_STOP \ "str r2, %0 \n\t" \ @@ -852,7 +845,7 @@ : "=m" (c), "=m" (d), "=m" (s) \ : "m" (s), "m" (d), "m" (c), "m" (b) \ : "r0", "r1", "r2", "r3", "r4", "r5", \ - "r6", "r7", "cc" \ + "r6", "cc" \ ); #endif /* Arm */ From b047bf64e2d17535091bbf7c8df368da1db6a8b5 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 11:01:41 +0100 Subject: [PATCH 14/24] Restrict use of r7 in Thumb 1 code Signed-off-by: Dave Rodgman --- library/bn_mul.h | 59 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index c91743ac4..af0adb6ef 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -662,20 +662,11 @@ * There is a fairly complex matrix of supported options for Thumb / Thumb2 / Arm * assembly. Choosing the correct codepath depends on the target, the compiler, * and the optimisation level. - * - * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about - * our use of r7 below, unless -fomit-frame-pointer is passed. - * - * On the other hand, -fomit-frame-pointer is implied by any -Ox options with - * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by - * clang and armcc5 under the same conditions). */ #if defined(__thumb__) && !defined(__thumb2__) // Thumb 1 (not Thumb 2) ISA // Only supported by gcc, when optimisation is enabled; only Thumb 1 codepath works - #if defined(__OPTIMIZE__) && defined(__GNUC__) #define ARM_THUMB_1 - #endif #elif defined(__thumb2__) // Thumb 2 ISA @@ -702,21 +693,48 @@ #if defined(ARM_THUMB_1) +#if defined(__OPTIMIZE__) && defined(__GNUC__) +/* + * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about + * our use of r7 below, unless -fomit-frame-pointer is passed. + * + * On the other hand, -fomit-frame-pointer is implied by any -Ox options with + * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by + * clang and armcc5 under the same conditions). + * + * If gcc needs to use r7, we use r1 as a scratch register and have a few extra + * instructions to preserve/restore it; otherwise, we can use r7 and avoid + * the preserve/restore overhead. + */ +#define MULADDC_SCRATCH "RS .req r1 \n\t" +#define MULADDC_PRESERVE_R1 "mov r10, r1 \n\t" +#define MULADDC_RESTORE_R1 "mov r1, r10 \n\t" +#define MULADDC_SCRATCH_CLOBBER "r10" +#else +#define MULADDC_SCRATCH "RS .req r7 \n\t" +#define MULADDC_PRESERVE_R1 "" +#define MULADDC_RESTORE_R1 "" +#define MULADDC_SCRATCH_CLOBBER "r7" +#endif + #define MULADDC_X1_INIT \ asm( \ + MULADDC_SCRATCH \ "ldr r0, %3 \n\t" \ "ldr r1, %4 \n\t" \ "ldr r2, %5 \n\t" \ "ldr r3, %6 \n\t" \ - "lsr r7, r3, #16 \n\t" \ - "mov r9, r7 \n\t" \ - "lsl r7, r3, #16 \n\t" \ - "lsr r7, r7, #16 \n\t" \ - "mov r8, r7 \n\t" + "lsr r4, r3, #16 \n\t" \ + "mov r9, r4 \n\t" \ + "lsl r4, r3, #16 \n\t" \ + "lsr r4, r4, #16 \n\t" \ + "mov r8, r4 \n\t" \ + #define MULADDC_X1_CORE \ + MULADDC_PRESERVE_R1 \ "ldmia r0!, {r6} \n\t" \ - "lsr r7, r6, #16 \n\t" \ + "lsr RS, r6, #16 \n\t" \ "lsl r6, r6, #16 \n\t" \ "lsr r6, r6, #16 \n\t" \ "mov r4, r8 \n\t" \ @@ -724,12 +742,12 @@ "mov r3, r9 \n\t" \ "mul r6, r3 \n\t" \ "mov r5, r9 \n\t" \ - "mul r5, r7 \n\t" \ + "mul r5, RS \n\t" \ "mov r3, r8 \n\t" \ - "mul r7, r3 \n\t" \ + "mul RS, r3 \n\t" \ "lsr r3, r6, #16 \n\t" \ "add r5, r5, r3 \n\t" \ - "lsr r3, r7, #16 \n\t" \ + "lsr r3, RS, #16 \n\t" \ "add r5, r5, r3 \n\t" \ "add r4, r4, r2 \n\t" \ "mov r2, #0 \n\t" \ @@ -737,9 +755,10 @@ "lsl r3, r6, #16 \n\t" \ "add r4, r4, r3 \n\t" \ "adc r5, r2 \n\t" \ - "lsl r3, r7, #16 \n\t" \ + "lsl r3, RS, #16 \n\t" \ "add r4, r4, r3 \n\t" \ "adc r5, r2 \n\t" \ + MULADDC_RESTORE_R1 \ "ldr r3, [r1] \n\t" \ "add r4, r4, r3 \n\t" \ "adc r2, r5 \n\t" \ @@ -752,7 +771,7 @@ : "=m" (c), "=m" (d), "=m" (s) \ : "m" (s), "m" (d), "m" (c), "m" (b) \ : "r0", "r1", "r2", "r3", "r4", "r5", \ - "r6", "r7", "r8", "r9", "cc" \ + "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \ ); #elif defined(ARM_V6_DSP) From 12b14b2c977c5671690507ed819ea11babf4c59e Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 12:53:41 +0100 Subject: [PATCH 15/24] Simplify ifdefs Signed-off-by: Dave Rodgman --- library/bn_mul.h | 53 ++++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 38 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index af0adb6ef..2aea1e841 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -658,42 +658,14 @@ #endif /* TriCore */ +#if defined(__arm__) + +#if defined(__thumb__) && !defined(__thumb2__) && !defined(__ARMCC_VERSION) /* - * There is a fairly complex matrix of supported options for Thumb / Thumb2 / Arm - * assembly. Choosing the correct codepath depends on the target, the compiler, - * and the optimisation level. + * Thumb 1 ISA. This code path does not work on armclang. */ -#if defined(__thumb__) && !defined(__thumb2__) // Thumb 1 (not Thumb 2) ISA - // Only supported by gcc, when optimisation is enabled; only Thumb 1 codepath works - #define ARM_THUMB_1 - -#elif defined(__thumb2__) // Thumb 2 ISA - - // Any codepath builds. - #define ARM_V6_DSP_OR_THUMB_2 - -#elif defined(__arm__) // Arm ISA - - // any option builds. Thumb 1 codepath does not seem to work. - #define ARM_V6_DSP_OR_THUMB_2 - -#endif /* Arm ISA selection */ - -#if defined(ARM_V6_DSP_OR_THUMB_2) -// Prefer V6+DSP codepath, if we have the right features for it; otherwise -// fall back to generic Thumb 2 / Arm codepath -// V6+DSP codepath is about 2x faster than Thumb 2 (under emulation). -#if (__ARM_ARCH >= 6) && defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) -#define ARM_V6_DSP -#else -#define ARM_THUMB_2 -#endif -#endif /* defined(ARM_V6_DSP_OR_THUMB_2) */ - -#if defined(ARM_THUMB_1) - -#if defined(__OPTIMIZE__) && defined(__GNUC__) +#if !defined(__OPTIMIZE__) && defined(__GNUC__) /* * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about * our use of r7 below, unless -fomit-frame-pointer is passed. @@ -710,12 +682,12 @@ #define MULADDC_PRESERVE_R1 "mov r10, r1 \n\t" #define MULADDC_RESTORE_R1 "mov r1, r10 \n\t" #define MULADDC_SCRATCH_CLOBBER "r10" -#else +#else /* !defined(__OPTIMIZE__) && defined(__GNUC__) */ #define MULADDC_SCRATCH "RS .req r7 \n\t" #define MULADDC_PRESERVE_R1 "" #define MULADDC_RESTORE_R1 "" #define MULADDC_SCRATCH_CLOBBER "r7" -#endif +#endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */ #define MULADDC_X1_INIT \ asm( \ @@ -774,7 +746,9 @@ "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \ ); -#elif defined(ARM_V6_DSP) +#elif (__ARM_ARCH >= 6) && \ + defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) +/* Armv6-M with DSP Instruction Set Extensions */ #define MULADDC_X1_INIT \ { \ @@ -839,7 +813,8 @@ ); \ } -#elif defined(ARM_THUMB_2) +#elif defined(__thumb2__) || !defined(__thumb__) +/* Thumb 2 or Arm ISA, without DSP extensions */ #define MULADDC_X1_INIT \ asm( \ @@ -867,7 +842,9 @@ "r6", "cc" \ ); -#endif /* Arm */ +#endif /* ISA codepath selection */ + +#endif /* defined(__arm__) */ #if defined(__alpha__) From 3964fe0f5e6622f68cc9059358b08e1fd7cc4d36 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 25 May 2023 18:53:57 +0100 Subject: [PATCH 16/24] Improve ISA detection Signed-off-by: Dave Rodgman --- library/bn_mul.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 2aea1e841..83b65cd08 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -660,7 +660,8 @@ #if defined(__arm__) -#if defined(__thumb__) && !defined(__thumb2__) && !defined(__ARMCC_VERSION) +#if defined(__thumb__) && !defined(__thumb2__) +#if !defined(__ARMCC_VERSION) /* * Thumb 1 ISA. This code path does not work on armclang. */ @@ -745,10 +746,13 @@ : "r0", "r1", "r2", "r3", "r4", "r5", \ "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \ ); +#endif /* !defined(__ARMCC_VERSION) */ #elif (__ARM_ARCH >= 6) && \ defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) -/* Armv6-M with DSP Instruction Set Extensions */ +/* Armv6-M (or later) with DSP Instruction Set Extensions. + * Requires support for either Thumb 2 or Arm ISA. + */ #define MULADDC_X1_INIT \ { \ @@ -813,7 +817,7 @@ ); \ } -#elif defined(__thumb2__) || !defined(__thumb__) +#else /* Thumb 2 or Arm ISA, without DSP extensions */ #define MULADDC_X1_INIT \ From 49bd1f2cb2826d6f11588491e76e728fc5488ee4 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 2 Jun 2023 09:21:46 -0400 Subject: [PATCH 17/24] Fix spelling in Changelog Signed-off-by: Dave Rodgman --- ChangeLog.d/armclang-compile-fix.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog.d/armclang-compile-fix.txt b/ChangeLog.d/armclang-compile-fix.txt index 7ef6a9711..63c1eee02 100644 --- a/ChangeLog.d/armclang-compile-fix.txt +++ b/ChangeLog.d/armclang-compile-fix.txt @@ -1,5 +1,5 @@ Bugfix - * Fix armclang compilation error when targetting certain Arm M-class CPUs + * Fix armclang compilation error when targeting certain Arm M-class CPUs (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). Enable bignum optimisations for most Arm platforms when compiling with -O0, (previously optimisations were not available in this case). Fixes #1077. From 6df1e54c1d0cc9ef375b329aee89ae40d68713cd Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 2 Jun 2023 10:27:13 -0400 Subject: [PATCH 18/24] Do not use assembly on Thumb 1 / clang Signed-off-by: Dave Rodgman --- library/bn_mul.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 83b65cd08..fce8d7dba 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -661,9 +661,9 @@ #if defined(__arm__) #if defined(__thumb__) && !defined(__thumb2__) -#if !defined(__ARMCC_VERSION) +#if !defined(__ARMCC_VERSION) && !defined(__clang__) /* - * Thumb 1 ISA. This code path does not work on armclang. + * Thumb 1 ISA. This code path does not build on clang or armclang. */ #if !defined(__OPTIMIZE__) && defined(__GNUC__) @@ -746,7 +746,7 @@ : "r0", "r1", "r2", "r3", "r4", "r5", \ "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \ ); -#endif /* !defined(__ARMCC_VERSION) */ +#endif /* !defined(__ARMCC_VERSION) && !defined(__clang__) */ #elif (__ARM_ARCH >= 6) && \ defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) From 8c315f2f747be8f80a6348c6afa906674d68abf7 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 2 Jun 2023 10:26:24 -0400 Subject: [PATCH 19/24] Add build tests for Thumb and Thumb2 with clang Signed-off-by: Dave Rodgman --- tests/scripts/all.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index f34d0eb86..4bfb1aa59 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -3852,6 +3852,25 @@ component_build_arm_none_eabi_gcc_no_64bit_multiplication () { not grep __aeabi_lmul library/*.o } +component_build_arm_clang_thumb () { + # ~ 30s + + scripts/config.py baremetal + + msg "build: clang thumb 2, make" + make clean + make CC="clang" CFLAGS='-std=c99 -Werror -Os --target=arm-linux-gnueabihf -march=armv7-m -mthumb' lib + + # Some Thumb 1 asm is sensitive to optimisation level, so test both -O0 and -Os + msg "build: clang thumb 1 -O0, make" + make clean + make CC="clang" CFLAGS='-std=c99 -Werror -O0 --target=arm-linux-gnueabihf -mcpu=arm1136j-s -mthumb' lib + + msg "build: clang thumb 1 -Os, make" + make clean + make CC="clang" CFLAGS='-std=c99 -Werror -Os --target=arm-linux-gnueabihf -mcpu=arm1136j-s -mthumb' lib +} + component_build_armcc () { msg "build: ARM Compiler 5" scripts/config.py baremetal From b45d58b9a5b5361bb8bf534ca07ba6d1f95df5cd Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 2 Jun 2023 13:54:00 -0400 Subject: [PATCH 20/24] Add armclang -O0 build test Signed-off-by: Dave Rodgman --- tests/scripts/all.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index 4bfb1aa59..d07592685 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -3894,7 +3894,7 @@ component_build_armcc () { make clean - # Compile with -O1 since some Arm inline assembly is disabled for -O0. + # Compile mostly with -O1 since some Arm inline assembly is disabled for -O0. # ARM Compiler 6 - Target ARMv7-A armc6_build_test "-O1 --target=arm-arm-none-eabi -march=armv7-a" @@ -3914,6 +3914,9 @@ component_build_armcc () { # ARM Compiler 6 - Target ARMv8.2-A - AArch64 armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto" + # ARM Compiler 6 - Target Cortex-M0 - no optimisation + armc6_build_test "-O0 --target=arm-arm-none-eabi -mcpu=cortex-m0" + # ARM Compiler 6 - Target Cortex-M0 armc6_build_test "-Os --target=arm-arm-none-eabi -mcpu=cortex-m0" } From 4db4d6b9b0d1e389fb23328b1ea19f374c273e67 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Sun, 4 Jun 2023 20:41:24 -0400 Subject: [PATCH 21/24] Improve changelog Signed-off-by: Dave Rodgman --- ChangeLog.d/armclang-compile-fix.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ChangeLog.d/armclang-compile-fix.txt b/ChangeLog.d/armclang-compile-fix.txt index 63c1eee02..59ae1cd9d 100644 --- a/ChangeLog.d/armclang-compile-fix.txt +++ b/ChangeLog.d/armclang-compile-fix.txt @@ -1,5 +1,7 @@ Bugfix - * Fix armclang compilation error when targeting certain Arm M-class CPUs - (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23, SecurCore SC000). Enable - bignum optimisations for most Arm platforms when compiling with -O0, - (previously optimisations were not available in this case). Fixes #1077. + * Fix clang and armclang compilation error when targeting certain Arm + M-class CPUs (Cortex-M0, Cortex-M0+, Cortex-M1, Cortex-M23, + SecurCore SC000). Fixes #1077. +Changes + * Enable Arm / Thumb bignum assembly for most Arm platforms when + compiling with gcc, clang or armclang and -O0. From f89e3c5fbd0c83539f68450423abb186f1ea8787 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Sun, 4 Jun 2023 20:41:52 -0400 Subject: [PATCH 22/24] Improve docs & check for non-gcc compilers Signed-off-by: Dave Rodgman --- library/bn_mul.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index fce8d7dba..5d6e728b0 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -661,9 +661,14 @@ #if defined(__arm__) #if defined(__thumb__) && !defined(__thumb2__) -#if !defined(__ARMCC_VERSION) && !defined(__clang__) +#if !defined(__ARMCC_VERSION) && !defined(__clang__) \ + && !defined(__llvm__) && !defined(__INTEL_COMPILER) /* - * Thumb 1 ISA. This code path does not build on clang or armclang. + * Thumb 1 ISA. This code path has only been tested successfully on gcc; + * it does not compile on clang or armclang. + * + * Other compilers which define __GNUC__ may not work. The above macro + * attempts to exclude these untested compilers. */ #if !defined(__OPTIMIZE__) && defined(__GNUC__) From b6e06549f5d3677d7fc47c07cd987b99e8798c27 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Sun, 4 Jun 2023 20:42:17 -0400 Subject: [PATCH 23/24] Rename MULADDC_PRESERVE_R1 etc to MULADDC_PRESERVE_SCRATCH etc Signed-off-by: Dave Rodgman --- library/bn_mul.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index 5d6e728b0..a7473d1a3 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -684,15 +684,15 @@ * instructions to preserve/restore it; otherwise, we can use r7 and avoid * the preserve/restore overhead. */ -#define MULADDC_SCRATCH "RS .req r1 \n\t" -#define MULADDC_PRESERVE_R1 "mov r10, r1 \n\t" -#define MULADDC_RESTORE_R1 "mov r1, r10 \n\t" -#define MULADDC_SCRATCH_CLOBBER "r10" +#define MULADDC_SCRATCH "RS .req r1 \n\t" +#define MULADDC_PRESERVE_SCRATCH "mov r10, r1 \n\t" +#define MULADDC_RESTORE_SCRATCH "mov r1, r10 \n\t" +#define MULADDC_SCRATCH_CLOBBER "r10" #else /* !defined(__OPTIMIZE__) && defined(__GNUC__) */ -#define MULADDC_SCRATCH "RS .req r7 \n\t" -#define MULADDC_PRESERVE_R1 "" -#define MULADDC_RESTORE_R1 "" -#define MULADDC_SCRATCH_CLOBBER "r7" +#define MULADDC_SCRATCH "RS .req r7 \n\t" +#define MULADDC_PRESERVE_SCRATCH "" +#define MULADDC_RESTORE_SCRATCH "" +#define MULADDC_SCRATCH_CLOBBER "r7" #endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */ #define MULADDC_X1_INIT \ @@ -710,7 +710,7 @@ #define MULADDC_X1_CORE \ - MULADDC_PRESERVE_R1 \ + MULADDC_PRESERVE_SCRATCH \ "ldmia r0!, {r6} \n\t" \ "lsr RS, r6, #16 \n\t" \ "lsl r6, r6, #16 \n\t" \ @@ -736,7 +736,7 @@ "lsl r3, RS, #16 \n\t" \ "add r4, r4, r3 \n\t" \ "adc r5, r2 \n\t" \ - MULADDC_RESTORE_R1 \ + MULADDC_RESTORE_SCRATCH \ "ldr r3, [r1] \n\t" \ "add r4, r4, r3 \n\t" \ "adc r2, r5 \n\t" \ From 9a676a7f98309654b3b38500a01b09414dae3abb Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Sun, 4 Jun 2023 20:42:28 -0400 Subject: [PATCH 24/24] Comment tidy-up Signed-off-by: Dave Rodgman --- library/bn_mul.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/library/bn_mul.h b/library/bn_mul.h index a7473d1a3..c5994f704 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -710,7 +710,7 @@ #define MULADDC_X1_CORE \ - MULADDC_PRESERVE_SCRATCH \ + MULADDC_PRESERVE_SCRATCH \ "ldmia r0!, {r6} \n\t" \ "lsr RS, r6, #16 \n\t" \ "lsl r6, r6, #16 \n\t" \ @@ -736,7 +736,7 @@ "lsl r3, RS, #16 \n\t" \ "add r4, r4, r3 \n\t" \ "adc r5, r2 \n\t" \ - MULADDC_RESTORE_SCRATCH \ + MULADDC_RESTORE_SCRATCH \ "ldr r3, [r1] \n\t" \ "add r4, r4, r3 \n\t" \ "adc r2, r5 \n\t" \ @@ -822,8 +822,7 @@ ); \ } -#else -/* Thumb 2 or Arm ISA, without DSP extensions */ +#else /* Thumb 2 or Arm ISA, without DSP extensions */ #define MULADDC_X1_INIT \ asm( \