From c80c540be32e7b5872f2ef1af9cabd64af1c15c0 Mon Sep 17 00:00:00 2001 From: Isaac Aronson Date: Wed, 6 Sep 2023 17:17:09 -0500 Subject: [PATCH] Support older GCC and clang that lack some intrinsics --- src/video/SDL_blit_A_avx2.c | 2 ++ src/video/SDL_blit_A_sse4_1.c | 3 +++ src/video/SDL_blit_A_sse4_1.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/video/SDL_blit_A_avx2.c b/src/video/SDL_blit_A_avx2.c index d2a241874d..4f8d01b313 100644 --- a/src/video/SDL_blit_A_avx2.c +++ b/src/video/SDL_blit_A_avx2.c @@ -4,6 +4,8 @@ #ifdef SDL_AVX2_INTRINSICS +#define SDL_blit_A_avx2_c + #include "SDL_blit.h" #include "SDL_blit_A_sse4_1.h" diff --git a/src/video/SDL_blit_A_sse4_1.c b/src/video/SDL_blit_A_sse4_1.c index 2135c87001..b8bf6f4902 100644 --- a/src/video/SDL_blit_A_sse4_1.c +++ b/src/video/SDL_blit_A_sse4_1.c @@ -4,7 +4,10 @@ #ifdef SDL_SSE4_1_INTRINSICS +#define SDL_blit_A_sse4_1_c + #include "SDL_blit.h" +#include "SDL_blit_A_sse4_1.h" /** * Using the SSE4.1 instruction set, blit four pixels with alpha blending diff --git a/src/video/SDL_blit_A_sse4_1.h b/src/video/SDL_blit_A_sse4_1.h index f26196efc6..2850290fd5 100644 --- a/src/video/SDL_blit_A_sse4_1.h +++ b/src/video/SDL_blit_A_sse4_1.h @@ -12,4 +12,36 @@ void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo *info); #endif +/* for compatibility with older compilers; this section is active only when the including .c file defines SDL_blit_A_sse4_1_c or SDL_blit_A_avx2_c before including this header: */ +#if defined(SDL_blit_A_sse4_1_c) || defined(SDL_blit_A_avx2_c) +/* _mm_loadu_si64 : missing in clang < 3.9, missing in gcc < 9 + * _mm_storeu_si64: missing in clang < 8.0, missing in gcc < 9 + * __m128i_u type (to be used to define the missing two above): + * missing in gcc < 7, missing in clang < 9 + * Both shim macros below are keyed on MISSING__mm_storeu_si64, whose thresholds also cover the compilers listed above as lacking _mm_loadu_si64. NOTE(review): Apple clang uses its own __clang_major__ numbering - confirm these thresholds hold there. */ +#if defined(__clang__) +#if (__clang_major__ < 9) +#define MISSING__m128i_u +#endif +#if (__clang_major__ < 8) +#define MISSING__mm_storeu_si64 +#endif +#elif defined(__GNUC__) +#if (__GNUC__ < 7) +#define MISSING__m128i_u +#endif +#if (__GNUC__ < 9) +#define MISSING__mm_storeu_si64 
+#endif +#endif + +#ifdef MISSING__m128i_u +typedef long long __m128i_u __attribute__((__vector_size__(16), __may_alias__, __aligned__(1))); +#endif +#ifdef MISSING__mm_storeu_si64 +#define _mm_loadu_si64(_x) _mm_loadl_epi64((__m128i_u*)(_x)) +#define _mm_storeu_si64(_x,_y) _mm_storel_epi64((__m128i_u*)(_x),(_y)) +#endif +#endif /* SDL_blit_A_sse4_1_c || SDL_blit_A_avx2_c */ + #endif //SDL_SDL_BLIT_A_SSE4_1_H