diff --git a/src/video/SDL_blit_A_avx2.c b/src/video/SDL_blit_A_avx2.c index 66c9d01c16..78bdf9ecc7 100644 --- a/src/video/SDL_blit_A_avx2.c +++ b/src/video/SDL_blit_A_avx2.c @@ -9,7 +9,7 @@ #include "SDL_blit.h" #include "SDL_blit_A_sse4_1.h" -__m256i SDL_TARGETING("avx2") GetSDL_PixelFormatAlphaMask_AVX2(SDL_PixelFormat* dstfmt) { +__m256i SDL_TARGETING("avx2") GetSDL_PixelFormatAlphaMask_AVX2(const SDL_PixelFormat* dstfmt) { Uint8 index = dstfmt->Ashift / 4; /* Handle case where bad input sent */ if (dstfmt->Ashift == dstfmt->Bshift && dstfmt->Ashift == 0) { @@ -28,7 +28,7 @@ __m256i SDL_TARGETING("avx2") GetSDL_PixelFormatAlphaMask_AVX2(SDL_PixelFormat* * @param dst A pointer to four 32-bit pixels of ARGB format to retain visual data for while alpha blending * @return A 128-bit wide vector of four alpha-blended pixels in ARGB format */ -__m128i SDL_TARGETING("avx2") MixRGBA_AVX2(__m128i src, __m128i dst, __m256i alphaMask) { +__m128i SDL_TARGETING("avx2") MixRGBA_AVX2(const __m128i src, const __m128i dst, const __m256i alphaMask) { __m256i src_color = _mm256_cvtepu8_epi16(src); __m256i dst_color = _mm256_cvtepu8_epi16(dst); __m256i alpha = _mm256_shuffle_epi8(src_color, alphaMask); @@ -94,7 +94,7 @@ void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) if (remaining_pixels == 1) { Uint32 *src_ptr = ((Uint32*)(src + (offset * 4))); Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4))); - Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt); + Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt, dstfmt); /* Old GCC has bad or no _mm_loadu_si32 */ #if defined(__GNUC__) && (__GNUC__ < 11) __m128i c_src = _mm_set_epi32(0, 0, 0, pixel); diff --git a/src/video/SDL_blit_A_sse4_1.c b/src/video/SDL_blit_A_sse4_1.c index 5a779b01c2..7dfd669624 100644 --- a/src/video/SDL_blit_A_sse4_1.c +++ b/src/video/SDL_blit_A_sse4_1.c @@ -12,7 +12,7 @@ /** * A helper function to create an alpha mask for use with MixRGBA_SSE4_1 based on pixel format */ 
-__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatAlphaMask_SSE4_1(SDL_PixelFormat* dstfmt) { +__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatAlphaMask_SSE4_1(const SDL_PixelFormat* dstfmt) { Uint8 index = dstfmt->Ashift / 8; /* Handle case where bad input sent */ if (dstfmt->Ashift == dstfmt->Bshift && dstfmt->Ashift == 0) { @@ -29,7 +29,7 @@ __m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatAlphaMask_SSE4_1(SDL_PixelForm * @param dst A pointer to two 32-bit pixels of ARGB format to retain visual data for while alpha blending * @return A 128-bit wide vector of two alpha-blended pixels in ARGB format */ -__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst, __m128i alphaMask) { +__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(const __m128i src, const __m128i dst, const __m128i alphaMask) { __m128i src_color = _mm_cvtepu8_epi16(src); __m128i dst_color = _mm_cvtepu8_epi16(dst); /** @@ -47,34 +47,43 @@ __m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst, __m128i return _mm_add_epi8(reduced, dst); } -Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) { - Uint8 a = (color >> srcFormat->Ashift) & 0xFF; - Uint8 r = (color >> srcFormat->Rshift) & 0xFF; - Uint8 g = (color >> srcFormat->Gshift) & 0xFF; - Uint8 b = (color >> srcFormat->Bshift) & 0xFF; +Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcfmt, const SDL_PixelFormat* dstfmt) { /* Repack one pixel: pull the 8-bit A/R/G/B channels out of color using srcfmt's shifts and reassemble them at dstfmt's shifts */ + Uint8 a = (color >> srcfmt->Ashift) & 0xFF; + Uint8 r = (color >> srcfmt->Rshift) & 0xFF; + Uint8 g = (color >> srcfmt->Gshift) & 0xFF; + Uint8 b = (color >> srcfmt->Bshift) & 0xFF; - return (a << 24) | (r << 16) | (g << 8) | b; + /* Bad input guard: a destination whose alpha and blue shifts are both 0 gets alpha placed at bit 24 instead */ + Uint8 aShift = dstfmt->Ashift; + if (aShift == dstfmt->Bshift && aShift == 0) { + aShift = 24; + } /* Channels are widened to Uint32 before shifting: a Uint8 promotes to signed int, and shifting a set bit into bit 31 of an int is undefined behaviour */ + return ((Uint32)a << aShift) | + ((Uint32)r << dstfmt->Rshift) | + ((Uint32)g << dstfmt->Gshift) | + ((Uint32)b << dstfmt->Bshift); } /* * This helper function converts arbitrary pixel formats into a shuffle 
mask for _mm_shuffle_epi8 */ -__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatShuffleMask(const SDL_PixelFormat* srcFormat, const SDL_PixelFormat* dstFormat) { +__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatShuffleMask(const SDL_PixelFormat* srcfmt, + const SDL_PixelFormat* dstfmt) { /* Calculate shuffle indices based on the source and destination SDL_PixelFormat */ Uint8 shuffleIndices[16]; - Uint8 dstAshift = dstFormat->Ashift / 8; - Uint8 dstRshift = dstFormat->Rshift / 8; - Uint8 dstGshift = dstFormat->Gshift / 8; - Uint8 dstBshift = dstFormat->Bshift / 8; + Uint8 dstAshift = dstfmt->Ashift / 8; + Uint8 dstRshift = dstfmt->Rshift / 8; + Uint8 dstGshift = dstfmt->Gshift / 8; + Uint8 dstBshift = dstfmt->Bshift / 8; /* Handle case where bad input sent */ if (dstAshift == dstBshift && dstAshift == 0) { dstAshift = 3; } for (int i = 0; i < 4; ++i) { - shuffleIndices[dstAshift + i * 4] = srcFormat->Ashift / 8 + i * 4; - shuffleIndices[dstRshift + i * 4] = srcFormat->Rshift / 8 + i * 4; - shuffleIndices[dstGshift + i * 4] = srcFormat->Gshift / 8 + i * 4; - shuffleIndices[dstBshift + i * 4] = srcFormat->Bshift / 8 + i * 4; + shuffleIndices[dstAshift + i * 4] = srcfmt->Ashift / 8 + i * 4; + shuffleIndices[dstRshift + i * 4] = srcfmt->Rshift / 8 + i * 4; + shuffleIndices[dstGshift + i * 4] = srcfmt->Gshift / 8 + i * 4; + shuffleIndices[dstBshift + i * 4] = srcfmt->Bshift / 8 + i * 4; } /* Create shuffle mask based on the calculated indices */ @@ -135,7 +144,7 @@ void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { if (remaining_pixels == 1) { Uint32 *src_ptr = ((Uint32*)(src + (offset * 4))); Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4))); - Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt); + Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt, dstfmt); /* Old GCC has bad or no _mm_loadu_si32 */ #if defined(__GNUC__) && (__GNUC__ < 11) __m128i c_src = _mm_set_epi32(0, 0, 0, pixel); diff --git 
a/src/video/SDL_blit_A_sse4_1.h b/src/video/SDL_blit_A_sse4_1.h index ec7f8b7ae4..132120d051 100644 --- a/src/video/SDL_blit_A_sse4_1.h +++ b/src/video/SDL_blit_A_sse4_1.h @@ -2,11 +2,11 @@ #define SDL_SDL_BLIT_A_SSE4_1_H #ifdef SDL_SSE4_1_INTRINSICS -Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat); +Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcfmt, const SDL_PixelFormat* dstfmt); -__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatAlphaMask_SSE4_1(SDL_PixelFormat* dstfmt); +__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatAlphaMask_SSE4_1(const SDL_PixelFormat* dstfmt); -__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatShuffleMask(const SDL_PixelFormat* srcFormat, const SDL_PixelFormat* dstFormat); +__m128i SDL_TARGETING("sse4.1") GetSDL_PixelFormatShuffleMask(const SDL_PixelFormat* srcfmt, const SDL_PixelFormat* dstfmt); __m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst, __m128i alphaMask);