stlib: Extract SDL_memcpy and SDL_memset to its own file respectively

This is done such that we can disable LTO for these 2 functions when
building with MSVC.

This is due to a limitation of Link Time Code Generation (LTCG).
Code generation might generate a new reference to memset after linking
has started. The LTCG must make assumptions about where memset is
defined which is normally the C runtime.
This commit is contained in:
Anonymous Maarten 2022-06-15 20:48:54 +02:00 committed by Sam Lantinga
parent a5d338bd2d
commit ae7446a959
8 changed files with 165 additions and 104 deletions

View file

@ -263,108 +263,6 @@ SDL_ScanFloat(const char *text, double *valuep)
}
#endif
void *
SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
{
#if defined(HAVE_MEMSET)
return memset(dst, c, len);
#else
size_t left;
Uint32 *dstp4;
Uint8 *dstp1 = (Uint8 *) dst;
Uint8 value1;
Uint32 value4;
/* The value used in memset() is a byte, passed as an int */
c &= 0xff;
/* The destination pointer needs to be aligned on a 4-byte boundary to
* execute a 32-bit set. Set first bytes manually if needed until it is
* aligned. */
value1 = (Uint8)c;
while ((uintptr_t)dstp1 & 0x3) {
if (len--) {
*dstp1++ = value1;
} else {
return dst;
}
}
value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
dstp4 = (Uint32 *) dstp1;
left = (len % 4);
len /= 4;
while (len--) {
*dstp4++ = value4;
}
dstp1 = (Uint8 *) dstp4;
switch (left) {
case 3:
*dstp1++ = value1;
case 2:
*dstp1++ = value1;
case 1:
*dstp1++ = value1;
}
return dst;
#endif /* HAVE_MEMSET */
}
void *
SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
{
#ifdef __GNUC__
/* Presumably this is well tuned for speed.
On my machine this is twice as fast as the C code below.
*/
return __builtin_memcpy(dst, src, len);
#elif defined(HAVE_MEMCPY)
return memcpy(dst, src, len);
#elif defined(HAVE_BCOPY)
bcopy(src, dst, len);
return dst;
#else
/* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
using Uint32* pointers, so we need to make sure the pointers are
aligned before we loop using them.
*/
if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) {
/* Do an unaligned byte copy */
Uint8 *srcp1 = (Uint8 *)src;
Uint8 *dstp1 = (Uint8 *)dst;
while (len--) {
*dstp1++ = *srcp1++;
}
} else {
size_t left = (len % 4);
Uint32 *srcp4, *dstp4;
Uint8 *srcp1, *dstp1;
srcp4 = (Uint32 *) src;
dstp4 = (Uint32 *) dst;
len /= 4;
while (len--) {
*dstp4++ = *srcp4++;
}
srcp1 = (Uint8 *) srcp4;
dstp1 = (Uint8 *) dstp4;
switch (left) {
case 3:
*dstp1++ = *srcp1++;
case 2:
*dstp1++ = *srcp1++;
case 1:
*dstp1++ = *srcp1++;
}
}
return dst;
#endif /* __GNUC__ */
}
void *
SDL_memmove(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
{