mirror of
https://github.com/libsdl-org/SDL.git
synced 2025-05-16 01:38:27 +00:00
stlib: Extract SDL_memcpy and SDL_memset to its own file respectively
This is done such that we can disable LTO for these 2 functions when building with MSVC. This is due to a limitation of Link Time Code Generation (LTCG). Code generation might generate a new reference to memset after linking has started. The LTCG must make assumptions about where memset is defined which is normally the C runtime.
This commit is contained in:
parent
a5d338bd2d
commit
ae7446a959
8 changed files with 165 additions and 104 deletions
|
@ -263,108 +263,6 @@ SDL_ScanFloat(const char *text, double *valuep)
|
|||
}
|
||||
#endif
|
||||
|
||||
void *
|
||||
SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
|
||||
{
|
||||
#if defined(HAVE_MEMSET)
|
||||
return memset(dst, c, len);
|
||||
#else
|
||||
size_t left;
|
||||
Uint32 *dstp4;
|
||||
Uint8 *dstp1 = (Uint8 *) dst;
|
||||
Uint8 value1;
|
||||
Uint32 value4;
|
||||
|
||||
/* The value used in memset() is a byte, passed as an int */
|
||||
c &= 0xff;
|
||||
|
||||
/* The destination pointer needs to be aligned on a 4-byte boundary to
|
||||
* execute a 32-bit set. Set first bytes manually if needed until it is
|
||||
* aligned. */
|
||||
value1 = (Uint8)c;
|
||||
while ((uintptr_t)dstp1 & 0x3) {
|
||||
if (len--) {
|
||||
*dstp1++ = value1;
|
||||
} else {
|
||||
return dst;
|
||||
}
|
||||
}
|
||||
|
||||
value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
|
||||
dstp4 = (Uint32 *) dstp1;
|
||||
left = (len % 4);
|
||||
len /= 4;
|
||||
while (len--) {
|
||||
*dstp4++ = value4;
|
||||
}
|
||||
|
||||
dstp1 = (Uint8 *) dstp4;
|
||||
switch (left) {
|
||||
case 3:
|
||||
*dstp1++ = value1;
|
||||
case 2:
|
||||
*dstp1++ = value1;
|
||||
case 1:
|
||||
*dstp1++ = value1;
|
||||
}
|
||||
|
||||
return dst;
|
||||
#endif /* HAVE_MEMSET */
|
||||
}
|
||||
|
||||
void *
|
||||
SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
/* Presumably this is well tuned for speed.
|
||||
On my machine this is twice as fast as the C code below.
|
||||
*/
|
||||
return __builtin_memcpy(dst, src, len);
|
||||
#elif defined(HAVE_MEMCPY)
|
||||
return memcpy(dst, src, len);
|
||||
#elif defined(HAVE_BCOPY)
|
||||
bcopy(src, dst, len);
|
||||
return dst;
|
||||
#else
|
||||
/* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
|
||||
using Uint32* pointers, so we need to make sure the pointers are
|
||||
aligned before we loop using them.
|
||||
*/
|
||||
if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) {
|
||||
/* Do an unaligned byte copy */
|
||||
Uint8 *srcp1 = (Uint8 *)src;
|
||||
Uint8 *dstp1 = (Uint8 *)dst;
|
||||
|
||||
while (len--) {
|
||||
*dstp1++ = *srcp1++;
|
||||
}
|
||||
} else {
|
||||
size_t left = (len % 4);
|
||||
Uint32 *srcp4, *dstp4;
|
||||
Uint8 *srcp1, *dstp1;
|
||||
|
||||
srcp4 = (Uint32 *) src;
|
||||
dstp4 = (Uint32 *) dst;
|
||||
len /= 4;
|
||||
while (len--) {
|
||||
*dstp4++ = *srcp4++;
|
||||
}
|
||||
|
||||
srcp1 = (Uint8 *) srcp4;
|
||||
dstp1 = (Uint8 *) dstp4;
|
||||
switch (left) {
|
||||
case 3:
|
||||
*dstp1++ = *srcp1++;
|
||||
case 2:
|
||||
*dstp1++ = *srcp1++;
|
||||
case 1:
|
||||
*dstp1++ = *srcp1++;
|
||||
}
|
||||
}
|
||||
return dst;
|
||||
#endif /* __GNUC__ */
|
||||
}
|
||||
|
||||
void *
|
||||
SDL_memmove(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue