mirror of
https://github.com/libsdl-org/SDL.git
synced 2025-05-19 19:28:28 +00:00
stdinc: Drastically improve SDL_StepUTF8() and make it a public API.
Fixes #10105.
This commit is contained in:
parent
9b8c5f642f
commit
a9cfcf6bde
8 changed files with 211 additions and 58 deletions
|
@ -1250,6 +1250,67 @@ extern SDL_DECLSPEC int SDLCALL SDL_strcasecmp(const char *str1, const char *str
|
||||||
*/
|
*/
|
||||||
extern SDL_DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen);
|
extern SDL_DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Unicode REPLACEMENT CHARACTER codepoint.
|
||||||
|
*
|
||||||
|
* SDL_StepUTF8() reports this codepoint when it encounters a UTF-8 string
|
||||||
|
* with encoding errors.
|
||||||
|
*
|
||||||
|
* This tends to render as something like a question mark in most places.
|
||||||
|
*
|
||||||
|
* \since This macro is available since SDL 3.0.0.
|
||||||
|
*
|
||||||
|
* \sa SDL_StepUTF8
|
||||||
|
*/
|
||||||
|
#define SDL_INVALID_UNICODE_CODEPOINT 0xFFFD
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a UTF-8 string, one Unicode codepoint at a time.
|
||||||
|
*
|
||||||
|
* This will return the first Unicode codepoint in the UTF-8 encoded
|
||||||
|
* string in `*pstr`, and then advance `*pstr` past any consumed bytes
|
||||||
|
* before returning.
|
||||||
|
*
|
||||||
|
* It will not access more than `*pslen` bytes from the string.
|
||||||
|
* `*pslen` will be adjusted, as well, subtracting the number of
|
||||||
|
* bytes consumed.
|
||||||
|
*
|
||||||
|
* `pslen` is allowed to be NULL, in which case the string _must_ be
|
||||||
|
* null-terminated, as the function will blindly read until it sees
|
||||||
|
* the null terminator.
|
||||||
|
*
|
||||||
|
* If `*pslen` is zero, it assumes the end of string is reached and
|
||||||
|
* returns a zero codepoint regardless of the contents of the string
|
||||||
|
* buffer.
|
||||||
|
*
|
||||||
|
* If the resulting codepoint is zero (a null terminator), or `*pslen`
|
||||||
|
* is zero, it will not advance `*pstr` or `*pslen` at all.
|
||||||
|
*
|
||||||
|
* Generally this function is called in a loop until it returns zero,
|
||||||
|
* adjusting its parameters each iteration.
|
||||||
|
*
|
||||||
|
* If an invalid UTF-8 sequence is encountered, this function returns
|
||||||
|
* SDL_INVALID_UNICODE_CODEPOINT and advances the string/length by one
|
||||||
|
* byte (which is to say, a multibyte sequence might produce several
|
||||||
|
* SDL_INVALID_UNICODE_CODEPOINT returns before it syncs to the next
|
||||||
|
* valid UTF-8 sequence).
|
||||||
|
*
|
||||||
|
* Several things can generate invalid UTF-8 sequences, including
|
||||||
|
* overlong encodings, the use of UTF-16 surrogate values, and
|
||||||
|
* truncated data. Please refer to
|
||||||
|
* [RFC3629](https://www.ietf.org/rfc/rfc3629.txt) for details.
|
||||||
|
*
|
||||||
|
* \param pstr a pointer to a UTF-8 string pointer to be read and adjusted.
|
||||||
|
* \param pslen a pointer to the number of bytes in the string, to be read
|
||||||
|
* and adjusted. NULL is allowed.
|
||||||
|
* \returns the first Unicode codepoint in the string.
|
||||||
|
*
|
||||||
|
* \threadsafety It is safe to call this function from any thread.
|
||||||
|
*
|
||||||
|
* \since This function is available since SDL 3.0.0.
|
||||||
|
*/
|
||||||
|
extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen);
|
||||||
|
|
||||||
extern SDL_DECLSPEC int SDLCALL SDL_sscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, ...) SDL_SCANF_VARARG_FUNC(2);
|
extern SDL_DECLSPEC int SDLCALL SDL_sscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, ...) SDL_SCANF_VARARG_FUNC(2);
|
||||||
extern SDL_DECLSPEC int SDLCALL SDL_vsscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, va_list ap) SDL_SCANF_VARARG_FUNCV(2);
|
extern SDL_DECLSPEC int SDLCALL SDL_vsscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, va_list ap) SDL_SCANF_VARARG_FUNCV(2);
|
||||||
extern SDL_DECLSPEC int SDLCALL SDL_snprintf(SDL_OUT_Z_CAP(maxlen) char *text, size_t maxlen, SDL_PRINTF_FORMAT_STRING const char *fmt, ... ) SDL_PRINTF_VARARG_FUNC(3);
|
extern SDL_DECLSPEC int SDLCALL SDL_snprintf(SDL_OUT_Z_CAP(maxlen) char *text, size_t maxlen, SDL_PRINTF_FORMAT_STRING const char *fmt, ... ) SDL_PRINTF_VARARG_FUNC(3);
|
||||||
|
|
|
@ -788,6 +788,7 @@ SDL3_0.0.0 {
|
||||||
SDL_SignalCondition;
|
SDL_SignalCondition;
|
||||||
SDL_SoftStretch;
|
SDL_SoftStretch;
|
||||||
SDL_StartTextInput;
|
SDL_StartTextInput;
|
||||||
|
SDL_StepUTF8;
|
||||||
SDL_StopHapticEffect;
|
SDL_StopHapticEffect;
|
||||||
SDL_StopHapticEffects;
|
SDL_StopHapticEffects;
|
||||||
SDL_StopHapticRumble;
|
SDL_StopHapticRumble;
|
||||||
|
|
|
@ -813,6 +813,7 @@
|
||||||
#define SDL_SignalCondition SDL_SignalCondition_REAL
|
#define SDL_SignalCondition SDL_SignalCondition_REAL
|
||||||
#define SDL_SoftStretch SDL_SoftStretch_REAL
|
#define SDL_SoftStretch SDL_SoftStretch_REAL
|
||||||
#define SDL_StartTextInput SDL_StartTextInput_REAL
|
#define SDL_StartTextInput SDL_StartTextInput_REAL
|
||||||
|
#define SDL_StepUTF8 SDL_StepUTF8_REAL
|
||||||
#define SDL_StopHapticEffect SDL_StopHapticEffect_REAL
|
#define SDL_StopHapticEffect SDL_StopHapticEffect_REAL
|
||||||
#define SDL_StopHapticEffects SDL_StopHapticEffects_REAL
|
#define SDL_StopHapticEffects SDL_StopHapticEffects_REAL
|
||||||
#define SDL_StopHapticRumble SDL_StopHapticRumble_REAL
|
#define SDL_StopHapticRumble SDL_StopHapticRumble_REAL
|
||||||
|
|
|
@ -823,6 +823,7 @@ SDL_DYNAPI_PROC(int,SDL_ShowWindowSystemMenu,(SDL_Window *a, int b, int c),(a,b,
|
||||||
SDL_DYNAPI_PROC(int,SDL_SignalCondition,(SDL_Condition *a),(a),return)
|
SDL_DYNAPI_PROC(int,SDL_SignalCondition,(SDL_Condition *a),(a),return)
|
||||||
SDL_DYNAPI_PROC(int,SDL_SoftStretch,(SDL_Surface *a, const SDL_Rect *b, SDL_Surface *c, const SDL_Rect *d, SDL_ScaleMode e),(a,b,c,d,e),return)
|
SDL_DYNAPI_PROC(int,SDL_SoftStretch,(SDL_Surface *a, const SDL_Rect *b, SDL_Surface *c, const SDL_Rect *d, SDL_ScaleMode e),(a,b,c,d,e),return)
|
||||||
SDL_DYNAPI_PROC(int,SDL_StartTextInput,(SDL_Window *a),(a),return)
|
SDL_DYNAPI_PROC(int,SDL_StartTextInput,(SDL_Window *a),(a),return)
|
||||||
|
SDL_DYNAPI_PROC(Uint32,SDL_StepUTF8,(const char **a, size_t *b),(a,b),return)
|
||||||
SDL_DYNAPI_PROC(int,SDL_StopHapticEffect,(SDL_Haptic *a, int b),(a,b),return)
|
SDL_DYNAPI_PROC(int,SDL_StopHapticEffect,(SDL_Haptic *a, int b),(a,b),return)
|
||||||
SDL_DYNAPI_PROC(int,SDL_StopHapticEffects,(SDL_Haptic *a),(a),return)
|
SDL_DYNAPI_PROC(int,SDL_StopHapticEffects,(SDL_Haptic *a),(a),return)
|
||||||
SDL_DYNAPI_PROC(int,SDL_StopHapticRumble,(SDL_Haptic *a),(a),return)
|
SDL_DYNAPI_PROC(int,SDL_StopHapticRumble,(SDL_Haptic *a),(a),return)
|
||||||
|
|
|
@ -185,7 +185,7 @@ static char *CaseFoldUtf8String(const char *fname)
|
||||||
Uint32 codepoint;
|
Uint32 codepoint;
|
||||||
char *ptr = retval;
|
char *ptr = retval;
|
||||||
size_t remaining = allocation;
|
size_t remaining = allocation;
|
||||||
while ((codepoint = SDL_StepUTF8(&fname, 4)) != 0) {
|
while ((codepoint = SDL_StepUTF8(&fname, NULL)) != 0) {
|
||||||
Uint32 folded[3];
|
Uint32 folded[3];
|
||||||
const int num_folded = SDL_CaseFoldUnicode(codepoint, folded);
|
const int num_folded = SDL_CaseFoldUnicode(codepoint, folded);
|
||||||
SDL_assert(num_folded > 0);
|
SDL_assert(num_folded > 0);
|
||||||
|
|
|
@ -32,9 +32,6 @@
|
||||||
|
|
||||||
#include "SDL_casefolding.h"
|
#include "SDL_casefolding.h"
|
||||||
|
|
||||||
// this is the Unicode REPLACEMENT CHARACTER, used for invalid codepoint values.
|
|
||||||
#define INVALID_UNICODE_CODEPOINT 0xFFFD
|
|
||||||
|
|
||||||
#if defined(__SIZEOF_WCHAR_T__)
|
#if defined(__SIZEOF_WCHAR_T__)
|
||||||
#define SDL_SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__
|
#define SDL_SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__
|
||||||
#elif defined(SDL_PLATFORM_WINDOWS)
|
#elif defined(SDL_PLATFORM_WINDOWS)
|
||||||
|
@ -129,7 +126,7 @@ int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to)
|
||||||
cp1 = folded1[tail1++]; \
|
cp1 = folded1[tail1++]; \
|
||||||
} else { \
|
} else { \
|
||||||
const Uint##bits *str1start = (const Uint##bits *) str1; \
|
const Uint##bits *str1start = (const Uint##bits *) str1; \
|
||||||
head1 = SDL_CaseFoldUnicode(SDL_StepUTF##bits(&str1, slen1), folded1); \
|
head1 = SDL_CaseFoldUnicode(StepUTF##bits(&str1, slen1), folded1); \
|
||||||
update_slen1; \
|
update_slen1; \
|
||||||
cp1 = folded1[0]; \
|
cp1 = folded1[0]; \
|
||||||
tail1 = 1; \
|
tail1 = 1; \
|
||||||
|
@ -138,7 +135,7 @@ int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to)
|
||||||
cp2 = folded2[tail2++]; \
|
cp2 = folded2[tail2++]; \
|
||||||
} else { \
|
} else { \
|
||||||
const Uint##bits *str2start = (const Uint##bits *) str2; \
|
const Uint##bits *str2start = (const Uint##bits *) str2; \
|
||||||
head2 = SDL_CaseFoldUnicode(SDL_StepUTF##bits(&str2, slen2), folded2); \
|
head2 = SDL_CaseFoldUnicode(StepUTF##bits(&str2, slen2), folded2); \
|
||||||
update_slen2; \
|
update_slen2; \
|
||||||
cp2 = folded2[0]; \
|
cp2 = folded2[0]; \
|
||||||
tail2 = 1; \
|
tail2 = 1; \
|
||||||
|
@ -154,12 +151,23 @@ int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
Uint32 SDL_StepUTF8(const char **_str, const size_t slen)
|
static Uint32 StepUTF8(const char **_str, const size_t slen)
|
||||||
{
|
{
|
||||||
const char *str = *_str;
|
/*
|
||||||
const Uint32 octet = (Uint32) (slen ? ((Uint8) *str) : 0);
|
* From rfc3629, the UTF-8 spec:
|
||||||
|
* https://www.ietf.org/rfc/rfc3629.txt
|
||||||
|
*
|
||||||
|
* Char. number range | UTF-8 octet sequence
|
||||||
|
* (hexadecimal) | (binary)
|
||||||
|
* --------------------+---------------------------------------------
|
||||||
|
* 0000 0000-0000 007F | 0xxxxxxx
|
||||||
|
* 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
|
||||||
|
* 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
|
||||||
|
* 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
|
*/
|
||||||
|
|
||||||
// !!! FIXME: this could have _way_ more error checking! Illegal surrogate codepoints, unexpected bit patterns, etc.
|
const Uint8 *str = (const Uint8 *) *_str;
|
||||||
|
const Uint32 octet = (Uint32) (slen ? *str : 0);
|
||||||
|
|
||||||
if (octet == 0) { // null terminator, end of string.
|
if (octet == 0) { // null terminator, end of string.
|
||||||
return 0; // don't advance `*_str`.
|
return 0; // don't advance `*_str`.
|
||||||
|
@ -167,41 +175,73 @@ Uint32 SDL_StepUTF8(const char **_str, const size_t slen)
|
||||||
(*_str)++;
|
(*_str)++;
|
||||||
return octet;
|
return octet;
|
||||||
} else if (((octet & 0xE0) == 0xC0) && (slen >= 2)) { // 110xxxxx 10xxxxxx: two byte codepoint.
|
} else if (((octet & 0xE0) == 0xC0) && (slen >= 2)) { // 110xxxxx 10xxxxxx: two byte codepoint.
|
||||||
if (slen >= 2) {
|
const Uint8 str1 = str[1];
|
||||||
*_str += 2;
|
if ((str1 & 0xC0) == 0x80) { // If trailing bytes aren't 10xxxxxx, sequence is bogus.
|
||||||
return ((octet & 0x1F) << 6) | (((Uint8) str[1]) & 0x3F);
|
const Uint32 retval = ((octet & 0x1F) << 6) | (str1 & 0x3F);
|
||||||
|
if (retval >= 0x0080) { // rfc3629 says you can't use overlong sequences for smaller values.
|
||||||
|
*_str += 2;
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (((octet & 0xF0) == 0xE0) && (slen >= 3)) { // 1110xxxx 10xxxxxx 10xxxxxx: three byte codepoint.
|
} else if (((octet & 0xF0) == 0xE0) && (slen >= 3)) { // 1110xxxx 10xxxxxx 10xxxxxx: three byte codepoint.
|
||||||
*_str += 3;
|
const Uint8 str1 = str[1];
|
||||||
const Uint32 octet2 = ((Uint32) (((Uint8) str[1]) & 0x1F)) << 6;
|
const Uint8 str2 = str[2];
|
||||||
const Uint32 octet3 = (Uint32) (((Uint8) str[2]) & 0x3F);
|
if (((str1 & 0xC0) == 0x80) && ((str2 & 0xC0) == 0x80)) { // If trailing bytes aren't 10xxxxxx, sequence is bogus.
|
||||||
return ((octet & 0x0F) << 12) | octet2 | octet3;
|
const Uint32 octet2 = ((Uint32) (str1 & 0x3F)) << 6;
|
||||||
|
const Uint32 octet3 = ((Uint32) (str2 & 0x3F));
|
||||||
|
const Uint32 retval = ((octet & 0x0F) << 12) | octet2 | octet3;
|
||||||
|
if (retval >= 0x800) { // rfc3629 says you can't use overlong sequences for smaller values.
|
||||||
|
if ((retval < 0xD800) || (retval > 0xDFFF)) { // UTF-16 surrogate values are illegal in UTF-8.
|
||||||
|
*_str += 3;
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (((octet & 0xF8) == 0xF0) && (slen >= 4)) { // 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx: four byte codepoint.
|
} else if (((octet & 0xF8) == 0xF0) && (slen >= 4)) { // 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx: four byte codepoint.
|
||||||
*_str += 4;
|
const Uint8 str1 = str[1];
|
||||||
const Uint32 octet2 = ((Uint32) (((Uint8) str[1]) & 0x1F)) << 12;
|
const Uint8 str2 = str[2];
|
||||||
const Uint32 octet3 = ((Uint32) (((Uint8) str[2]) & 0x3F)) << 6;
|
const Uint8 str3 = str[3];
|
||||||
const Uint32 octet4 = (Uint32) (((Uint8) str[3]) & 0x3F);
|
if (((str1 & 0xC0) == 0x80) && ((str2 & 0xC0) == 0x80) && ((str3 & 0xC0) == 0x80)) { // If trailing bytes aren't 10xxxxxx, sequence is bogus.
|
||||||
return ((octet & 0x07) << 18) | octet2 | octet3 | octet4;
|
const Uint32 octet2 = ((Uint32) (str1 & 0x1F)) << 12;
|
||||||
|
const Uint32 octet3 = ((Uint32) (str2 & 0x3F)) << 6;
|
||||||
|
const Uint32 octet4 = ((Uint32) (str3 & 0x3F));
|
||||||
|
const Uint32 retval = ((octet & 0x07) << 18) | octet2 | octet3 | octet4;
|
||||||
|
if (retval >= 0x10000) { // rfc3629 says you can't use overlong sequences for smaller values.
|
||||||
|
*_str += 4;
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// bogus byte, skip ahead, return a REPLACEMENT CHARACTER.
|
// bogus byte, skip ahead, return a REPLACEMENT CHARACTER.
|
||||||
(*_str)++;
|
(*_str)++;
|
||||||
return INVALID_UNICODE_CODEPOINT;
|
return SDL_INVALID_UNICODE_CODEPOINT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Public entry point: decode one Unicode codepoint from the UTF-8 data at
 * `*pstr` and advance `*pstr` past the bytes that were consumed.
 *
 * pstr:  in/out pointer to the UTF-8 data.
 * pslen: in/out count of readable bytes, reduced by the number of bytes
 *        consumed. May be NULL, in which case the string must be
 *        null-terminated.
 *
 * Returns the decoded codepoint, 0 at the end of the string (nothing is
 * advanced in that case), or SDL_INVALID_UNICODE_CODEPOINT for a malformed
 * sequence (one byte is consumed).
 */
Uint32 SDL_StepUTF8(const char **pstr, size_t *pslen)
{
    if (!pslen) {
        // No length was supplied, so the null terminator is the only bound.
        // StepUTF8() loads every trailing byte of a multibyte sequence before
        // it validates any of them, so handing it a flat 4 would let it read
        // past the terminator of a string that ends in a truncated sequence.
        // Count the bytes that are really there instead.
        const char *str = *pstr;
        size_t avail = 0;
        while ((avail < 4) && (str[avail] != '\0')) {  // 4 == max codepoint size.
            avail++;
        }
        return StepUTF8(pstr, avail);
    }

    const char *origstr = *pstr;
    const Uint32 retval = StepUTF8(pstr, *pslen);
    *pslen -= (size_t) (*pstr - origstr);  // charge the caller only for what was consumed.
    return retval;
}
|
||||||
|
|
||||||
#if (SDL_SIZEOF_WCHAR_T == 2)
|
#if (SDL_SIZEOF_WCHAR_T == 2)
|
||||||
static Uint32 SDL_StepUTF16(const Uint16 **_str, const size_t slen)
|
static Uint32 StepUTF16(const Uint16 **_str, const size_t slen)
|
||||||
{
|
{
|
||||||
const Uint16 *str = *_str;
|
const Uint16 *str = *_str;
|
||||||
Uint32 cp = (Uint32) *(str++);
|
Uint32 cp = (Uint32) *(str++);
|
||||||
if (cp == 0) {
|
if (cp == 0) {
|
||||||
return 0; // don't advance string pointer.
|
return 0; // don't advance string pointer.
|
||||||
} else if ((cp >= 0xDC00) && (cp <= 0xDFFF)) {
|
} else if ((cp >= 0xDC00) && (cp <= 0xDFFF)) {
|
||||||
cp = INVALID_UNICODE_CODEPOINT; // Orphaned second half of surrogate pair
|
cp = SDL_INVALID_UNICODE_CODEPOINT; // Orphaned second half of surrogate pair
|
||||||
} else if ((cp >= 0xD800) && (cp <= 0xDBFF)) { // start of surrogate pair!
|
} else if ((cp >= 0xD800) && (cp <= 0xDBFF)) { // start of surrogate pair!
|
||||||
const Uint32 pair = (Uint32) *str;
|
const Uint32 pair = (Uint32) *str;
|
||||||
if ((pair == 0) || ((pair < 0xDC00) || (pair > 0xDFFF))) {
|
if ((pair == 0) || ((pair < 0xDC00) || (pair > 0xDFFF))) {
|
||||||
cp = INVALID_UNICODE_CODEPOINT;
|
cp = SDL_INVALID_UNICODE_CODEPOINT;
|
||||||
} else {
|
} else {
|
||||||
str++; // eat the other surrogate.
|
str++; // eat the other surrogate.
|
||||||
cp = 0x10000 + (((cp - 0xD800) << 10) | (pair - 0xDC00));
|
cp = 0x10000 + (((cp - 0xD800) << 10) | (pair - 0xDC00));
|
||||||
|
@ -209,10 +249,10 @@ static Uint32 SDL_StepUTF16(const Uint16 **_str, const size_t slen)
|
||||||
}
|
}
|
||||||
|
|
||||||
*_str = str;
|
*_str = str;
|
||||||
return (cp > 0x10FFFF) ? INVALID_UNICODE_CODEPOINT : cp;
|
return (cp > 0x10FFFF) ? SDL_INVALID_UNICODE_CODEPOINT : cp;
|
||||||
}
|
}
|
||||||
#elif (SDL_SIZEOF_WCHAR_T == 4)
|
#elif (SDL_SIZEOF_WCHAR_T == 4)
|
||||||
static Uint32 SDL_StepUTF32(const Uint32 **_str, const size_t slen)
|
static Uint32 StepUTF32(const Uint32 **_str, const size_t slen)
|
||||||
{
|
{
|
||||||
if (!slen) {
|
if (!slen) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -225,7 +265,7 @@ static Uint32 SDL_StepUTF32(const Uint32 **_str, const size_t slen)
|
||||||
}
|
}
|
||||||
|
|
||||||
(*_str)++;
|
(*_str)++;
|
||||||
return (cp > 0x10FFFF) ? INVALID_UNICODE_CODEPOINT : cp;
|
return (cp > 0x10FFFF) ? SDL_INVALID_UNICODE_CODEPOINT : cp;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -816,7 +856,7 @@ size_t SDL_utf8strlcpy(SDL_OUT_Z_CAP(dst_bytes) char *dst, const char *src, size
|
||||||
size_t SDL_utf8strlen(const char *str)
|
size_t SDL_utf8strlen(const char *str)
|
||||||
{
|
{
|
||||||
size_t retval = 0;
|
size_t retval = 0;
|
||||||
while (SDL_StepUTF8(&str, 4)) {
|
while (SDL_StepUTF8(&str, NULL)) {
|
||||||
retval++;
|
retval++;
|
||||||
}
|
}
|
||||||
return retval;
|
return retval;
|
||||||
|
@ -825,14 +865,9 @@ size_t SDL_utf8strlen(const char *str)
|
||||||
size_t SDL_utf8strnlen(const char *str, size_t bytes)
|
size_t SDL_utf8strnlen(const char *str, size_t bytes)
|
||||||
{
|
{
|
||||||
size_t retval = 0;
|
size_t retval = 0;
|
||||||
const char *strstart = str;
|
while (SDL_StepUTF8(&str, &bytes)) {
|
||||||
|
|
||||||
while (SDL_StepUTF8(&str, bytes)) {
|
|
||||||
bytes -= (size_t) (str - strstart);
|
|
||||||
strstart = str;
|
|
||||||
retval++;
|
retval++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -983,7 +1018,7 @@ char *SDL_strcasestr(const char *haystack, const char *needle)
|
||||||
if (SDL_strncasecmp(haystack, needle, length) == 0) {
|
if (SDL_strncasecmp(haystack, needle, length) == 0) {
|
||||||
return (char *)haystack;
|
return (char *)haystack;
|
||||||
}
|
}
|
||||||
} while (SDL_StepUTF8(&haystack, 4)); // move ahead by a full codepoint at a time, regardless of bytes.
|
} while (SDL_StepUTF8(&haystack, NULL)); // move ahead by a full codepoint at a time, regardless of bytes.
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,8 +25,6 @@
|
||||||
// most things you might need internally in here are public APIs, this is
|
// most things you might need internally in here are public APIs, this is
|
||||||
// just a few special pieces right now.
|
// just a few special pieces right now.
|
||||||
|
|
||||||
Uint32 SDL_StepUTF8(const char **_str, const size_t slen);
|
|
||||||
|
|
||||||
// this expects `from` to be a Unicode codepoint, and `to` to point to AT LEAST THREE Uint32s.
|
// this expects `from` to be a Unicode codepoint, and `to` to point to AT LEAST THREE Uint32s.
|
||||||
int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to);
|
int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to);
|
||||||
|
|
||||||
|
|
|
@ -10,13 +10,6 @@
|
||||||
freely.
|
freely.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* quiet windows compiler warnings */
|
|
||||||
#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
|
|
||||||
#define _CRT_SECURE_NO_WARNINGS
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include <SDL3/SDL.h>
|
#include <SDL3/SDL.h>
|
||||||
#include <SDL3/SDL_main.h>
|
#include <SDL3/SDL_main.h>
|
||||||
#include <SDL3/SDL_test.h>
|
#include <SDL3/SDL_test.h>
|
||||||
|
@ -33,6 +26,34 @@ widelen(char *data)
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Carve the next line out of an in-memory text buffer, in place.
 *
 * fdataptr: in/out cursor into the buffer; on return it points just past
 *           the bytes that were scanned.
 * fdatalen: in/out number of bytes left at the cursor; reduced by the
 *           number of bytes scanned.
 *
 * Every '\r' that is scanned, and the '\n' that ends the line, is
 * overwritten with '\0'. Scanning stops right after the first '\n' or when
 * the buffer runs out.
 *
 * Returns the start of the line, or NULL once no bytes remain.
 *
 * NOTE(review): a final line with no trailing '\n' gets no terminator
 * written by this function, so the caller's buffer presumably has to be
 * null-terminated already -- confirm against whatever loads the buffer.
 */
static char *get_next_line(Uint8 **fdataptr, size_t *fdatalen)
{
    Uint8 *const start = *fdataptr;
    const size_t total = *fdatalen;
    size_t used = 0;

    if (total == 0) {
        return NULL;  // buffer exhausted, no more lines.
    }

    while (used < total) {
        const Uint8 ch = start[used];
        if ((ch == '\r') || (ch == '\n')) {
            start[used] = '\0';  // both line-ending bytes become terminators.
        }
        used++;
        if (ch == '\n') {
            break;  // the newline itself is consumed; the next call starts after it.
        }
    }

    *fdataptr = start + used;
    *fdatalen = total - used;
    return (char *) start;
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
const char *formats[] = {
|
const char *formats[] = {
|
||||||
|
@ -51,13 +72,15 @@ int main(int argc, char *argv[])
|
||||||
};
|
};
|
||||||
|
|
||||||
char *fname = NULL;
|
char *fname = NULL;
|
||||||
char buffer[BUFSIZ];
|
|
||||||
char *ucs4;
|
char *ucs4;
|
||||||
char *test[2];
|
char *test[2];
|
||||||
int i;
|
int i;
|
||||||
FILE *file;
|
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
SDLTest_CommonState *state;
|
SDLTest_CommonState *state;
|
||||||
|
Uint8 *fdata = NULL;
|
||||||
|
Uint8 *fdataptr = NULL;
|
||||||
|
char *line = NULL;
|
||||||
|
size_t fdatalen = 0;
|
||||||
|
|
||||||
/* Initialize test framework */
|
/* Initialize test framework */
|
||||||
state = SDLTest_CommonCreateState(argv, 0);
|
state = SDLTest_CommonCreateState(argv, 0);
|
||||||
|
@ -89,20 +112,19 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
fname = GetResourceFilename(fname, "utf8.txt");
|
fname = GetResourceFilename(fname, "utf8.txt");
|
||||||
file = fopen(fname, "rb");
|
fdata = (Uint8 *) (fname ? SDL_LoadFile(fname, &fdatalen) : NULL);
|
||||||
if (!file) {
|
if (!fdata) {
|
||||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Unable to open %s\n", fname);
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Unable to load %s\n", fname);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
SDL_free(fname);
|
|
||||||
|
|
||||||
while (fgets(buffer, sizeof(buffer), file)) {
|
fdataptr = fdata;
|
||||||
|
while ((line = get_next_line(&fdataptr, &fdatalen)) != NULL) {
|
||||||
/* Convert to UCS-4 */
|
/* Convert to UCS-4 */
|
||||||
size_t len;
|
size_t len;
|
||||||
ucs4 =
|
ucs4 = SDL_iconv_string("UCS-4", "UTF-8", line, SDL_strlen(line) + 1);
|
||||||
SDL_iconv_string("UCS-4", "UTF-8", buffer,
|
|
||||||
SDL_strlen(buffer) + 1);
|
|
||||||
len = (widelen(ucs4) + 1) * 4;
|
len = (widelen(ucs4) + 1) * 4;
|
||||||
|
|
||||||
for (i = 0; i < SDL_arraysize(formats); ++i) {
|
for (i = 0; i < SDL_arraysize(formats); ++i) {
|
||||||
test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len);
|
test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len);
|
||||||
test[1] = SDL_iconv_string("UCS-4", formats[i], test[0], len);
|
test[1] = SDL_iconv_string("UCS-4", formats[i], test[0], len);
|
||||||
|
@ -115,10 +137,44 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len);
|
test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len);
|
||||||
SDL_free(ucs4);
|
SDL_free(ucs4);
|
||||||
(void)fputs(test[0], stdout);
|
SDL_Log("%s", test[0]);
|
||||||
SDL_free(test[0]);
|
SDL_free(test[0]);
|
||||||
}
|
}
|
||||||
(void)fclose(file);
|
SDL_free(fdata);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
Uint32 *ucs4buf;
|
||||||
|
Uint32 *ucs4ptr;
|
||||||
|
char *utf8out;
|
||||||
|
Uint32 cp;
|
||||||
|
SDL_IOStream *io;
|
||||||
|
|
||||||
|
fdata = (Uint8 *) (fname ? SDL_LoadFile(fname, &fdatalen) : NULL);
|
||||||
|
if (!fdata) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Unable to load %s\n", fname);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ucs4buf = (Uint32 *) SDL_malloc(fdatalen * 4);
|
||||||
|
ucs4ptr = ucs4buf;
|
||||||
|
|
||||||
|
fdataptr = fdata;
|
||||||
|
while ((cp = SDL_StepUTF8((const char **) &fdataptr, &fdatalen)) != 0) {
|
||||||
|
*(ucs4ptr++) = SDL_Swap32BE(cp);
|
||||||
|
}
|
||||||
|
*(ucs4ptr++) = 0;
|
||||||
|
utf8out = SDL_iconv_string("UTF-8", "UCS-4", (const char *) ucs4buf, (size_t) ((ucs4ptr - ucs4buf)) * 4);
|
||||||
|
io = SDL_IOFromFile("test_steputf8.txt", "wb");
|
||||||
|
SDL_WriteIO(io, utf8out, SDL_strlen(utf8out));
|
||||||
|
SDL_CloseIO(io);
|
||||||
|
SDL_free(ucs4buf);
|
||||||
|
SDL_free(utf8out);
|
||||||
|
SDL_free(fdata);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SDL_free(fname);
|
||||||
|
|
||||||
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, "Total errors: %d\n", errors);
|
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, "Total errors: %d\n", errors);
|
||||||
SDL_Quit();
|
SDL_Quit();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue