29 #ifndef AGS_SHARED_UTIL_UTF8_H 30 #define AGS_SHARED_UTIL_UTF8_H 32 #include "common/std/algorithm.h" 33 #include "ags/shared/core/types.h" 39 const size_t UtfSz = 4;
// UtfSz (defined on the line above) sizes the four lookup tables below
// (UtfSz + 1 entries each) and is the upper bound that GetChar and SetChar
// compare a sequence length against.
//
// 0xFFFD is the Unicode REPLACEMENT CHARACTER code point.
40 const Rune RuneInvalid = 0xFFFD;
// Byte-class tables. DecodeByte classifies a byte c as class i when
// (c & utfmask[i]) == utfbyte[i]:
//   i = 0 : 10xxxxxx  (continuation byte)
//   i = 1 : 0xxxxxxx  (single-byte sequence)
//   i = 2 : 110xxxxx  (lead byte, class 2)
//   i = 3 : 1110xxxx  (lead byte, class 3)
//   i = 4 : 11110xxx  (lead byte, class 4)
// The bits outside utfmask[i] are the payload bits of that byte.
42 const unsigned char utfbyte[UtfSz + 1] = { 0x80, 0, 0xC0, 0xE0, 0xF0 };
43 const unsigned char utfmask[UtfSz + 1] = { 0xC0, 0x80, 0xE0, 0xF0, 0xF8 };
// Inclusive code point bounds per index, used by Validate's range test.
// Index 0 spans the whole 0..0x10FFFF range; indices 1..4 give the bounds
// for each sequence-length class.
44 const Rune utfmin[UtfSz + 1] = { 0, 0, 0x80, 0x800, 0x10000 };
45 const Rune utfmax[UtfSz + 1] = { 0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF };
// Range-checks the code point *u against table index i and then searches
// for the first index >= 1 whose utfmax entry is not below *u.
//   u - code point to check, passed by pointer
//   i - table index to check against (SetChar passes 0, i.e. the full range)
// NOTE(review): the statement controlled by the `if`, the loop body and the
// return statement are not visible in this excerpt; presumably an
// out-of-range value is replaced and the found index is returned - confirm
// against the complete file.
48 inline size_t Validate(Rune *u,
size_t i) {
// Fails when *u is outside [utfmin[i], utfmax[i]] or lies in the
// surrogate range 0xD800..0xDFFF.
49 if (*u < utfmin[i] || *u > utfmax[i] || (0xD800 <= *u && *u <= 0xDFFF))
// i is reused as the scan index here, starting from 1.
51 for (i = 1; *u > utfmax[i]; ++i)
// Classifies a single byte and extracts its payload bits.
//   c - byte to classify
//   i - out: set to the index of the first utfmask/utfbyte pair that matches
//       c (0 = continuation pattern, 1..4 = lead-byte patterns)
// On a match, returns c with the bits covered by utfmask[*i] cleared.
// NOTE(review): the path taken when no pattern matches (*i ends up equal to
// UtfSz + 1) is not visible in this excerpt.
56 inline Rune DecodeByte(
unsigned char c,
size_t *i) {
57 for (*i = 0; *i < UtfSz + 1; ++(*i))
58 if ((c & utfmask[*i]) == utfbyte[*i])
59 return c & ~utfmask[*i];
// Builds one encoded byte of class i: the fixed prefix bits utfbyte[i]
// OR-ed with the bits of u that fall outside utfmask[i].
//   u - value whose low bits supply the payload
//   i - index into utfbyte/utfmask (SetChar uses 0 for trailing bytes and
//       the sequence length for the first byte)
// The result is converted to char on return.
63 inline char EncodeByte(Rune u,
size_t i) {
64 return utfbyte[i] | (u & ~utfmask[i]);
// Decodes one code point from the byte string c.
//   c    - input bytes
//   clen - upper bound on how many bytes of c the loop below may read
//   u    - out: decoded code point (the store itself is not visible here)
// NOTE(review): the declaration of udecoded, the initial checks, the early
// returns and the function tail are not visible in this excerpt; the
// comments below cover only the visible statements.
69 inline size_t GetChar(
const char *c,
size_t clen, Rune *u) {
70 size_t i, j, len, type;
// Classify the first byte: len receives its class index and udecoded its
// payload bits.
75 udecoded = DecodeByte(c[0], &len);
// Class 0 (continuation pattern) and a class above UtfSz (no pattern
// matched) are both rejected as a first byte.
76 if (len < 1 || len > UtfSz)
// Consume following bytes while input remains (i < clen) and the sequence
// is not complete (j < len); each byte contributes 6 more bits.
78 for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
// type receives the class index of the byte just read.
79 udecoded = (udecoded << 6) | DecodeByte(c[i], &type);
// Encodes the code point u into the buffer c.
//   u    - code point to encode
//   c    - output buffer
//   clen - capacity of c in bytes
// NOTE(review): the local declarations, the statement controlled by the
// `if`, the rest of the loop body and the return are not visible in this
// excerpt.
91 inline size_t SetChar(Rune u,
char *c,
size_t clen) {
// Validate checks u against the full range (index 0) and yields the
// number of bytes to emit.
93 len = Validate(&u, 0);
// Reject a length above UtfSz or one that does not fit in the buffer.
94 if (len > UtfSz || len > clen)
// Fill the trailing bytes from the last one down to index 1, each with the
// class-0 (continuation) prefix.
// NOTE(review): presumably u is shifted right by 6 bits on every iteration
// in a line not shown here - confirm.
96 for (i = len - 1; i != 0; --i) {
97 c[i] = EncodeByte(u, 0);
// The first byte carries the prefix for a sequence of len bytes.
100 c[0] = EncodeByte(u, len);
// Counts how many times GetChar can step through the string c.
//   c - input string; the local copy of the pointer is advanced while
//       counting
// NOTE(review): the declarations of len and r and the return statement are
// not visible in this excerpt.
105 inline size_t GetLength(
const char *c) {
// The loop has an empty body: each pass asks GetChar for the next character
// (allowing up to UtfSz bytes), advances c by the size it reports and
// increments len; it stops when GetChar reports a size of 0.
108 for (
size_t chr_sz = 0; (chr_sz = GetChar(c, UtfSz, &r)) > 0; c += chr_sz, ++len);