ScummVM API documentation
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
utf8.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22  //=============================================================================
23  //
24  // UTF-8 utilities.
25  // Based on utf8 code from https://c9x.me/irc/ (public domain)
26  //
27  //=============================================================================
28 
29 #ifndef AGS_SHARED_UTIL_UTF8_H
30 #define AGS_SHARED_UTIL_UTF8_H
31 
32 #include "common/std/algorithm.h"
33 #include "ags/shared/core/types.h"
34 
35 namespace AGS3 {
36 namespace Utf8 {
37 
// A single decoded codepoint, stored as a signed 32-bit integer.
38 typedef int32_t Rune;
// Largest number of bytes one encoded codepoint may occupy.
39 const size_t UtfSz = 4;
// Value substituted for any codepoint that fails validation
// (U+FFFD, the Unicode replacement character).
40 const Rune RuneInvalid = 0xFFFD;
41 
// Lookup tables. Indices 1..UtfSz describe a sequence of that many bytes;
// index 0 describes a continuation byte (utfbyte/utfmask) and the overall
// permitted codepoint range (utfmin/utfmax).
// utfbyte: marker bits of a continuation byte (index 0) or of the leading
// byte of an N-byte sequence (index N).
42 const unsigned char utfbyte[UtfSz + 1] = { 0x80, 0, 0xC0, 0xE0, 0xF0 };
// utfmask: which bits of the byte hold the marker; the remaining bits are payload.
43 const unsigned char utfmask[UtfSz + 1] = { 0xC0, 0x80, 0xE0, 0xF0, 0xF8 };
// utfmin: smallest codepoint accepted for an N-byte sequence (rejects overlong forms).
44 const Rune utfmin[UtfSz + 1] = { 0, 0, 0x80, 0x800, 0x10000 };
// utfmax: largest codepoint representable by an N-byte sequence.
45 const Rune utfmax[UtfSz + 1] = { 0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF };
46 
47 
48 inline size_t Validate(Rune *u, size_t i) {
49  if (*u < utfmin[i] || *u > utfmax[i] || (0xD800 <= *u && *u <= 0xDFFF))
50  *u = RuneInvalid;
51  for (i = 1; *u > utfmax[i]; ++i)
52  ;
53  return i;
54 }
55 
56 inline Rune DecodeByte(unsigned char c, size_t *i) {
57  for (*i = 0; *i < UtfSz + 1; ++(*i))
58  if ((c & utfmask[*i]) == utfbyte[*i])
59  return c & ~utfmask[*i];
60  return 0;
61 }
62 
63 inline char EncodeByte(Rune u, size_t i) {
64  return utfbyte[i] | (u & ~utfmask[i]);
65 }
66 
67 // Read a single utf8 codepoint from the c-string;
68 // returns codepoint's size in bytes (may be used to advance string pos)
69 inline size_t GetChar(const char *c, size_t clen, Rune *u) {
70  size_t i, j, len, type;
71  Rune udecoded;
72  *u = RuneInvalid;
73  if (!clen || !*c)
74  return 0;
75  udecoded = DecodeByte(c[0], &len);
76  if (len < 1 || len > UtfSz)
77  return 1;
78  for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
79  udecoded = (udecoded << 6) | DecodeByte(c[i], &type);
80  if (type != 0)
81  return j;
82  }
83  if (j < len)
84  return 0;
85  *u = udecoded;
86  Validate(u, len);
87  return len;
88 }
89 
90 // Convert utf8 codepoint to the string representation and write to the buffer
91 inline size_t SetChar(Rune u, char *c, size_t clen) {
92  size_t len, i;
93  len = Validate(&u, 0);
94  if (len > UtfSz || len > clen)
95  return 0;
96  for (i = len - 1; i != 0; --i) {
97  c[i] = EncodeByte(u, 0);
98  u >>= 6;
99  }
100  c[0] = EncodeByte(u, len);
101  return len;
102 }
103 
104 // Calculates utf8 string length in characters
105 inline size_t GetLength(const char *c) {
106  size_t len = 0;
107  Rune r;
108  for (size_t chr_sz = 0; (chr_sz = GetChar(c, UtfSz, &r)) > 0; c += chr_sz, ++len);
109  return len;
110 }
111 
112 } // namespace Utf8
113 } // namespace AGS3
114 
115 #endif
Definition: ags.h:40