31 namespace OVR {
namespace UTF8Util {
40 while (p - buf < buflen)
58 const char* buf = putf8str;
63 while (buf - putf8str < length)
92 const char* buf = putf8str;
96 while ((buf - putf8str) < length && index > 0)
119 if (ucs_character <= 0x7F)
121 else if (ucs_character <= 0x7FF)
123 else if (ucs_character <= 0xFFFF)
125 else if (ucs_character <= 0x1FFFFF)
127 else if (ucs_character <= 0x3FFFFFF)
129 else if (ucs_character <= 0x7FFFFFFF)
157 #define INVALID_CHAR 0x0FFFD
159 #define FIRST_BYTE(mask, shift) \
160 uc = (c & (mask)) << (shift);
162 #define NEXT_BYTE(shift) \
164 if (c == 0) return 0; \
165 if ((c & 0xC0) != 0x80) return INVALID_CHAR; \
167 uc |= (c & 0x3F) << shift;
174 if ((c & 0x80) == 0)
return (
UInt32) c;
177 if ((c & 0xE0) == 0xC0)
185 else if ((c & 0xF0) == 0xE0)
198 else if ((c & 0xF8) == 0xF0)
208 else if ((c & 0xFC) == 0xF8)
219 else if ((c & 0xFE) == 0xFC)
241 if (ucs_character <= 0x7F)
244 pbuffer[(*pindex)++] = (char) ucs_character;
246 else if (ucs_character <= 0x7FF)
249 pbuffer[(*pindex)++] = 0xC0 | (char)(ucs_character >> 6);
250 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
252 else if (ucs_character <= 0xFFFF)
255 pbuffer[(*pindex)++] = 0xE0 | (char)(ucs_character >> 12);
256 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
257 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
259 else if (ucs_character <= 0x1FFFFF)
262 pbuffer[(*pindex)++] = 0xF0 | (char)(ucs_character >> 18);
263 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 12) & 0x3F);
264 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
265 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
267 else if (ucs_character <= 0x3FFFFFF)
270 pbuffer[(*pindex)++] = 0xF8 | (char)(ucs_character >> 24);
271 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 18) & 0x3F);
272 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 12) & 0x3F);
273 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
274 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
276 else if (ucs_character <= 0x7FFFFFFF)
279 pbuffer[(*pindex)++] = 0xFC | (char)(ucs_character >> 30);
280 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 24) & 0x3F);
281 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 18) & 0x3F);
282 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 12) & 0x3F);
283 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
284 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
296 for (
int i = 0; i < length; i++)
301 for (
int i = 0;; i++)
315 for (
int i = 0; i < length; i++)
322 for (
int i = 0;; i++)
334 wchar_t *pbegin = pbuff;
342 else if (ch >= 0xFFFF)
349 const char* p = putf8str;
350 while ((p - putf8str) < bytesLen)
360 return pbuff - pbegin;
364 #ifdef UTF8_UNIT_TEST
383 #include "base/utility.h"
387 bool check_equal(
const char* utf8_in,
const UInt32* ucs_in)
391 UInt32 next_ucs = *ucs_in++;
392 UInt32 next_ucs_from_utf8 = utf8::decode_next_unicode_character(&utf8_in);
393 if (next_ucs != next_ucs_from_utf8)
408 void log_ascii(
const char* line)
412 unsigned char c = (
unsigned char) *line++;
419 && (c < 32 || c > 127))
422 printf(
"<0x%02X>", (
int) c);
432 void log_ucs(
const UInt32* line)
443 && (uc < 32 || uc > 127))
446 printf(
"<U-%04X>", uc);
450 printf(
"%c", (
char) uc);
457 int main(
int argc,
const char* argv[])
460 const char* test8 =
"Ignacio CastaƱo";
463 0x49, 0x67, 0x6E, 0x61, 0x63,
464 0x69, 0x6F, 0x20, 0x43, 0x61,
465 0x73, 0x74, 0x61, 0xF1, 0x6F,
475 const char* filename = argv[1];
476 FILE* fp = fopen(filename,
"rb");
479 printf(
"Can't open file '%s'\n", filename);
484 const int LINE_SIZE = 200;
485 char line_buffer_utf8[LINE_SIZE];
486 char reencoded_utf8[6 * LINE_SIZE];
487 UInt32 line_buffer_ucs[LINE_SIZE];
489 int byte_counter = 0;
498 line_buffer_utf8[byte_counter++] = c;
499 if (c ==
'\n' || byte_counter >= LINE_SIZE - 2)
502 line_buffer_utf8[byte_counter++] = 0;
505 const char* p = line_buffer_utf8;
506 UInt32* q = line_buffer_ucs;
525 int last_index =
index;
534 log_ucs(line_buffer_ucs);
535 log_ascii(reencoded_utf8);
538 OVR_ASSERT(check_equal(line_buffer_utf8, line_buffer_ucs));
539 OVR_ASSERT(check_equal(reencoded_utf8, line_buffer_ucs));
553 #endif // UTF8_UNIT_TEST
UInt32 OVR_STDCALL GetCharAt(SPInt index, const char *putf8str, SPInt length)
int OVR_STDCALL GetEncodeCharSize(UInt32 ucs_character)
SPInt OVR_STDCALL GetLength(const char *buf, SPInt buflen)
UInt32 OVR_STDCALL DecodeNextChar_Advance0(const char **putf8Buffer)
UPInt OVR_STDCALL DecodeString(wchar_t *pbuff, const char *putf8str, SPInt bytesLen)
void OVR_STDCALL EncodeChar(char *pbuffer, SPInt *pindex, UInt32 ucs_character)
UInt32 DecodeNextChar(const char **putf8Buffer)
SPInt OVR_STDCALL GetByteIndex(SPInt index, const char *putf8str, SPInt length)
void OVR_STDCALL EncodeString(char *pbuff, const wchar_t *pchar, SPInt length)
SPInt OVR_STDCALL GetEncodeStringSize(const wchar_t *pchar, SPInt length)
int char * index(const char *__s, int __c) __THROW __attribute_pure__ __nonnull((1))
#define FIRST_BYTE(mask, shift)