2025-12-03 22:00:29 +11:00
|
|
|
#include "Utf8.h"
|
|
|
|
|
|
2026-01-22 00:07:29 +07:00
|
|
|
// Debug logging for Thai rendering investigation
|
|
|
|
|
// Set to 1 to enable verbose UTF-8 decode logging
|
|
|
|
|
#define UTF8_DEBUG_LOGGING 0
|
|
|
|
|
|
|
|
|
|
#if UTF8_DEBUG_LOGGING
|
|
|
|
|
#include <Arduino.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-12-03 22:00:29 +11:00
|
|
|
int utf8CodepointLen(const unsigned char c) {
|
|
|
|
|
if (c < 0x80) return 1; // 0xxxxxxx
|
|
|
|
|
if ((c >> 5) == 0x6) return 2; // 110xxxxx
|
|
|
|
|
if ((c >> 4) == 0xE) return 3; // 1110xxxx
|
|
|
|
|
if ((c >> 3) == 0x1E) return 4; // 11110xxx
|
|
|
|
|
return 1; // fallback for invalid
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t utf8NextCodepoint(const unsigned char** string) {
|
|
|
|
|
if (**string == 0) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const int bytes = utf8CodepointLen(**string);
|
|
|
|
|
const uint8_t* chr = *string;
|
|
|
|
|
*string += bytes;
|
|
|
|
|
|
|
|
|
|
if (bytes == 1) {
|
|
|
|
|
return chr[0];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t cp = chr[0] & ((1 << (7 - bytes)) - 1); // mask header bits
|
|
|
|
|
|
|
|
|
|
for (int i = 1; i < bytes; i++) {
|
2026-01-22 00:07:29 +07:00
|
|
|
// Validate continuation bytes: must be 10xxxxxx (0x80-0xBF)
|
|
|
|
|
if ((chr[i] & 0xC0) != 0x80) {
|
|
|
|
|
#if UTF8_DEBUG_LOGGING
|
|
|
|
|
Serial.printf("[UTF8] Invalid continuation byte at pos %d: 0x%02X (expected 0x80-0xBF), bytes=[", i, chr[i]);
|
|
|
|
|
for (int j = 0; j < bytes; j++) {
|
|
|
|
|
Serial.printf("0x%02X%s", chr[j], j < bytes - 1 ? " " : "");
|
|
|
|
|
}
|
|
|
|
|
Serial.printf("]\n");
|
|
|
|
|
#endif
|
|
|
|
|
return REPLACEMENT_GLYPH; // Return U+FFFD for invalid sequence
|
|
|
|
|
}
|
2025-12-03 22:00:29 +11:00
|
|
|
cp = (cp << 6) | (chr[i] & 0x3F);
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-22 00:07:29 +07:00
|
|
|
#if UTF8_DEBUG_LOGGING
|
|
|
|
|
// Log Thai codepoints specifically
|
|
|
|
|
if (cp >= 0x0E00 && cp <= 0x0E7F) {
|
|
|
|
|
Serial.printf("[UTF8] Thai codepoint U+%04X from bytes=[", cp);
|
|
|
|
|
for (int j = 0; j < bytes; j++) {
|
|
|
|
|
Serial.printf("0x%02X%s", chr[j], j < bytes - 1 ? " " : "");
|
|
|
|
|
}
|
|
|
|
|
Serial.printf("]\n");
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-12-03 22:00:29 +11:00
|
|
|
return cp;
|
|
|
|
|
}
|