322 lines
9.2 KiB
C++
322 lines
9.2 KiB
C++
#include "ThaiClusterBuilder.h"
|
|
|
|
#include <Utf8.h>
|
|
|
|
// Debug logging for Thai rendering investigation
|
|
// Set to 1 to enable verbose cluster building logging
|
|
#define THAI_CLUSTER_DEBUG_LOGGING 0
|
|
|
|
#if THAI_CLUSTER_DEBUG_LOGGING
|
|
#include <Arduino.h>
|
|
#endif
|
|
|
|
namespace ThaiShaper {
|
|
|
|
/// Tells whether a codepoint is one of the Thai consonants listed here as
/// having a tall ascender (a stroke rising above the normal consonant height).
///
/// buildNextCluster() uses the result to pick the horizontal offset applied
/// to above-vowels and tone marks.
///
/// @param cp Unicode codepoint to classify.
/// @return true for U+0E1B, U+0E1D, U+0E1F and U+0E2C; false for anything else.
bool ThaiClusterBuilder::isAscenderConsonant(uint32_t cp) {
  const bool isPoPla = (cp == 0x0E1B);    // PO PLA (ป)
  const bool isFoFa = (cp == 0x0E1D);     // FO FA (ฝ)
  const bool isFoFan = (cp == 0x0E1F);    // FO FAN (ฟ)
  const bool isLoChula = (cp == 0x0E2C);  // LO CHULA (ฬ)

  return isPoPla || isFoFa || isFoFan || isLoChula;
}
|
|
|
|
/// Tells whether a codepoint is one of the Thai letters listed here as having
/// a descender (a stroke extending below the baseline), which may matter when
/// positioning below-vowels.
///
/// @param cp Unicode codepoint to classify.
/// @return true for U+0E0E, U+0E0F, U+0E24 and U+0E26; false for anything else.
bool ThaiClusterBuilder::isDescenderConsonant(uint32_t cp) {
  static const uint32_t kDescenderLetters[] = {
      0x0E0E,  // DO CHADA (ฎ)
      0x0E0F,  // TO PATAK (ฏ)
      0x0E24,  // RU (ฤ)
      0x0E26,  // LU (ฦ)
  };

  for (const uint32_t letter : kDescenderLetters) {
    if (letter == cp) {
      return true;
    }
  }
  return false;
}
|
|
|
|
std::vector<ThaiCluster> ThaiClusterBuilder::buildClusters(const char* text) {
|
|
std::vector<ThaiCluster> clusters;
|
|
|
|
if (text == nullptr || *text == '\0') {
|
|
return clusters;
|
|
}
|
|
|
|
#if THAI_CLUSTER_DEBUG_LOGGING
|
|
Serial.printf("[THAI] buildClusters input bytes: ");
|
|
const uint8_t* debugPtr = reinterpret_cast<const uint8_t*>(text);
|
|
for (int i = 0; i < 32 && debugPtr[i] != '\0'; i++) {
|
|
Serial.printf("%02X ", debugPtr[i]);
|
|
}
|
|
Serial.printf("\n");
|
|
#endif
|
|
|
|
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text);
|
|
|
|
while (*ptr != '\0') {
|
|
ThaiCluster cluster = buildNextCluster(&ptr);
|
|
if (!cluster.glyphs.empty()) {
|
|
clusters.push_back(std::move(cluster));
|
|
}
|
|
}
|
|
|
|
#if THAI_CLUSTER_DEBUG_LOGGING
|
|
Serial.printf("[THAI] Built %zu clusters\n", clusters.size());
|
|
#endif
|
|
|
|
return clusters;
|
|
}
|
|
|
|
/// Builds the next cluster starting at `*text` and advances `*text` past the
/// codepoints that were consumed.
///
/// - A non-Thai codepoint becomes a single-glyph cluster.
/// - A Thai digit or symbol at the start of a cluster becomes a single-glyph
///   cluster.
/// - Otherwise Thai codepoints are collected into one slot each (leading
///   vowel, base consonant, above vowel, below vowel, tone mark,
///   nikhahit/yamakkan, follow vowel) until a boundary is reached: a non-Thai
///   codepoint, a codepoint whose slot is already filled, a base or leading
///   vowel arriving after marks, a digit/symbol, an unrecognised Thai type, or
///   a follow vowel (which is consumed and then ends the cluster).
///
/// Glyphs are emitted in the order: leading vowel, base consonant, above
/// vowel, below vowel, tone mark, nikhahit/yamakkan, follow vowel.
///
/// Whenever `**text` is not the terminator, at least one codepoint is consumed
/// and at least one glyph is emitted, so callers looping on this function
/// always make progress and no consumed codepoint is silently dropped.
///
/// @param text In/out pointer to the current read position inside a
///             NUL-terminated UTF-8 string.
/// @return The cluster that was built. Empty only when there is nothing to
///         read (`text`/`*text` is nullptr or `**text` is the terminator).
ThaiCluster ThaiClusterBuilder::buildNextCluster(const uint8_t** text) {
  ThaiCluster cluster;

  if (text == nullptr || *text == nullptr || **text == '\0') {
    return cluster;
  }

#if THAI_CLUSTER_DEBUG_LOGGING
  // Log raw bytes at current position
  Serial.printf("[THAI] buildNextCluster at ptr=%p, bytes: ", static_cast<const void*>(*text));
  for (int i = 0; i < 6 && (*text)[i] != '\0'; i++) {
    Serial.printf("%02X ", (*text)[i]);
  }
  Serial.printf("\n");
#endif

  // Creates a glyph with zero offsets; callers adjust the offsets afterwards.
  const auto makeGlyph = [](uint32_t codepoint, bool zeroAdvance) -> PositionedGlyph {
    PositionedGlyph glyph;
    glyph.codepoint = codepoint;
    glyph.xOffset = 0;
    glyph.yOffset = 0;
    glyph.zeroAdvance = zeroAdvance;
    return glyph;
  };

  // Remember where this cluster starts so "nothing consumed yet" can be detected.
  const uint8_t* const clusterStart = *text;

  // Peek at first codepoint to determine cluster type
  const uint8_t* peekPtr = *text;
  const uint32_t firstCp = utf8NextCodepoint(&peekPtr);

#if THAI_CLUSTER_DEBUG_LOGGING
  Serial.printf("[THAI] First codepoint: U+%04X\n", static_cast<unsigned>(firstCp));
#endif

  // Non-Thai character: return as single-glyph cluster
  if (!isThaiCodepoint(firstCp)) {
    utf8NextCodepoint(text);  // Consume the codepoint
    cluster.glyphs.push_back(makeGlyph(firstCp, false));
#if THAI_CLUSTER_DEBUG_LOGGING
    Serial.printf("[THAI] Non-Thai cluster: U+%04X\n", static_cast<unsigned>(firstCp));
#endif
    return cluster;
  }

  // One slot per role; 0 means "not seen yet".
  uint32_t leadingVowel = 0;
  uint32_t baseConsonant = 0;
  uint32_t aboveVowel = 0;
  uint32_t belowVowel = 0;
  uint32_t toneMark = 0;
  uint32_t followVowel = 0;
  uint32_t thanthakhat = 0;  // Filled by NIKHAHIT / YAMAKKAN class codepoints

  // Parse the cluster: consume codepoints until we hit a cluster boundary
  bool atBoundary = false;
  while (!atBoundary && **text != '\0') {
    peekPtr = *text;
    const uint32_t cp = utf8NextCodepoint(&peekPtr);

    if (!isThaiCodepoint(cp)) {
      break;  // Non-Thai ends the cluster
    }

    const bool haveBase = baseConsonant != 0;
    // A mark collected before any base cannot belong to a base that follows
    // it in the text, so a later consonant/leading vowel starts a new cluster.
    const bool haveMark = aboveVowel != 0 || belowVowel != 0 || toneMark != 0 || thanthakhat != 0;

    // Slot that will receive cp; stays nullptr when cp does not fit in this
    // cluster, which marks a boundary and leaves cp unconsumed.
    uint32_t* slot = nullptr;

    switch (getThaiCharType(cp)) {
      case ThaiCharType::LEADING_VOWEL:
        if (leadingVowel == 0 && !haveBase && !haveMark) {
          slot = &leadingVowel;
        }
        break;

      case ThaiCharType::CONSONANT:
        if (!haveBase && !haveMark) {
          slot = &baseConsonant;
        }
        break;

      case ThaiCharType::ABOVE_VOWEL:
        if (aboveVowel == 0) {
          slot = &aboveVowel;
        }
        break;

      case ThaiCharType::BELOW_VOWEL:
        if (belowVowel == 0) {
          slot = &belowVowel;
        }
        break;

      case ThaiCharType::TONE_MARK:
        if (toneMark == 0) {
          slot = &toneMark;
        }
        break;

      case ThaiCharType::FOLLOW_VOWEL:
        if (followVowel == 0) {
          slot = &followVowel;
        }
        // Follow vowel typically ends the syllable
        atBoundary = true;
        break;

      case ThaiCharType::NIKHAHIT:
      case ThaiCharType::YAMAKKAN:
        if (thanthakhat == 0) {
          slot = &thanthakhat;
        }
        break;

      case ThaiCharType::THAI_DIGIT:
      case ThaiCharType::THAI_SYMBOL:
        // Digits and symbols are standalone clusters
        if (*text == clusterStart) {
          utf8NextCodepoint(text);
          cluster.glyphs.push_back(makeGlyph(cp, false));
          return cluster;
        }
        // Otherwise end current cluster (slot stays nullptr)
        break;

      default:
        // Unknown Thai character - treat as cluster boundary
        break;
    }

    if (slot == nullptr) {
      atBoundary = true;
    } else {
      *slot = cp;
      utf8NextCodepoint(text);  // Consume the accepted codepoint
    }
  }

  // Forward-progress guarantee: the first codepoint is in the Thai block but
  // fits no slot (unrecognised type). Emit it as a standalone glyph instead
  // of returning an empty cluster without advancing, which would make the
  // caller re-read the same bytes forever.
  if (*text == clusterStart) {
    utf8NextCodepoint(text);
    cluster.glyphs.push_back(makeGlyph(firstCp, false));
#if THAI_CLUSTER_DEBUG_LOGGING
    Serial.printf("[THAI] Unclassified Thai codepoint emitted standalone: U+%04X\n",
                  static_cast<unsigned>(firstCp));
#endif
    return cluster;
  }

  // Now build positioned glyphs from collected codepoints

  // 1. Leading vowel (if any) - has its own advance
  if (leadingVowel != 0) {
    cluster.glyphs.push_back(makeGlyph(leadingVowel, false));
  }

  // 2. Base consonant
  if (baseConsonant != 0) {
    cluster.glyphs.push_back(makeGlyph(baseConsonant, false));
  }

  // Marks are emitted even when no base consonant was found, so codepoints
  // that were consumed from the input are never lost.
  // NOTE(review): orphan marks keep zeroAdvance = true, so the renderer is
  // presumed to draw them at the current pen position - confirm that this is
  // the desired fallback.
  const bool hasAscender = baseConsonant != 0 && isAscenderConsonant(baseConsonant);

  // 3. Above vowel (positioned above base)
  if (aboveVowel != 0) {
    PositionedGlyph aboveGlyph = makeGlyph(aboveVowel, true);  // Doesn't advance cursor
    aboveGlyph.xOffset = hasAscender ? ThaiOffset::ASCENDER_X_SHIFT : 0;
    aboveGlyph.yOffset = ThaiOffset::ABOVE_VOWEL;
    cluster.glyphs.push_back(aboveGlyph);
  }

  // 4. Below vowel (positioned below base)
  if (belowVowel != 0) {
    PositionedGlyph belowGlyph = makeGlyph(belowVowel, true);
    belowGlyph.yOffset = ThaiOffset::BELOW_VOWEL;
    cluster.glyphs.push_back(belowGlyph);
  }

  // 5. Tone mark (positioned above everything else)
  if (toneMark != 0) {
    PositionedGlyph toneGlyph = makeGlyph(toneMark, true);
    toneGlyph.xOffset = hasAscender ? ThaiOffset::ASCENDER_X_SHIFT : 0;
    // Tone mark goes above above-vowel if present, otherwise just above base
    toneGlyph.yOffset = aboveVowel != 0 ? ThaiOffset::TONE_MARK : ThaiOffset::TONE_MARK_ALONE;
    cluster.glyphs.push_back(toneGlyph);
  }

  // 6. Nikhahit/Yamakkan class mark (positioned above)
  if (thanthakhat != 0) {
    PositionedGlyph thanGlyph = makeGlyph(thanthakhat, true);
    // Position depends on what's already above
    if (toneMark != 0) {
      thanGlyph.yOffset = ThaiOffset::TONE_MARK - 2;  // Above tone mark
    } else if (aboveVowel != 0) {
      thanGlyph.yOffset = ThaiOffset::TONE_MARK;  // Above above-vowel
    } else {
      thanGlyph.yOffset = ThaiOffset::TONE_MARK_ALONE;
    }
    cluster.glyphs.push_back(thanGlyph);
  }

  // 7. Follow vowel (displayed after base, advances cursor)
  if (followVowel != 0) {
    cluster.glyphs.push_back(makeGlyph(followVowel, false));
  }

#if THAI_CLUSTER_DEBUG_LOGGING
  Serial.printf("[THAI] Cluster built with %zu glyphs: ", cluster.glyphs.size());
  for (const auto& g : cluster.glyphs) {
    Serial.printf("U+%04X ", static_cast<unsigned>(g.codepoint));
  }
  Serial.printf("(lead=%04X base=%04X above=%04X below=%04X tone=%04X follow=%04X)\n",
                static_cast<unsigned>(leadingVowel), static_cast<unsigned>(baseConsonant),
                static_cast<unsigned>(aboveVowel), static_cast<unsigned>(belowVowel),
                static_cast<unsigned>(toneMark), static_cast<unsigned>(followVowel));
#endif

  return cluster;
}
|
|
|
|
} // namespace ThaiShaper
|