#include "ThaiClusterBuilder.h"

#include <cstdint>
#include <utility>
#include <vector>

// NOTE(review): the header name of this include was missing from the reviewed copy of
// the file. <Utf8.h> is presumed to be the provider of utf8NextCodepoint() - confirm.
#include <Utf8.h>

// Debug logging for Thai rendering investigation.
// Set to 1 to enable verbose cluster building logging.
#define THAI_CLUSTER_DEBUG_LOGGING 0

#if THAI_CLUSTER_DEBUG_LOGGING
// NOTE(review): the header name of this include was missing from the reviewed copy of
// the file. <Arduino.h> is presumed to be the provider of Serial - confirm.
#include <Arduino.h>
#endif

namespace ThaiShaper {

// Returns true for the four consonants listed below (PO PLA, FO FA, FO FAN, LO CHULA),
// which have tall ascenders. buildNextCluster() uses this to shift above-vowels and
// tone marks horizontally by ThaiOffset::ASCENDER_X_SHIFT.
bool ThaiClusterBuilder::isAscenderConsonant(uint32_t cp) {
  switch (cp) {
    case 0x0E1B:  // PO PLA (ป)
    case 0x0E1D:  // FO FA (ฝ)
    case 0x0E1F:  // FO FAN (ฟ)
    case 0x0E2C:  // LO CHULA (ฬ)
      return true;
    default:
      return false;
  }
}

// Returns true for the four characters listed below (DO CHADA, TO PATAK, RU, LU),
// which have descenders that extend below the baseline and may therefore affect
// below-vowel positioning.
bool ThaiClusterBuilder::isDescenderConsonant(uint32_t cp) {
  switch (cp) {
    case 0x0E0E:  // DO CHADA (ฎ)
    case 0x0E0F:  // TO PATAK (ฏ)
    case 0x0E24:  // RU (ฤ)
    case 0x0E26:  // LU (ฦ)
      return true;
    default:
      return false;
  }
}

// Splits a NUL-terminated UTF-8 string into clusters by calling buildNextCluster()
// repeatedly until the terminator is reached.
//
// text: NUL-terminated UTF-8 bytes; nullptr and "" both yield an empty vector.
// Returns the clusters in input order. Clusters that end up with no glyphs are
// not stored.
std::vector<ThaiCluster> ThaiClusterBuilder::buildClusters(const char* text) {
  std::vector<ThaiCluster> clusters;

  if (text == nullptr || *text == '\0') {
    return clusters;
  }

#if THAI_CLUSTER_DEBUG_LOGGING
  Serial.printf("[THAI] buildClusters input bytes: ");
  const uint8_t* debugPtr = reinterpret_cast<const uint8_t*>(text);
  for (int i = 0; i < 32 && debugPtr[i] != '\0'; i++) {
    Serial.printf("%02X ", debugPtr[i]);
  }
  Serial.printf("\n");
#endif

  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text);

  while (*ptr != '\0') {
    const uint8_t* const before = ptr;

    ThaiCluster cluster = buildNextCluster(&ptr);
    if (!cluster.glyphs.empty()) {
      clusters.push_back(std::move(cluster));
    }

    // Defensive: this loop only terminates if every iteration consumes input.
    // If nothing was consumed, skip one byte (safe: *ptr is known to be non-NUL
    // here) instead of spinning forever.
    if (ptr == before) {
      ++ptr;
    }
  }

#if THAI_CLUSTER_DEBUG_LOGGING
  Serial.printf("[THAI] Built %zu clusters\n", clusters.size());
#endif

  return clusters;
}

// Consumes one cluster from *text and returns its positioned glyphs.
//
// text: in/out cursor into a NUL-terminated UTF-8 buffer. On return *text points
//       just past the bytes that were consumed.
//
// Behaviour:
//  - Null cursor or end of string: returns an empty cluster, consumes nothing.
//  - Non-Thai codepoint: consumed and returned as a single standalone glyph.
//  - Thai digit/symbol seen before any leading vowel or base consonant: consumed
//    and returned as a single standalone glyph.
//  - Otherwise codepoints are collected into one slot per character class
//    (leading vowel, base consonant, above vowel, below vowel, tone mark,
//    follow vowel, nikhahit/yamakkan). The cluster ends at the first codepoint
//    that is non-Thai, whose slot is already taken, that is a leading vowel
//    after a base consonant, that is an unhandled character type, or right
//    after a follow vowel.
//  - A Thai-range codepoint that cannot even start a cluster (unhandled character
//    type as the very first codepoint) is consumed and emitted as a standalone
//    glyph, so every call on non-empty input makes progress.
//
// Known limitation: above/below vowels, tone marks and nikhahit/yamakkan that
// were consumed without a base consonant are not emitted, because the mark
// glyphs are only produced when a base consonant is present.
ThaiCluster ThaiClusterBuilder::buildNextCluster(const uint8_t** text) {
  ThaiCluster cluster;

  if (text == nullptr || *text == nullptr || **text == '\0') {
    return cluster;
  }

#if THAI_CLUSTER_DEBUG_LOGGING
  // Log raw bytes at current position
  Serial.printf("[THAI] buildNextCluster at ptr=%p, bytes: ", (void*)*text);
  for (int i = 0; i < 6 && (*text)[i] != '\0'; i++) {
    Serial.printf("%02X ", (*text)[i]);
  }
  Serial.printf("\n");
#endif

  // Remembered so we can detect a parse that consumed nothing.
  const uint8_t* const clusterStart = *text;

  // Glyph drawn at the pen position: no offset, normal advance.
  const auto makeStandaloneGlyph = [](uint32_t codepoint) {
    PositionedGlyph glyph;
    glyph.codepoint = codepoint;
    glyph.xOffset = 0;
    glyph.yOffset = 0;
    glyph.zeroAdvance = false;
    return glyph;
  };

  // Peek at the first codepoint (without consuming it) to determine cluster type.
  const uint8_t* peekPtr = clusterStart;
  const uint32_t firstCp = utf8NextCodepoint(&peekPtr);

#if THAI_CLUSTER_DEBUG_LOGGING
  Serial.printf("[THAI] First codepoint: U+%04X\n", firstCp);
#endif

  // Non-Thai character: return as single-glyph cluster.
  if (!isThaiCodepoint(firstCp)) {
    utf8NextCodepoint(text);  // Consume the codepoint
    cluster.glyphs.push_back(makeStandaloneGlyph(firstCp));

#if THAI_CLUSTER_DEBUG_LOGGING
    Serial.printf("[THAI] Non-Thai cluster: U+%04X\n", firstCp);
#endif
    return cluster;
  }

  // One slot per character class; 0 means "not seen yet".
  uint32_t leadingVowel = 0;
  uint32_t baseConsonant = 0;
  uint32_t aboveVowel = 0;
  uint32_t belowVowel = 0;
  uint32_t toneMark = 0;
  uint32_t followVowel = 0;
  uint32_t aboveSign = 0;  // Character classified as NIKHAHIT or YAMAKKAN

  // Stores cp in slot and consumes it from the input if the slot is still free.
  // Returns false (consuming nothing) when the slot is already occupied.
  const auto claimSlot = [text](uint32_t& slot, uint32_t cp) {
    if (slot != 0) {
      return false;
    }
    slot = cp;
    utf8NextCodepoint(text);
    return true;
  };

  // Parse the cluster: consume codepoints until we hit a cluster boundary.
  bool clusterComplete = false;
  while (!clusterComplete && **text != '\0') {
    peekPtr = *text;
    const uint32_t cp = utf8NextCodepoint(&peekPtr);

    if (!isThaiCodepoint(cp)) {
      break;  // Non-Thai ends the cluster
    }

    switch (getThaiCharType(cp)) {
      case ThaiCharType::LEADING_VOWEL:
        // A leading vowel after a base consonant, or a second leading vowel,
        // belongs to the next cluster.
        clusterComplete = (baseConsonant != 0) || !claimSlot(leadingVowel, cp);
        break;

      case ThaiCharType::CONSONANT:
        // Second consonant = new cluster
        clusterComplete = !claimSlot(baseConsonant, cp);
        break;

      case ThaiCharType::ABOVE_VOWEL:
        // Multiple above vowels - take first, new cluster for next
        clusterComplete = !claimSlot(aboveVowel, cp);
        break;

      case ThaiCharType::BELOW_VOWEL:
        clusterComplete = !claimSlot(belowVowel, cp);
        break;

      case ThaiCharType::TONE_MARK:
        clusterComplete = !claimSlot(toneMark, cp);
        break;

      case ThaiCharType::FOLLOW_VOWEL:
        // A follow vowel ends the cluster whether or not it was accepted.
        claimSlot(followVowel, cp);
        clusterComplete = true;
        break;

      case ThaiCharType::NIKHAHIT:
      case ThaiCharType::YAMAKKAN:
        clusterComplete = !claimSlot(aboveSign, cp);
        break;

      case ThaiCharType::THAI_DIGIT:
      case ThaiCharType::THAI_SYMBOL:
        // Digits and symbols are standalone clusters
        if (leadingVowel == 0 && baseConsonant == 0) {
          utf8NextCodepoint(text);
          cluster.glyphs.push_back(makeStandaloneGlyph(cp));
          return cluster;
        }
        // Otherwise end current cluster
        clusterComplete = true;
        break;

      default:
        // Unknown Thai character - treat as cluster boundary
        clusterComplete = true;
        break;
    }
  }

  // Forward-progress guarantee: the first codepoint is in the Thai range but
  // could not start a cluster, so nothing was consumed. Emit it as a standalone
  // glyph; returning an empty cluster here would make callers that loop until
  // the terminator spin forever on the same byte.
  if (*text == clusterStart) {
    utf8NextCodepoint(text);
    cluster.glyphs.push_back(makeStandaloneGlyph(firstCp));

#if THAI_CLUSTER_DEBUG_LOGGING
    Serial.printf("[THAI] Unclassified Thai codepoint emitted standalone: U+%04X\n", firstCp);
#endif
    return cluster;
  }

  // Now build positioned glyphs from the collected codepoints.

  // 1. Leading vowel (if any): emitted before the base consonant and given its
  //    own advance. It is emitted even when no base consonant was found.
  if (leadingVowel != 0) {
    cluster.glyphs.push_back(makeStandaloneGlyph(leadingVowel));
  }

  // 2. Base consonant and the marks attached to it
  if (baseConsonant != 0) {
    cluster.glyphs.push_back(makeStandaloneGlyph(baseConsonant));

    // Ascender consonants shift the marks above them horizontally.
    const bool hasAscender = isAscenderConsonant(baseConsonant);

    // 3. Above vowel (positioned above base)
    if (aboveVowel != 0) {
      PositionedGlyph aboveGlyph;
      aboveGlyph.codepoint = aboveVowel;
      aboveGlyph.xOffset = hasAscender ? ThaiOffset::ASCENDER_X_SHIFT : 0;
      aboveGlyph.yOffset = ThaiOffset::ABOVE_VOWEL;
      aboveGlyph.zeroAdvance = true;  // Above vowel doesn't advance cursor
      cluster.glyphs.push_back(aboveGlyph);
    }

    // 4. Below vowel (positioned below base)
    if (belowVowel != 0) {
      PositionedGlyph belowGlyph;
      belowGlyph.codepoint = belowVowel;
      belowGlyph.xOffset = 0;
      belowGlyph.yOffset = ThaiOffset::BELOW_VOWEL;
      belowGlyph.zeroAdvance = true;
      cluster.glyphs.push_back(belowGlyph);
    }

    // 5. Tone mark (positioned above everything else)
    if (toneMark != 0) {
      PositionedGlyph toneGlyph;
      toneGlyph.codepoint = toneMark;
      toneGlyph.xOffset = hasAscender ? ThaiOffset::ASCENDER_X_SHIFT : 0;
      // Tone mark goes above above-vowel if present, otherwise just above base
      toneGlyph.yOffset = aboveVowel != 0 ? ThaiOffset::TONE_MARK : ThaiOffset::TONE_MARK_ALONE;
      toneGlyph.zeroAdvance = true;
      cluster.glyphs.push_back(toneGlyph);
    }

    // 6. Nikhahit/Yamakkan class sign (positioned above; no ascender shift applied)
    if (aboveSign != 0) {
      PositionedGlyph signGlyph;
      signGlyph.codepoint = aboveSign;
      signGlyph.xOffset = 0;
      // Position depends on what's already above
      if (toneMark != 0) {
        signGlyph.yOffset = ThaiOffset::TONE_MARK - 2;  // Above tone mark
      } else if (aboveVowel != 0) {
        signGlyph.yOffset = ThaiOffset::TONE_MARK;  // Above above-vowel
      } else {
        signGlyph.yOffset = ThaiOffset::TONE_MARK_ALONE;
      }
      signGlyph.zeroAdvance = true;
      cluster.glyphs.push_back(signGlyph);
    }
  }

  // 7. Follow vowel (displayed after base, advances the cursor). Like the
  //    leading vowel, it is emitted even when no base consonant was found.
  if (followVowel != 0) {
    cluster.glyphs.push_back(makeStandaloneGlyph(followVowel));
  }

#if THAI_CLUSTER_DEBUG_LOGGING
  Serial.printf("[THAI] Cluster built with %zu glyphs: ", cluster.glyphs.size());
  for (const auto& g : cluster.glyphs) {
    Serial.printf("U+%04X ", g.codepoint);
  }
  Serial.printf("(lead=%04X base=%04X above=%04X below=%04X tone=%04X follow=%04X)\n", leadingVowel, baseConsonant,
                aboveVowel, belowVowel, toneMark, followVowel);
#endif

  return cluster;
}

}  // namespace ThaiShaper