crosspoint-reader/lib/ThaiShaper/ThaiCharacter.cpp

98 lines
2.2 KiB
C++
Raw Normal View History

2026-01-22 00:07:29 +07:00
#include "ThaiCharacter.h"
#include <Utf8.h>
namespace ThaiShaper {
ThaiCharType getThaiCharType(uint32_t cp) {
// Not in Thai block
if (cp < 0x0E00 || cp > 0x0E7F) {
return ThaiCharType::NON_THAI;
}
// Thai consonants: ก-ฮ (U+0E01-U+0E2E)
// Note: U+0E2F (ฯ) is PAIYANNOI, a punctuation mark
if (cp >= 0x0E01 && cp <= 0x0E2E) {
return ThaiCharType::CONSONANT;
}
// Leading vowels: เ แ โ ไ ใ (U+0E40-U+0E44)
if (cp >= 0x0E40 && cp <= 0x0E44) {
return ThaiCharType::LEADING_VOWEL;
}
// Above vowels and marks
switch (cp) {
case 0x0E31: // MAI HAN-AKAT (ั)
case 0x0E34: // SARA I (ิ)
case 0x0E35: // SARA II (ี)
case 0x0E36: // SARA UE (ึ)
case 0x0E37: // SARA UEE (ื)
case 0x0E47: // MAITAIKHU (็)
return ThaiCharType::ABOVE_VOWEL;
}
// Below vowels
switch (cp) {
case 0x0E38: // SARA U (ุ)
case 0x0E39: // SARA UU (ู)
case 0x0E3A: // PHINTHU (ฺ)
return ThaiCharType::BELOW_VOWEL;
}
// Tone marks
switch (cp) {
case 0x0E48: // MAI EK (่)
case 0x0E49: // MAI THO (้)
case 0x0E4A: // MAI TRI (๊)
case 0x0E4B: // MAI CHATTAWA (๋)
return ThaiCharType::TONE_MARK;
}
// Follow vowels (vowels that display after consonant)
switch (cp) {
case 0x0E30: // SARA A (ะ)
case 0x0E32: // SARA AA (า)
case 0x0E33: // SARA AM (ำ)
case 0x0E45: // LAKKHANGYAO (ๅ)
return ThaiCharType::FOLLOW_VOWEL;
}
// Nikhahit
if (cp == 0x0E4D) {
return ThaiCharType::NIKHAHIT;
}
// Yamakkan / Thanthakhat
if (cp == 0x0E4C || cp == 0x0E4E) {
return ThaiCharType::YAMAKKAN;
}
// Thai digits: -๙ (U+0E50-U+0E59)
if (cp >= 0x0E50 && cp <= 0x0E59) {
return ThaiCharType::THAI_DIGIT;
}
// Everything else in Thai block is a symbol/punctuation
return ThaiCharType::THAI_SYMBOL;
}
bool containsThai(const char* text) {
if (text == nullptr || *text == '\0') {
return false;
}
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text);
uint32_t cp;
while ((cp = utf8NextCodepoint(&ptr))) {
if (isThaiCodepoint(cp)) {
return true;
}
}
return false;
}
} // namespace ThaiShaper