// Listing metadata: 98 lines, 2.2 KiB, C++
#include "ThaiCharacter.h"
|
||
|
||
#include <Utf8.h>
|
||
|
||
namespace ThaiShaper {
|
||
|
||
/**
 * Classify a Unicode code point by the role it plays in Thai text.
 *
 * Code points outside U+0E00-U+0E7F yield NON_THAI. Inside that range the
 * code point is mapped to consonant, leading/above/below/follow vowel,
 * tone mark, nikhahit, yamakkan, or Thai digit; anything left over is
 * reported as THAI_SYMBOL.
 *
 * @param cp Unicode code point to classify.
 * @return The ThaiCharType category for cp.
 */
ThaiCharType getThaiCharType(uint32_t cp) {
  // Inclusive range test against the code point being classified.
  const auto within = [cp](uint32_t first, uint32_t last) {
    return first <= cp && cp <= last;
  };

  // Anything outside U+0E00-U+0E7F is not Thai at all.
  if (!within(0x0E00, 0x0E7F)) {
    return ThaiCharType::NON_THAI;
  }

  // Consonants ก-ฮ. The range stops at U+0E2E on purpose: U+0E2F (ฯ,
  // PAIYANNOI) is punctuation and ends up in the symbol bucket below.
  if (within(0x0E01, 0x0E2E)) {
    return ThaiCharType::CONSONANT;
  }

  // Leading vowels เ แ โ ไ ใ.
  if (within(0x0E40, 0x0E44)) {
    return ThaiCharType::LEADING_VOWEL;
  }

  // Thai digits ๐-๙.
  if (within(0x0E50, 0x0E59)) {
    return ThaiCharType::THAI_DIGIT;
  }

  // The remaining categories are scattered single code points.
  switch (cp) {
    // Vowels and marks placed above the consonant.
    case 0x0E31:  // MAI HAN-AKAT (ั)
    case 0x0E34:  // SARA I (ิ)
    case 0x0E35:  // SARA II (ี)
    case 0x0E36:  // SARA UE (ึ)
    case 0x0E37:  // SARA UEE (ื)
    case 0x0E47:  // MAITAIKHU (็)
      return ThaiCharType::ABOVE_VOWEL;

    // Vowels and marks placed below the consonant.
    case 0x0E38:  // SARA U (ุ)
    case 0x0E39:  // SARA UU (ู)
    case 0x0E3A:  // PHINTHU (ฺ)
      return ThaiCharType::BELOW_VOWEL;

    // Tone marks.
    case 0x0E48:  // MAI EK (่)
    case 0x0E49:  // MAI THO (้)
    case 0x0E4A:  // MAI TRI (๊)
    case 0x0E4B:  // MAI CHATTAWA (๋)
      return ThaiCharType::TONE_MARK;

    // Vowels displayed after the consonant.
    case 0x0E30:  // SARA A (ะ)
    case 0x0E32:  // SARA AA (า)
    case 0x0E33:  // SARA AM (ำ)
    case 0x0E45:  // LAKKHANGYAO (ๅ)
      return ThaiCharType::FOLLOW_VOWEL;

    case 0x0E4D:  // NIKHAHIT (ํ)
      return ThaiCharType::NIKHAHIT;

    // Thanthakhat and yamakkan share one category.
    case 0x0E4C:  // THANTHAKHAT (์)
    case 0x0E4E:  // YAMAKKAN (๎)
      return ThaiCharType::YAMAKKAN;

    // Everything else in the Thai block is a symbol/punctuation.
    default:
      return ThaiCharType::THAI_SYMBOL;
  }
}
|
||
|
||
bool containsThai(const char* text) {
|
||
if (text == nullptr || *text == '\0') {
|
||
return false;
|
||
}
|
||
|
||
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text);
|
||
uint32_t cp;
|
||
|
||
while ((cp = utf8NextCodepoint(&ptr))) {
|
||
if (isThaiCodepoint(cp)) {
|
||
return true;
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
} // namespace ThaiShaper
|