2025-12-03 22:00:29 +11:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <cstdint>
|
2026-02-01 16:23:48 +05:00
|
|
|
#include <string>
|
2026-01-19 05:58:43 -06:00
|
|
|
// Code point U+FFFD, the Unicode REPLACEMENT CHARACTER.
// NOTE(review): presumably the value the decoder yields for malformed or truncated
// UTF-8 input - confirm against the implementation of utf8NextCodepoint.
#define REPLACEMENT_GLYPH 0xFFFD
|
|
|
|
|
|
2025-12-03 22:00:29 +11:00
|
|
|
// Declaration only (no body here). Takes the address of a byte pointer, so the callee
// is able to reposition the caller's pointer, and returns a 32-bit value.
// NOTE(review): presumably decodes the UTF-8 sequence starting at *string, advances
// *string past it and returns the decoded code point (REPLACEMENT_GLYPH on malformed
// input?) - confirm against the implementation.
uint32_t utf8NextCodepoint(const unsigned char** string);
|
2026-02-01 16:23:48 +05:00
|
|
|
// Remove the last UTF-8 codepoint from a std::string and return the new size.
|
|
|
|
|
// `str` is taken by non-const reference and is modified in place.
// NOTE(review): behavior for an empty string or for malformed trailing bytes is
// decided by the implementation, which is not shown here - confirm before relying on it.
size_t utf8RemoveLastChar(std::string& str);
|
|
|
|
|
// Truncate string by removing N UTF-8 codepoints from the end.
|
|
|
|
|
// `str` is taken by non-const reference and is modified in place; `numChars` is the
// count of codepoints (not bytes) to drop.
// NOTE(review): presumably stops once the string is empty when `numChars` exceeds the
// number of codepoints present - confirm against the implementation.
void utf8TruncateChars(std::string& str, size_t numChars);
|
2026-02-22 03:11:07 +01:00
|
|
|
|
|
|
|
|
// Returns true when `cp` lies in one of the Unicode blocks dedicated to combining
// diacritical marks. Such marks are rendered on top of the preceding base glyph, so
// they should not advance the cursor.
//
// Blocks are matched as whole ranges, so code points inside a listed block are
// reported as combining marks even if the block has not assigned them.
// Combining marks that live inside script-specific blocks are not covered.
//
// The function is constexpr, so it can also be evaluated at compile time.
inline constexpr bool utf8IsCombiningMark(const uint32_t cp) {
  return (cp >= 0x0300 && cp <= 0x036F)     // Combining Diacritical Marks
         || (cp >= 0x1AB0 && cp <= 0x1AFF)  // Combining Diacritical Marks Extended
         || (cp >= 0x1DC0 && cp <= 0x1DFF)  // Combining Diacritical Marks Supplement
         || (cp >= 0x20D0 && cp <= 0x20FF)  // Combining Diacritical Marks for Symbols
         || (cp >= 0xFE20 && cp <= 0xFE2F); // Combining Half Marks
}
|