fix: truncating chapter titles using UTF-8 safe function (#599)
## Summary * Truncating chapter titles using utf8 safe functions (Cyrillic titles were split mid codepoint) * refactoring of lib/Utf8 --- ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? _**< PARTIALLY >**_
This commit is contained in:
committed by
GitHub
parent
0d82b03981
commit
b1dcb7733b
@@ -29,3 +29,20 @@ uint32_t utf8NextCodepoint(const unsigned char** string) {
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
size_t utf8RemoveLastChar(std::string& str) {
|
||||
if (str.empty()) return 0;
|
||||
size_t pos = str.size() - 1;
|
||||
while (pos > 0 && (static_cast<unsigned char>(str[pos]) & 0xC0) == 0x80) {
|
||||
--pos;
|
||||
}
|
||||
str.resize(pos);
|
||||
return pos;
|
||||
}
|
||||
|
||||
// Truncate string by removing N UTF-8 characters from the end
|
||||
void utf8TruncateChars(std::string& str, const size_t numChars) {
|
||||
for (size_t i = 0; i < numChars && !str.empty(); ++i) {
|
||||
utf8RemoveLastChar(str);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user