- #1038 (partial): Add .erase() for consumed words in layoutAndExtractLines to fix redundant early flush bug; fix wordContinues flag in hyphenateWordAtIndex
- #1037: Add combining mark handling for hyphenation (NFC-like precomposition) and rendering (base glyph tracking in EpdFont, GfxRenderer including CCW)
- #1045: Shorten STR_FORGET_BUTTON labels across all 9 translation files
- #1019: Display file extensions in File Browser via getFileExtension helper
- Pull romanian.yaml from upstream/master (merged PR #987)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -100,6 +100,15 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
|
||||
for (size_t i = 0; i < lineCount; ++i) {
|
||||
extractLine(i, pageWidth, spaceWidth, wordWidths, wordContinues, lineBreakIndices, processLine);
|
||||
}
|
||||
|
||||
// Remove consumed words so size() reflects only remaining words
|
||||
if (lineCount > 0) {
|
||||
const size_t consumed = lineBreakIndices[lineCount - 1];
|
||||
words.erase(words.begin(), words.begin() + consumed);
|
||||
wordStyles.erase(wordStyles.begin(), wordStyles.begin() + consumed);
|
||||
wordContinues.erase(wordContinues.begin(), wordContinues.begin() + consumed);
|
||||
forceBreakAfter.erase(forceBreakAfter.begin(), forceBreakAfter.begin() + consumed);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
|
||||
@@ -392,11 +401,8 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
||||
words.insert(words.begin() + wordIndex + 1, remainder);
|
||||
wordStyles.insert(wordStyles.begin() + wordIndex + 1, style);
|
||||
|
||||
// The remainder inherits whatever continuation status the original word had with the word after it.
|
||||
const bool originalContinuedToNext = wordContinues[wordIndex];
|
||||
// The original word (now prefix) does NOT continue to remainder (hyphen separates them)
|
||||
wordContinues[wordIndex] = false;
|
||||
wordContinues.insert(wordContinues.begin() + wordIndex + 1, originalContinuedToNext);
|
||||
// Preserve the prefix's attach-to-previous flag; allow a break between prefix and remainder.
|
||||
wordContinues.insert(wordContinues.begin() + wordIndex + 1, false);
|
||||
|
||||
// Forced break belongs to the original whole word; transfer it to the remainder (last part).
|
||||
if (!forceBreakAfter.empty()) {
|
||||
|
||||
@@ -174,6 +174,213 @@ std::vector<CodepointInfo> collectCodepoints(const std::string& word) {
|
||||
while (*ptr != 0) {
|
||||
const unsigned char* current = ptr;
|
||||
const uint32_t cp = utf8NextCodepoint(&ptr);
|
||||
// If this is a combining diacritic (e.g., U+0301 = acute) and there's
|
||||
// a previous base character that can be composed into a single
|
||||
// precomposed Unicode scalar (Latin-1 / Latin-Extended), do that
|
||||
// composition here. This provides lightweight NFC-like behavior for
|
||||
// common Western European diacritics (acute, grave, circumflex, tilde,
|
||||
// diaeresis, cedilla) without pulling in a full Unicode normalization
|
||||
// library.
|
||||
if (!cps.empty()) {
|
||||
uint32_t prev = cps.back().value;
|
||||
uint32_t composed = 0;
|
||||
switch (cp) {
|
||||
case 0x0300: // grave
|
||||
switch (prev) {
|
||||
case 0x0041:
|
||||
composed = 0x00C0;
|
||||
break; // A -> À
|
||||
case 0x0061:
|
||||
composed = 0x00E0;
|
||||
break; // a -> à
|
||||
case 0x0045:
|
||||
composed = 0x00C8;
|
||||
break; // E -> È
|
||||
case 0x0065:
|
||||
composed = 0x00E8;
|
||||
break; // e -> è
|
||||
case 0x0049:
|
||||
composed = 0x00CC;
|
||||
break; // I -> Ì
|
||||
case 0x0069:
|
||||
composed = 0x00EC;
|
||||
break; // i -> ì
|
||||
case 0x004F:
|
||||
composed = 0x00D2;
|
||||
break; // O -> Ò
|
||||
case 0x006F:
|
||||
composed = 0x00F2;
|
||||
break; // o -> ò
|
||||
case 0x0055:
|
||||
composed = 0x00D9;
|
||||
break; // U -> Ù
|
||||
case 0x0075:
|
||||
composed = 0x00F9;
|
||||
break; // u -> ù
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x0301: // acute
|
||||
switch (prev) {
|
||||
case 0x0041:
|
||||
composed = 0x00C1;
|
||||
break; // A -> Á
|
||||
case 0x0061:
|
||||
composed = 0x00E1;
|
||||
break; // a -> á
|
||||
case 0x0045:
|
||||
composed = 0x00C9;
|
||||
break; // E -> É
|
||||
case 0x0065:
|
||||
composed = 0x00E9;
|
||||
break; // e -> é
|
||||
case 0x0049:
|
||||
composed = 0x00CD;
|
||||
break; // I -> Í
|
||||
case 0x0069:
|
||||
composed = 0x00ED;
|
||||
break; // i -> í
|
||||
case 0x004F:
|
||||
composed = 0x00D3;
|
||||
break; // O -> Ó
|
||||
case 0x006F:
|
||||
composed = 0x00F3;
|
||||
break; // o -> ó
|
||||
case 0x0055:
|
||||
composed = 0x00DA;
|
||||
break; // U -> Ú
|
||||
case 0x0075:
|
||||
composed = 0x00FA;
|
||||
break; // u -> ú
|
||||
case 0x0059:
|
||||
composed = 0x00DD;
|
||||
break; // Y -> Ý
|
||||
case 0x0079:
|
||||
composed = 0x00FD;
|
||||
break; // y -> ý
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x0302: // circumflex
|
||||
switch (prev) {
|
||||
case 0x0041:
|
||||
composed = 0x00C2;
|
||||
break; // A -> Â
|
||||
case 0x0061:
|
||||
composed = 0x00E2;
|
||||
break; // a -> â
|
||||
case 0x0045:
|
||||
composed = 0x00CA;
|
||||
break; // E -> Ê
|
||||
case 0x0065:
|
||||
composed = 0x00EA;
|
||||
break; // e -> ê
|
||||
case 0x0049:
|
||||
composed = 0x00CE;
|
||||
break; // I -> Î
|
||||
case 0x0069:
|
||||
composed = 0x00EE;
|
||||
break; // i -> î
|
||||
case 0x004F:
|
||||
composed = 0x00D4;
|
||||
break; // O -> Ô
|
||||
case 0x006F:
|
||||
composed = 0x00F4;
|
||||
break; // o -> ô
|
||||
case 0x0055:
|
||||
composed = 0x00DB;
|
||||
break; // U -> Û
|
||||
case 0x0075:
|
||||
composed = 0x00FB;
|
||||
break; // u -> û
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x0303: // tilde
|
||||
switch (prev) {
|
||||
case 0x0041:
|
||||
composed = 0x00C3;
|
||||
break; // A -> Ã
|
||||
case 0x0061:
|
||||
composed = 0x00E3;
|
||||
break; // a -> ã
|
||||
case 0x004E:
|
||||
composed = 0x00D1;
|
||||
break; // N -> Ñ
|
||||
case 0x006E:
|
||||
composed = 0x00F1;
|
||||
break; // n -> ñ
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x0308: // diaeresis/umlaut
|
||||
switch (prev) {
|
||||
case 0x0041:
|
||||
composed = 0x00C4;
|
||||
break; // A -> Ä
|
||||
case 0x0061:
|
||||
composed = 0x00E4;
|
||||
break; // a -> ä
|
||||
case 0x0045:
|
||||
composed = 0x00CB;
|
||||
break; // E -> Ë
|
||||
case 0x0065:
|
||||
composed = 0x00EB;
|
||||
break; // e -> ë
|
||||
case 0x0049:
|
||||
composed = 0x00CF;
|
||||
break; // I -> Ï
|
||||
case 0x0069:
|
||||
composed = 0x00EF;
|
||||
break; // i -> ï
|
||||
case 0x004F:
|
||||
composed = 0x00D6;
|
||||
break; // O -> Ö
|
||||
case 0x006F:
|
||||
composed = 0x00F6;
|
||||
break; // o -> ö
|
||||
case 0x0055:
|
||||
composed = 0x00DC;
|
||||
break; // U -> Ü
|
||||
case 0x0075:
|
||||
composed = 0x00FC;
|
||||
break; // u -> ü
|
||||
case 0x0059:
|
||||
composed = 0x0178;
|
||||
break; // Y -> Ÿ
|
||||
case 0x0079:
|
||||
composed = 0x00FF;
|
||||
break; // y -> ÿ
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x0327: // cedilla
|
||||
switch (prev) {
|
||||
case 0x0043:
|
||||
composed = 0x00C7;
|
||||
break; // C -> Ç
|
||||
case 0x0063:
|
||||
composed = 0x00E7;
|
||||
break; // c -> ç
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (composed != 0) {
|
||||
cps.back().value = composed;
|
||||
continue; // skip pushing the combining mark itself
|
||||
}
|
||||
}
|
||||
|
||||
cps.push_back({cp, static_cast<size_t>(current - base)});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user