diff --git a/lib/EpdFont/EpdFont.cpp b/lib/EpdFont/EpdFont.cpp index 5b770462..6d777a3e 100644 --- a/lib/EpdFont/EpdFont.cpp +++ b/lib/EpdFont/EpdFont.cpp @@ -17,6 +17,11 @@ void EpdFont::getTextBounds(const char* string, const int startX, const int star int cursorX = startX; const int cursorY = startY; + int lastBaseX = startX; + int lastBaseAdvance = 0; + int lastBaseTop = 0; + bool hasBaseGlyph = false; + constexpr int MIN_COMBINING_GAP_PX = 1; uint32_t cp; while ((cp = utf8NextCodepoint(reinterpret_cast(&string)))) { const EpdGlyph* glyph = getGlyph(cp); @@ -30,11 +35,30 @@ void EpdFont::getTextBounds(const char* string, const int startX, const int star continue; } - *minX = std::min(*minX, cursorX + glyph->left); - *maxX = std::max(*maxX, cursorX + glyph->left + glyph->width); - *minY = std::min(*minY, cursorY + glyph->top - glyph->height); - *maxY = std::max(*maxY, cursorY + glyph->top); - cursorX += glyph->advanceX; + const bool isCombining = utf8IsCombiningMark(cp); + int raiseBy = 0; + if (isCombining && hasBaseGlyph) { + const int currentGap = glyph->top - glyph->height - lastBaseTop; + if (currentGap < MIN_COMBINING_GAP_PX) { + raiseBy = MIN_COMBINING_GAP_PX - currentGap; + } + } + + const int glyphBaseX = (isCombining && hasBaseGlyph) ? (lastBaseX + lastBaseAdvance / 2) : cursorX; + const int glyphBaseY = cursorY - raiseBy; + + *minX = std::min(*minX, glyphBaseX + glyph->left); + *maxX = std::max(*maxX, glyphBaseX + glyph->left + glyph->width); + *minY = std::min(*minY, glyphBaseY + glyph->top - glyph->height); + *maxY = std::max(*maxY, glyphBaseY + glyph->top); + + if (!isCombining) { + lastBaseX = cursorX; + lastBaseAdvance = glyph->advanceX; + lastBaseTop = glyph->top; + hasBaseGlyph = true; + cursorX += glyph->advanceX; + } } } diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 1a0d2c56..867b5515 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -378,20 +378,35 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl words.insert(insertWordIt, remainder); wordStyles.insert(insertStyleIt, style); - // The remainder inherits whatever continuation status the original word had with the word after it. - // Find the continues entry for the original word and insert the remainder's entry after it. + // Continuation flag handling after splitting a word into prefix + remainder. + // + // The prefix keeps the original word's continuation flag so that no-break-space groups + // stay linked. The remainder always gets continues=false because it starts on the next + // line and is not attached to the prefix. + // + // Example: "200 Quadratkilometer" produces tokens: + // [0] "200" continues=false + // [1] " " continues=true + // [2] "Quadratkilometer" continues=true <-- the word being split + // + // After splitting "Quadratkilometer" at "Quadrat-" / "kilometer": + // [0] "200" continues=false + // [1] " " continues=true + // [2] "Quadrat-" continues=true (KEPT — still attached to the no-break group) + // [3] "kilometer" continues=false (NEW — starts fresh on the next line) + // + // This lets the backtracking loop keep the entire prefix group ("200 Quadrat-") on one + // line, while "kilometer" moves to the next line. auto continuesIt = wordContinues.begin(); std::advance(continuesIt, wordIndex); - const bool originalContinuedToNext = *continuesIt; - // The original word (now prefix) does NOT continue to remainder (hyphen separates them) - *continuesIt = false; + // *continuesIt is intentionally left unchanged — the prefix keeps its original attachment. const auto insertContinuesIt = std::next(continuesIt); - wordContinues.insert(insertContinuesIt, originalContinuedToNext); + wordContinues.insert(insertContinuesIt, false); - // Keep the indexed vector in sync if provided + // Keep the indexed vector in sync if provided. if (continuesVec) { - (*continuesVec)[wordIndex] = false; - continuesVec->insert(continuesVec->begin() + wordIndex + 1, originalContinuedToNext); + // (*continuesVec)[wordIndex] stays unchanged — prefix keeps its attachment. + continuesVec->insert(continuesVec->begin() + wordIndex + 1, false); } // Update cached widths to reflect the new prefix/remainder pairing. diff --git a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp index 0a6b7a92..15791ae0 100644 --- a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp +++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp @@ -174,6 +174,213 @@ std::vector collectCodepoints(const std::string& word) { while (*ptr != 0) { const unsigned char* current = ptr; const uint32_t cp = utf8NextCodepoint(&ptr); + // If this is a combining diacritic (e.g., U+0301 = acute) and there's + // a previous base character that can be composed into a single + // precomposed Unicode scalar (Latin-1 / Latin-Extended), do that + // composition here. This provides lightweight NFC-like behavior for + // common Western European diacritics (acute, grave, circumflex, tilde, + // diaeresis, cedilla) without pulling in a full Unicode normalization + // library. + if (!cps.empty()) { + uint32_t prev = cps.back().value; + uint32_t composed = 0; + switch (cp) { + case 0x0300: // grave + switch (prev) { + case 0x0041: + composed = 0x00C0; + break; // A -> À + case 0x0061: + composed = 0x00E0; + break; // a -> à + case 0x0045: + composed = 0x00C8; + break; // E -> È + case 0x0065: + composed = 0x00E8; + break; // e -> è + case 0x0049: + composed = 0x00CC; + break; // I -> Ì + case 0x0069: + composed = 0x00EC; + break; // i -> ì + case 0x004F: + composed = 0x00D2; + break; // O -> Ò + case 0x006F: + composed = 0x00F2; + break; // o -> ò + case 0x0055: + composed = 0x00D9; + break; // U -> Ù + case 0x0075: + composed = 0x00F9; + break; // u -> ù + default: + break; + } + break; + case 0x0301: // acute + switch (prev) { + case 0x0041: + composed = 0x00C1; + break; // A -> Á + case 0x0061: + composed = 0x00E1; + break; // a -> á + case 0x0045: + composed = 0x00C9; + break; // E -> É + case 0x0065: + composed = 0x00E9; + break; // e -> é + case 0x0049: + composed = 0x00CD; + break; // I -> Í + case 0x0069: + composed = 0x00ED; + break; // i -> í + case 0x004F: + composed = 0x00D3; + break; // O -> Ó + case 0x006F: + composed = 0x00F3; + break; // o -> ó + case 0x0055: + composed = 0x00DA; + break; // U -> Ú + case 0x0075: + composed = 0x00FA; + break; // u -> ú + case 0x0059: + composed = 0x00DD; + break; // Y -> Ý + case 0x0079: + composed = 0x00FD; + break; // y -> ý + default: + break; + } + break; + case 0x0302: // circumflex + switch (prev) { + case 0x0041: + composed = 0x00C2; + break; // A -> Â + case 0x0061: + composed = 0x00E2; + break; // a -> â + case 0x0045: + composed = 0x00CA; + break; // E -> Ê + case 0x0065: + composed = 0x00EA; + break; // e -> ê + case 0x0049: + composed = 0x00CE; + break; // I -> Î + case 0x0069: + composed = 0x00EE; + break; // i -> î + case 0x004F: + composed = 0x00D4; + break; // O -> Ô + case 0x006F: + composed = 0x00F4; + break; // o -> ô + case 0x0055: + composed = 0x00DB; + break; // U -> Û + case 0x0075: + composed = 0x00FB; + break; // u -> û + default: + break; + } + break; + case 0x0303: // tilde + switch (prev) { + case 0x0041: + composed = 0x00C3; + break; // A -> Ã + case 0x0061: + composed = 0x00E3; + break; // a -> ã + case 0x004E: + composed = 0x00D1; + break; // N -> Ñ + case 0x006E: + composed = 0x00F1; + break; // n -> ñ + default: + break; + } + break; + case 0x0308: // diaeresis/umlaut + switch (prev) { + case 0x0041: + composed = 0x00C4; + break; // A -> Ä + case 0x0061: + composed = 0x00E4; + break; // a -> ä + case 0x0045: + composed = 0x00CB; + break; // E -> Ë + case 0x0065: + composed = 0x00EB; + break; // e -> ë + case 0x0049: + composed = 0x00CF; + break; // I -> Ï + case 0x0069: + composed = 0x00EF; + break; // i -> ï + case 0x004F: + composed = 0x00D6; + break; // O -> Ö + case 0x006F: + composed = 0x00F6; + break; // o -> ö + case 0x0055: + composed = 0x00DC; + break; // U -> Ü + case 0x0075: + composed = 0x00FC; + break; // u -> ü + case 0x0059: + composed = 0x0178; + break; // Y -> Ÿ + case 0x0079: + composed = 0x00FF; + break; // y -> ÿ + default: + break; + } + break; + case 0x0327: // cedilla + switch (prev) { + case 0x0043: + composed = 0x00C7; + break; // C -> Ç + case 0x0063: + composed = 0x00E7; + break; // c -> ç + default: + break; + } + break; + default: + break; + } + + if (composed != 0) { + cps.back().value = composed; + continue; // skip pushing the combining mark itself + } + } + cps.push_back({cp, static_cast(current - base)}); } diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp index e485083f..4d86febe 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp @@ -1,8 +1,10 @@ #include "Hyphenator.h" +#include #include #include "HyphenationCommon.h" +#include "LanguageHyphenator.h" #include "LanguageRegistry.h" const LanguageHyphenator* Hyphenator::cachedHyphenator_ = nullptr; @@ -32,10 +34,19 @@ size_t byteOffsetForIndex(const std::vector& cps, const size_t in } // Builds a vector of break information from explicit hyphen markers in the given codepoints. +// Only hyphens that appear between two alphabetic characters are considered valid breaks. +// +// Example: "US-Satellitensystems" (cps: U, S, -, S, a, t, ...) +// -> finds '-' at index 2 with alphabetic neighbors 'S' and 'S' +// -> returns one BreakInfo at the byte offset of 'S' (the char after '-'), +// with requiresInsertedHyphen=false because '-' is already visible. +// +// Example: "Satel\u00ADliten" (soft-hyphen between 'l' and 'l') +// -> returns one BreakInfo with requiresInsertedHyphen=true (soft-hyphen +// is invisible and needs a visible '-' when the break is used). std::vector buildExplicitBreakInfos(const std::vector& cps) { std::vector breaks; - // Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters. for (size_t i = 1; i + 1 < cps.size(); ++i) { const uint32_t cp = cps[i].value; if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) { @@ -63,6 +74,43 @@ std::vector Hyphenator::breakOffsets(const std::string& w // Explicit hyphen markers (soft or hard) take precedence over language breaks. auto explicitBreakInfos = buildExplicitBreakInfos(cps); if (!explicitBreakInfos.empty()) { + // When a word contains explicit hyphens we also run Liang patterns on each alphabetic + // segment between them. Without this, "US-Satellitensystems" would only offer one split + // point (after "US-"), making it impossible to break mid-"Satellitensystems" even when + // "US-Satelliten-" would fit on the line. + // + // Example: "US-Satellitensystems" + // Segments: ["US", "Satellitensystems"] + // Explicit break: after "US-" -> @3 (no inserted hyphen) + // Pattern breaks on "Satellitensystems" -> @5 Sa|tel (+hyphen) + // @8 Satel|li (+hyphen) + // @10 Satelli|ten (+hyphen) + // @13 Satelliten|sys (+hyphen) + // @16 Satellitensys|tems (+hyphen) + // Result: 6 sorted break points; the line-breaker picks the widest prefix that fits. + if (hyphenator) { + size_t segStart = 0; + for (size_t i = 0; i <= cps.size(); ++i) { + const bool atEnd = (i == cps.size()); + const bool atHyphen = !atEnd && isExplicitHyphen(cps[i].value); + if (atEnd || atHyphen) { + if (i > segStart) { + std::vector segment(cps.begin() + segStart, cps.begin() + i); + auto segIndexes = hyphenator->breakIndexes(segment); + for (const size_t idx : segIndexes) { + const size_t cpIdx = segStart + idx; + if (cpIdx < cps.size()) { + explicitBreakInfos.push_back({cps[cpIdx].byteOffset, true}); + } + } + } + segStart = i + 1; + } + } + // Merge explicit and pattern breaks into ascending byte-offset order. + std::sort(explicitBreakInfos.begin(), explicitBreakInfos.end(), + [](const BreakInfo& a, const BreakInfo& b) { return a.byteOffset < b.byteOffset; }); + } return explicitBreakInfos; } diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.h b/lib/Epub/Epub/hyphenation/Hyphenator.h index ffbe16fa..4447f9cc 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.h +++ b/lib/Epub/Epub/hyphenation/Hyphenator.h @@ -9,11 +9,24 @@ class LanguageHyphenator; class Hyphenator { public: struct BreakInfo { - size_t byteOffset; - bool requiresInsertedHyphen; + size_t byteOffset; // Byte position inside the UTF-8 word where a break may occur. + bool requiresInsertedHyphen; // true = a visible '-' must be rendered at the break (pattern/fallback breaks). + // false = the word already contains a hyphen at this position (explicit '-'). }; - // Returns byte offsets where the word may be hyphenated. When includeFallback is true, all positions obeying the - // minimum prefix/suffix constraints are returned even if no language-specific rule matches. + + // Returns byte offsets where the word may be hyphenated. + // + // Break sources (in priority order): + // 1. Explicit hyphens already present in the word (e.g. '-' or soft-hyphen U+00AD). + // When found, language patterns are additionally run on each alphabetic segment + // between hyphens so compound words can break within their parts. + // Example: "US-Satellitensystems" yields breaks after "US-" (no inserted hyphen) + // plus pattern breaks inside "Satellitensystems" (Sa|tel|li|ten|sys|tems). + // 2. Language-specific Liang patterns (e.g. German de_patterns). + // Example: "Quadratkilometer" -> Qua|drat|ki|lo|me|ter. + // 3. Fallback every-N-chars splitting (only when includeFallback is true AND no + // pattern breaks were found). Used as a last resort to prevent a single oversized + // word from overflowing the page width. static std::vector breakOffsets(const std::string& word, bool includeFallback); // Provide a publication-level language hint (e.g. "en", "en-US", "ru") used to select hyphenation rules. diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 4fbba8af..90bf8fee 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -594,28 +594,60 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char continue; } - // Detect U+00A0 (non-breaking space): UTF-8 encoding is 0xC2 0xA0 - // Render a visible space without allowing a line break around it. + // Detect U+00A0 (non-breaking space, UTF-8: 0xC2 0xA0) or + // U+202F (narrow no-break space, UTF-8: 0xE2 0x80 0xAF). + // + // Both are rendered as a visible space but must never allow a line break around them. + // We split the no-break space into its own word token and link the surrounding words + // with continuation flags so the layout engine treats them as an indivisible group. + // + // Example: "200 Quadratkilometer" or "200 Quadratkilometer" + // Input bytes: "200\xC2\xA0Quadratkilometer" (or 0xE2 0x80 0xAF for U+202F) + // Tokens produced: + // [0] "200" continues=false + // [1] " " continues=true (attaches to "200", no gap) + // [2] "Quadratkilometer" continues=true (attaches to " ", no gap) + // + // The continuation flags prevent the line-breaker from inserting a line break + // between "200" and "Quadratkilometer". However, "Quadratkilometer" is now a + // standalone word for hyphenation purposes, so Liang patterns can produce + // "200 Quadrat-" / "kilometer" instead of the unusable "200" / "Quadratkilometer". if (static_cast(s[i]) == 0xC2 && i + 1 < len && static_cast(s[i + 1]) == 0xA0) { - // Flush any pending text so style is applied correctly. if (self->partWordBufferIndex > 0) { self->flushPartWordBuffer(); } - // Add a standalone space that attaches to the previous word. self->partWordBuffer[0] = ' '; self->partWordBuffer[1] = '\0'; self->partWordBufferIndex = 1; self->nextWordContinues = true; // Attach space to previous word (no break). self->flushPartWordBuffer(); - // Ensure the next real word attaches to this space (no break). - self->nextWordContinues = true; + self->nextWordContinues = true; // Next real word attaches to this space (no break). i++; // Skip the second byte (0xA0) continue; } + // U+202F (narrow no-break space) — identical logic to U+00A0 above. + if (static_cast(s[i]) == 0xE2 && i + 2 < len && static_cast(s[i + 1]) == 0x80 && + static_cast(s[i + 2]) == 0xAF) { + if (self->partWordBufferIndex > 0) { + self->flushPartWordBuffer(); + } + + self->partWordBuffer[0] = ' '; + self->partWordBuffer[1] = '\0'; + self->partWordBufferIndex = 1; + self->nextWordContinues = true; + self->flushPartWordBuffer(); + + self->nextWordContinues = true; + + i += 2; // Skip the remaining two bytes (0x80 0xAF) + continue; + } + // Skip Zero Width No-Break Space / BOM (U+FEFF) = 0xEF 0xBB 0xBF const XML_Char FEFF_BYTE_1 = static_cast(0xEF); const XML_Char FEFF_BYTE_2 = static_cast(0xBB); diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index b385bc03..02ce8362 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -157,10 +157,12 @@ static void renderCharImpl(const GfxRenderer& renderer, GfxRenderer::RenderMode } } - if constexpr (rotation == TextRotation::Rotated90CW) { - *cursorY -= glyph->advanceX; - } else { - *cursorX += glyph->advanceX; + if (!utf8IsCombiningMark(cp)) { + if constexpr (rotation == TextRotation::Rotated90CW) { + *cursorY -= glyph->advanceX; + } else { + *cursorX += glyph->advanceX; + } } } @@ -212,6 +214,11 @@ void GfxRenderer::drawText(const int fontId, const int x, const int y, const cha const EpdFontFamily::Style style) const { int yPos = y + getFontAscenderSize(fontId); int xpos = x; + int lastBaseX = x; + int lastBaseY = yPos; + int lastBaseAdvance = 0; + int lastBaseTop = 0; + bool hasBaseGlyph = false; // cannot draw a NULL / empty string if (text == nullptr || *text == '\0') { @@ -224,9 +231,43 @@ void GfxRenderer::drawText(const int fontId, const int x, const int y, const cha return; } const auto& font = fontIt->second; + constexpr int MIN_COMBINING_GAP_PX = 1; uint32_t cp; while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + if (utf8IsCombiningMark(cp) && hasBaseGlyph) { + const EpdGlyph* combiningGlyph = font.getGlyph(cp, style); + if (!combiningGlyph) { + combiningGlyph = font.getGlyph(REPLACEMENT_GLYPH, style); + } + + int raiseBy = 0; + if (combiningGlyph) { + const int currentGap = combiningGlyph->top - combiningGlyph->height - lastBaseTop; + if (currentGap < MIN_COMBINING_GAP_PX) { + raiseBy = MIN_COMBINING_GAP_PX - currentGap; + } + } + + int combiningX = lastBaseX + lastBaseAdvance / 2; + int combiningY = lastBaseY - raiseBy; + renderChar(font, cp, &combiningX, &combiningY, black, style); + continue; + } + + const EpdGlyph* glyph = font.getGlyph(cp, style); + if (!glyph) { + glyph = font.getGlyph(REPLACEMENT_GLYPH, style); + } + + if (!utf8IsCombiningMark(cp)) { + lastBaseX = xpos; + lastBaseY = yPos; + lastBaseAdvance = glyph ? glyph->advanceX : 0; + lastBaseTop = glyph ? glyph->top : 0; + hasBaseGlyph = true; + } + renderChar(font, cp, &xpos, &yPos, black, style); } } @@ -864,6 +905,9 @@ int GfxRenderer::getTextAdvanceX(const int fontId, const char* text, const EpdFo int width = 0; const auto& font = fontIt->second; while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + if (utf8IsCombiningMark(cp)) { + continue; + } const EpdGlyph* glyph = font.getGlyph(cp, style); if (!glyph) glyph = font.getGlyph(REPLACEMENT_GLYPH, style); if (glyph) width += glyph->advanceX; @@ -917,9 +961,48 @@ void GfxRenderer::drawTextRotated90CW(const int fontId, const int x, const int y int xPos = x; int yPos = y; + int lastBaseX = x; + int lastBaseY = y; + int lastBaseAdvance = 0; + int lastBaseTop = 0; + bool hasBaseGlyph = false; + constexpr int MIN_COMBINING_GAP_PX = 1; uint32_t cp; while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + if (utf8IsCombiningMark(cp) && hasBaseGlyph) { + const EpdGlyph* combiningGlyph = font.getGlyph(cp, style); + if (!combiningGlyph) { + combiningGlyph = font.getGlyph(REPLACEMENT_GLYPH, style); + } + + int raiseBy = 0; + if (combiningGlyph) { + const int currentGap = combiningGlyph->top - combiningGlyph->height - lastBaseTop; + if (currentGap < MIN_COMBINING_GAP_PX) { + raiseBy = MIN_COMBINING_GAP_PX - currentGap; + } + } + + int combiningX = lastBaseX - raiseBy; + int combiningY = lastBaseY - lastBaseAdvance / 2; + renderCharImpl(*this, renderMode, font, cp, &combiningX, &combiningY, black, style); + continue; + } + + const EpdGlyph* glyph = font.getGlyph(cp, style); + if (!glyph) { + glyph = font.getGlyph(REPLACEMENT_GLYPH, style); + } + + if (!utf8IsCombiningMark(cp)) { + lastBaseX = xPos; + lastBaseY = yPos; + lastBaseAdvance = glyph ? glyph->advanceX : 0; + lastBaseTop = glyph ? glyph->top : 0; + hasBaseGlyph = true; + } + renderCharImpl(*this, renderMode, font, cp, &xPos, &yPos, black, style); } } diff --git a/lib/Utf8/Utf8.h b/lib/Utf8/Utf8.h index 23d63a4e..cce7c0d6 100644 --- a/lib/Utf8/Utf8.h +++ b/lib/Utf8/Utf8.h @@ -9,3 +9,11 @@ uint32_t utf8NextCodepoint(const unsigned char** string); size_t utf8RemoveLastChar(std::string& str); // Truncate string by removing N UTF-8 codepoints from the end. void utf8TruncateChars(std::string& str, size_t numChars); + +// Returns true for Unicode combining diacritical marks that should not advance the cursor. +inline bool utf8IsCombiningMark(const uint32_t cp) { + return (cp >= 0x0300 && cp <= 0x036F) // Combining Diacritical Marks + || (cp >= 0x1DC0 && cp <= 0x1DFF) // Combining Diacritical Marks Supplement + || (cp >= 0x20D0 && cp <= 0x20FF) // Combining Diacritical Marks for Symbols + || (cp >= 0xFE20 && cp <= 0xFE2F); // Combining Half Marks +}