From ff33b2b3bea5f68b2333f5a7282c2b119a71f5e7 Mon Sep 17 00:00:00 2001
From: cottongin
Date: Sat, 21 Feb 2026 17:26:09 -0500
Subject: [PATCH] fix: correct hyphenation of URLs (port upstream PR #1068)

Add '/' as explicit hyphen delimiter and relax the alphabetic-surround
requirement for '/' and '-' in buildExplicitBreakInfos so URL path
segments can be line-wrapped. Includes repeated-separator guard to
prevent breaks between consecutive identical separators (e.g. "http://").

Co-authored-by: Cursor
---
 lib/Epub/Epub/hyphenation/HyphenationCommon.cpp |  1 +
 lib/Epub/Epub/hyphenation/Hyphenator.cpp        | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
index 15791ae0..3765fdc6 100644
--- a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
+++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
@@ -106,6 +106,7 @@ bool isAsciiDigit(const uint32_t cp) { return cp >= '0' && cp <= '9'; }
 
 bool isExplicitHyphen(const uint32_t cp) {
   switch (cp) {
+    case '/':
     case '-':
     case 0x00AD: // soft hyphen
     case 0x058A: // Armenian hyphen
diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp
index e485083f..aa558e4e 100644
--- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp
@@ -35,13 +35,17 @@ size_t byteOffsetForIndex(const std::vector& cps, const size_t in
 std::vector buildExplicitBreakInfos(const std::vector& cps) {
   std::vector breaks;
 
-  // Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters.
   for (size_t i = 1; i + 1 < cps.size(); ++i) {
     const uint32_t cp = cps[i].value;
-    if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
+    if (!isExplicitHyphen(cp)) {
+      continue;
+    }
+    if ((cp == '/' || cp == '-') && cps[i + 1].value == cp) {
+      continue;
+    }
+    if (cp != '/' && cp != '-' && (!isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value))) {
       continue;
     }
-    // Offset points to the next codepoint so rendering starts after the hyphen marker.
     breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
   }
 