fix: correct hyphenation of URLs (port upstream PR #1068)

Add '/' as an explicit hyphen delimiter and relax the alphabetic-surround
requirement for '/' and '-' in buildExplicitBreakInfos so URL path
segments can be line-wrapped. Includes a repeated-separator guard to
prevent breaks between consecutive identical separators (e.g. "http://").

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
cottongin
2026-02-21 17:26:09 -05:00
parent 0e2440aea8
commit ff33b2b3be
2 changed files with 8 additions and 3 deletions

View File

@@ -106,6 +106,7 @@ bool isAsciiDigit(const uint32_t cp) { return cp >= '0' && cp <= '9'; }
bool isExplicitHyphen(const uint32_t cp) {
switch (cp) {
case '/':
case '-':
case 0x00AD: // soft hyphen
case 0x058A: // Armenian hyphen

View File

@@ -35,13 +35,17 @@ size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t in
std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) {
std::vector<Hyphenator::BreakInfo> breaks;
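// Scan every codepoint looking for explicit/soft hyphen markers. '/' and '-' may break next to any character (but never directly before a repeat of the same separator); all other markers must be surrounded by letters.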
for (size_t i = 1; i + 1 < cps.size(); ++i) {
const uint32_t cp = cps[i].value;
if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
if (!isExplicitHyphen(cp)) {
continue;
}
if ((cp == '/' || cp == '-') && cps[i + 1].value == cp) {
continue;
}
if (cp != '/' && cp != '-' && (!isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value))) {
continue;
}
// Offset points to the next codepoint so rendering starts after the hyphen marker.
breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
}