fix: correct hyphenation of URLs (port upstream PR #1068)
Add '/' as an explicit hyphen delimiter and relax the alphabetic-surround requirement for '/' and '-' in buildExplicitBreakInfos so that URL path segments can be line-wrapped. Includes a repeated-separator guard that prevents a break between two consecutive identical separators (e.g. the "//" in "http://").

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -106,6 +106,7 @@ bool isAsciiDigit(const uint32_t cp) { return cp >= '0' && cp <= '9'; }
|
||||
|
||||
bool isExplicitHyphen(const uint32_t cp) {
|
||||
switch (cp) {
|
||||
case '/':
|
||||
case '-':
|
||||
case 0x00AD: // soft hyphen
|
||||
case 0x058A: // Armenian hyphen
|
||||
|
||||
@@ -35,13 +35,17 @@ size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t in
|
||||
std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) {
|
||||
std::vector<Hyphenator::BreakInfo> breaks;
|
||||
|
||||
// Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters.
|
||||
for (size_t i = 1; i + 1 < cps.size(); ++i) {
|
||||
const uint32_t cp = cps[i].value;
|
||||
if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
|
||||
if (!isExplicitHyphen(cp)) {
|
||||
continue;
|
||||
}
|
||||
if ((cp == '/' || cp == '-') && cps[i + 1].value == cp) {
|
||||
continue;
|
||||
}
|
||||
if (cp != '/' && cp != '-' && (!isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value))) {
|
||||
continue;
|
||||
}
|
||||
// Offset points to the next codepoint so rendering starts after the hyphen marker.
|
||||
breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user