fix: correct hyphenation of URLs (port upstream PR #1068)
Add '/' as an explicit hyphen delimiter and relax the alphabetic-surround requirement for '/' and '-' in buildExplicitBreakInfos so that URL path segments can be line-wrapped. Also include a repeated-separator guard that prevents a break between two consecutive identical separators (e.g. inside the "//" of "http://").

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -106,6 +106,7 @@ bool isAsciiDigit(const uint32_t cp) { return cp >= '0' && cp <= '9'; }
|
|||||||
|
|
||||||
bool isExplicitHyphen(const uint32_t cp) {
|
bool isExplicitHyphen(const uint32_t cp) {
|
||||||
switch (cp) {
|
switch (cp) {
|
||||||
|
case '/':
|
||||||
case '-':
|
case '-':
|
||||||
case 0x00AD: // soft hyphen
|
case 0x00AD: // soft hyphen
|
||||||
case 0x058A: // Armenian hyphen
|
case 0x058A: // Armenian hyphen
|
||||||
|
|||||||
@@ -35,13 +35,17 @@ size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t in
|
|||||||
std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) {
|
std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) {
|
||||||
std::vector<Hyphenator::BreakInfo> breaks;
|
std::vector<Hyphenator::BreakInfo> breaks;
|
||||||
|
|
||||||
// Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters.
|
|
||||||
for (size_t i = 1; i + 1 < cps.size(); ++i) {
|
for (size_t i = 1; i + 1 < cps.size(); ++i) {
|
||||||
const uint32_t cp = cps[i].value;
|
const uint32_t cp = cps[i].value;
|
||||||
if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
|
if (!isExplicitHyphen(cp)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ((cp == '/' || cp == '-') && cps[i + 1].value == cp) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (cp != '/' && cp != '-' && (!isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Offset points to the next codepoint so rendering starts after the hyphen marker.
|
|
||||||
breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
|
breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user