fix: Prevent line breaks on common English contractions (#1405)
This commit is contained in:
@@ -102,7 +102,7 @@ void appendSegmentPatternBreaks(const std::vector<CodepointInfo>& cps, const Lan
|
||||
void appendApostropheContractionBreaks(const std::vector<CodepointInfo>& cps,
|
||||
std::vector<Hyphenator::BreakInfo>& outBreaks) {
|
||||
constexpr size_t kMinLeftSegmentLen = 3;
|
||||
- constexpr size_t kMinRightSegmentLen = 2;
|
||||
+ constexpr size_t kMinRightSegmentLen = 3;
|
||||
size_t segmentStart = 0;
|
||||
|
||||
for (size_t i = 0; i < cps.size(); ++i) {
|
||||
@@ -123,7 +123,7 @@ void appendApostropheContractionBreaks(const std::vector<CodepointInfo>& cps,
|
||||
}
|
||||
}
|
||||
|
||||
- // Avoid stranding short clitics like "l'"/"d'" or tiny suffixes like "'t".
|
||||
+ // Avoid stranding short clitics like "l'"/"d'" or contraction tails like "'ve"/"'re"/"'ll".
|
||||
if (leftPrefixLen >= kMinLeftSegmentLen && rightSuffixLen >= kMinRightSegmentLen) {
|
||||
outBreaks.push_back({cps[i + 1].byteOffset, false});
|
||||
}
|
||||
|
||||
@@ -26,8 +26,8 @@ class Hyphenator {
|
||||
// 2. Apostrophe contractions between letters (e.g. all'improvviso).
|
||||
// Liang patterns are run per alphabetic segment around apostrophes.
|
||||
// A direct break at the apostrophe boundary is allowed only when the left
|
||||
- // segment has at least 3 letters and the right segment has at least 2 letters,
|
||||
- // avoiding short clitics (e.g. l', d') and short contraction tails (e.g. can't).
|
||||
+ // segment has at least 3 letters and the right segment has at least 3 letters,
|
||||
+ // avoiding short clitics (e.g. l', d') and contraction tails (e.g. 've, 're, 'll).
|
||||
// 3. Language-specific Liang patterns (e.g. German de_patterns).
|
||||
// Example: "Quadratkilometer" -> Qua|drat|ki|lo|me|ter.
|
||||
// 4. Fallback every-N-chars splitting (only when includeFallback is true AND no
|
||||
|
||||
Reference in New Issue
Block a user