From dc3948034979c6045d58b15b9b0bba9a3da00ad8 Mon Sep 17 00:00:00 2001 From: Zach Nelson Date: Mon, 16 Mar 2026 19:04:06 -0500 Subject: [PATCH] fix: Prevent line breaks on common English contractions (#1405) --- lib/Epub/Epub/hyphenation/Hyphenator.cpp | 4 ++-- lib/Epub/Epub/hyphenation/Hyphenator.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp index eb3bdec2..4ae5307e 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp @@ -102,7 +102,7 @@ void appendSegmentPatternBreaks(const std::vector& cps, const Lan void appendApostropheContractionBreaks(const std::vector& cps, std::vector& outBreaks) { constexpr size_t kMinLeftSegmentLen = 3; - constexpr size_t kMinRightSegmentLen = 2; + constexpr size_t kMinRightSegmentLen = 3; size_t segmentStart = 0; for (size_t i = 0; i < cps.size(); ++i) { @@ -123,7 +123,7 @@ void appendApostropheContractionBreaks(const std::vector& cps, } } - // Avoid stranding short clitics like "l'"/"d'" or tiny suffixes like "'t". + // Avoid stranding short clitics like "l'"/"d'" or contraction tails like "'ve"/"'re"/"'ll". if (leftPrefixLen >= kMinLeftSegmentLen && rightSuffixLen >= kMinRightSegmentLen) { outBreaks.push_back({cps[i + 1].byteOffset, false}); } diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.h b/lib/Epub/Epub/hyphenation/Hyphenator.h index 74886ba6..931695f9 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.h +++ b/lib/Epub/Epub/hyphenation/Hyphenator.h @@ -26,8 +26,8 @@ class Hyphenator { // 2. Apostrophe contractions between letters (e.g. all'improvviso). // Liang patterns are run per alphabetic segment around apostrophes. // A direct break at the apostrophe boundary is allowed only when the left - // segment has at least 3 letters and the right segment has at least 2 letters, - // avoiding short clitics (e.g. l', d') and short contraction tails (e.g. can't). + // segment has at least 3 letters and the right segment has at least 3 letters, + // avoiding short clitics (e.g. l', d') and contraction tails (e.g. 've, 're, 'll). // 3. Language-specific Liang patterns (e.g. German de_patterns). // Example: "Quadratkilometer" -> Qua|drat|ki|lo|me|ter. // 4. Fallback every-N-chars splitting (only when includeFallback is true AND no