diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 92a6f21..67975c3 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -33,6 +33,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo } const int spaceWidth = renderer.getSpaceWidth(fontId); + // Maintain classic prose indenting when extra paragraph spacing is disabled. const bool allowIndent = !extraParagraphSpacing && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN); const int indentWidth = allowIndent ? renderer.getTextWidth(fontId, "m", REGULAR) : 0; const int firstLinePageWidth = allowIndent ? std::max(pageWidth - indentWidth, 0) : pageWidth; @@ -52,6 +53,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo size_t producedLines = 0; constexpr size_t MAX_LINES = 1000; + // commitLine moves buffered words/styles into a TextBlock and delivers it upstream. auto commitLine = [&](const bool isLastLine) { if (lineWordCount == 0) { return; @@ -75,6 +77,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo int spacing = spaceWidth; int spacingRemainder = 0; if (style == TextBlock::JUSTIFIED && !isLastLine && gaps > 0) { + // Spread the remaining width evenly across the gaps for justification. const int additional = std::max(0, spaceBudget - baseSpaceTotal); spacing = spaceWidth + (gaps > 0 ? additional / gaps : 0); spacingRemainder = (gaps > 0) ? additional % gaps : 0; @@ -94,6 +97,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo xpos = indentWidth; } + // Cache the x positions for each word so TextBlock can render without recomputing layout. 
std::list lineXPos; for (size_t idx = 0; idx < lineWordWidths.size(); ++idx) { lineXPos.push_back(xpos); @@ -148,6 +152,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo } if (lineWordCount > 0 && availableWidth > 0) { + // Try hyphenating the next word so the current line stays compact. HyphenationResult split; if (Hyphenator::splitWord(renderer, fontId, *wordIt, *styleIt, availableWidth, &split, false)) { *wordIt = std::move(split.head); @@ -161,6 +166,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo if (lineWordCount == 0) { HyphenationResult split; + // Single overlong words get force-split so they can be displayed within the margins. if (Hyphenator::splitWord(renderer, fontId, *wordIt, *styleIt, currentLinePageWidth, &split, true)) { *wordIt = std::move(split.head); auto nextWordIt = std::next(wordIt); diff --git a/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp b/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp index 7f2d3d2..1e01b05 100644 --- a/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp @@ -160,6 +160,7 @@ bool isValidEnglishOnsetTrigram(const uint32_t firstCp, const uint32_t secondCp, return false; } +// Verifies that the consonant cluster could begin an English syllable. bool englishClusterIsValidOnset(const std::vector& cps, const size_t start, const size_t end) { if (start >= end) { return false; @@ -189,6 +190,7 @@ bool englishClusterIsValidOnset(const std::vector& cps, const siz return false; } +// Picks the longest legal onset inside the consonant cluster between vowels. 
size_t englishOnsetLength(const std::vector& cps, const size_t clusterStart, const size_t clusterEnd) { const size_t clusterLen = clusterEnd - clusterStart; if (clusterLen == 0) { @@ -206,6 +208,7 @@ size_t englishOnsetLength(const std::vector& cps, const size_t cl return 1; } +// Avoids creating hyphen positions adjacent to apostrophes (e.g., contractions). bool nextToApostrophe(const std::vector& cps, const size_t index) { if (index == 0 || index >= cps.size()) { return false; @@ -215,6 +218,7 @@ bool nextToApostrophe(const std::vector& cps, const size_t index) return left == '\'' || right == '\''; } +// Returns codepoint indexes (positions in cps, not byte offsets) where the word may break according to English syllable rules. std::vector englishBreakIndexes(const std::vector& cps) { std::vector indexes; if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) { diff --git a/lib/Epub/Epub/hyphenation/EnglishHyphenator.h b/lib/Epub/Epub/hyphenation/EnglishHyphenator.h index 17c8f87..927b6d9 100644 --- a/lib/Epub/Epub/hyphenation/EnglishHyphenator.h +++ b/lib/Epub/Epub/hyphenation/EnglishHyphenator.h @@ -2,6 +2,7 @@ #include "LanguageHyphenator.h" +// Implements syllable-aware break calculation for Latin-script (English) words. class EnglishHyphenator final : public LanguageHyphenator { public: static const EnglishHyphenator& instance(); diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp index 7106388..5040946 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp @@ -15,6 +15,7 @@ namespace { +// Central registry for language-specific hyphenators supported on device. const std::array& registeredHyphenators() { static const std::array hyphenators = { &EnglishHyphenator::instance(), @@ -23,6 +24,7 @@ const std::array& registeredHyphenators() { return hyphenators; } +// Finds the hyphenator matching the detected script. 
const LanguageHyphenator* hyphenatorForScript(const Script script) { for (const auto* hyphenator : registeredHyphenators()) { if (hyphenator->script() == script) { @@ -32,6 +34,7 @@ const LanguageHyphenator* hyphenatorForScript(const Script script) { return nullptr; } +// Converts the UTF-8 word into codepoint metadata for downstream rules. std::vector collectCodepoints(const std::string& word) { std::vector cps; cps.reserve(word.size()); @@ -47,6 +50,7 @@ std::vector collectCodepoints(const std::string& word) { return cps; } +// Rejects words containing punctuation or digits unless forced. bool hasOnlyAlphabetic(const std::vector& cps) { if (cps.empty()) { return false; @@ -60,6 +64,7 @@ bool hasOnlyAlphabetic(const std::vector& cps) { return true; } +// Asks the language hyphenator for legal break positions inside the word. std::vector collectBreakIndexes(const std::vector& cps) { if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) { return {}; @@ -74,6 +79,7 @@ std::vector collectBreakIndexes(const std::vector& cps) { return {}; } +// Maps a codepoint index back to its byte offset inside the source word. size_t byteOffsetForIndex(const std::vector& cps, const size_t index) { if (index >= cps.size()) { return cps.empty() ? 0 : cps.back().byteOffset; @@ -81,6 +87,7 @@ size_t byteOffsetForIndex(const std::vector& cps, const size_t in return cps[index].byteOffset; } +// Safely slices a UTF-8 string without splitting multibyte sequences. std::string slice(const std::string& word, const size_t startByte, const size_t endByte) { if (startByte >= endByte || startByte >= word.size()) { return std::string(); @@ -127,6 +134,7 @@ bool Hyphenator::splitWord(const GfxRenderer& renderer, const int fontId, const } if (chosenIndex == std::numeric_limits::max() && force) { + // Emergency fallback: brute-force through codepoints to avoid overflow when no legal breaks fit. 
for (size_t idx = MIN_PREFIX_CP; idx + MIN_SUFFIX_CP <= cps.size(); ++idx) { const size_t byteOffset = byteOffsetForIndex(cps, idx); const std::string prefix = word.substr(0, byteOffset); diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.h b/lib/Epub/Epub/hyphenation/Hyphenator.h index 819bf56..8c0bd78 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.h +++ b/lib/Epub/Epub/hyphenation/Hyphenator.h @@ -6,6 +6,7 @@ class GfxRenderer; +// Holds the split portions of a hyphenated word. struct HyphenationResult { std::string head; std::string tail; @@ -13,6 +14,7 @@ struct HyphenationResult { class Hyphenator { public: + // Splits a word so it fits within availableWidth, appending a hyphen to the head when needed. static bool splitWord(const GfxRenderer& renderer, int fontId, const std::string& word, EpdFontStyle style, int availableWidth, HyphenationResult* result, bool force); }; \ No newline at end of file diff --git a/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp b/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp index b37ebc6..80be57f 100644 --- a/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp @@ -77,6 +77,7 @@ int russianSonority(uint32_t cp) { } } +// Applies Russian sonority sequencing to ensure the consonant cluster can start a syllable. bool russianClusterIsValidOnset(const std::vector& cps, const size_t start, const size_t end) { if (start >= end) { return false; @@ -111,6 +112,7 @@ bool russianClusterIsValidOnset(const std::vector& cps, const siz return true; } +// Chooses the longest valid onset contained within the inter-vowel cluster. size_t russianOnsetLength(const std::vector& cps, const size_t clusterStart, const size_t clusterEnd) { const size_t clusterLen = clusterEnd - clusterStart; if (clusterLen == 0) { @@ -128,6 +130,7 @@ size_t russianOnsetLength(const std::vector& cps, const size_t cl return 1; } +// Prevents hyphenation splits immediately beside ь/ъ characters. 
bool nextToSoftSign(const std::vector& cps, const size_t index) { if (index == 0 || index >= cps.size()) { return false; @@ -137,6 +140,7 @@ bool nextToSoftSign(const std::vector& cps, const size_t index) { return isSoftOrHardSign(left) || isSoftOrHardSign(right); } +// Produces syllable break indexes tailored to Russian phonotactics. std::vector russianBreakIndexes(const std::vector& cps) { std::vector indexes; if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) { diff --git a/lib/Epub/Epub/hyphenation/RussianHyphenator.h b/lib/Epub/Epub/hyphenation/RussianHyphenator.h index a55ce26..164335e 100644 --- a/lib/Epub/Epub/hyphenation/RussianHyphenator.h +++ b/lib/Epub/Epub/hyphenation/RussianHyphenator.h @@ -2,6 +2,7 @@ #include "LanguageHyphenator.h" +// Handles Cyrillic-specific hyphenation heuristics (Russian syllable rules). class RussianHyphenator final : public LanguageHyphenator { public: static const RussianHyphenator& instance();