diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 111627a..dfb16df 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -8,15 +8,12 @@ #include #include #include -#include #include constexpr int MAX_COST = std::numeric_limits::max(); namespace { -using PrefixWidthCache = ParsedText::PrefixWidthCache; - struct HyphenSplitDecision { size_t byteOffset; uint16_t prefixWidth; @@ -27,24 +24,9 @@ struct HyphenationGuard { size_t tailIndex; }; -uint16_t cachedPrefixWidth(PrefixWidthCache& cache, const GfxRenderer& renderer, const int fontId, - const std::string& word, const EpdFontStyle style, const size_t prefixBytes) { - const void* wordKey = static_cast(&word); - auto& offsetMap = cache[wordKey]; - const auto it = offsetMap.find(prefixBytes); - if (it != offsetMap.end()) { - return it->second; - } - - const std::string prefix = word.substr(0, prefixBytes); - const uint16_t width = renderer.getTextWidth(fontId, prefix.c_str(), style); - offsetMap.emplace(prefixBytes, width); - return width; -} - bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word, const EpdFontStyle style, const int availableWidth, const bool includeFallback, - PrefixWidthCache& cache, HyphenSplitDecision* decision) { + HyphenSplitDecision* decision) { if (!decision || availableWidth <= 0) { return false; } @@ -64,7 +46,8 @@ bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const st uint16_t chosenWidth = 0; for (const size_t offset : offsets) { - const int prefixWidth = cachedPrefixWidth(cache, renderer, fontId, word, style, offset); + const std::string prefix = word.substr(0, offset); + const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style); if (prefixWidth <= adjustedWidth) { chosenOffset = offset; chosenWidth = static_cast(prefixWidth + hyphenWidth); @@ -101,9 +84,9 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo const int pageWidth = renderer.getScreenWidth() - horizontalMargin; const int spaceWidth = renderer.getSpaceWidth(fontId); - PrefixWidthCache prefixWidthCache; - auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth, prefixWidthCache); - auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, prefixWidthCache); + // Pre-split oversized tokens so the DP step always has feasible line candidates. + auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth); + auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths); const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1; for (size_t i = 0; i < lineCount; ++i) { @@ -112,7 +95,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo } std::vector ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId, - const int pageWidth, PrefixWidthCache& cache) { + const int pageWidth) { const size_t totalWordCount = words.size(); std::vector wordWidths; @@ -131,9 +114,8 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt); if (width > pageWidth) { - // Pre-split oversized tokens so the DP step always has feasible line candidates. HyphenSplitDecision decision; - if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, cache, &decision)) { + if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, &decision)) { const std::string originalWord = *wordsIt; const std::string tail = originalWord.substr(decision.byteOffset); if (tail.empty()) { @@ -164,14 +146,12 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere } std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth, - const int spaceWidth, std::vector& wordWidths, - PrefixWidthCache& cache) { + const int spaceWidth, std::vector& wordWidths) { if (words.empty()) { return {}; } std::vector guards; - std::vector lineWordWidthSums; auto shiftGuardIndices = [&](size_t insertPos) { for (auto& guard : guards) { @@ -187,17 +167,8 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c auto runDp = [&](std::vector& lineBreaks) { const size_t totalWordCount = wordWidths.size(); - // DP table to store the minimum badness (cost) of lines starting at index i std::vector dp(totalWordCount); - // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i' std::vector ans(totalWordCount); - lineWordWidthSums.assign(totalWordCount, 0); - - if (!wordWidths.empty()) { - lineWordWidthSums.back() = wordWidths.back(); - } - - // Base Case dp[totalWordCount - 1] = 0; ans[totalWordCount - 1] = totalWordCount - 1; @@ -207,7 +178,6 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c dp[i] = MAX_COST; for (size_t j = i; j < totalWordCount; ++j) { - // Current line length: previous width + space + current word width currlen += wordWidths[j] + spaceWidth; if (currlen > pageWidth) { @@ -230,17 +200,13 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c cost = 0; } else { const int remainingSpace = pageWidth - currlen; - // Use long long for the square to prevent overflow const long long cost_ll = static_cast(remainingSpace) * remainingSpace + dp[j + 1]; cost = cost_ll > MAX_COST ? MAX_COST : static_cast(cost_ll); } if (cost < dp[i]) { dp[i] = cost; - ans[i] = j; // j is the index of the last word in this optimal line - const size_t wordsInLine = j - i + 1; - const int spacesWidth = wordsInLine > 1 ? static_cast(wordsInLine - 1) * spaceWidth : 0; - lineWordWidthSums[i] = currlen - spacesWidth; + ans[i] = j; } } } @@ -273,7 +239,10 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c const bool isLastLine = lineIdx == lineBreakIndices.size() - 1; const size_t lineWordCount = lineBreak - lastBreakAt; - const int lineWordWidthSum = (lastBreakAt < lineWordWidthSums.size()) ? lineWordWidthSums[lastBreakAt] : 0; + int lineWordWidthSum = 0; + for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) { + lineWordWidthSum += wordWidths[idx]; + } lastBreakAt = lineBreak; if (isLastLine || lineBreak >= wordWidths.size()) { @@ -299,7 +268,7 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c } HyphenSplitDecision decision; - if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, cache, &decision)) { + if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, &decision)) { continue; } @@ -338,23 +307,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lineWordCount = lineBreak - lastBreakAt; - // Calculate total word width for this line int lineWordWidthSum = 0; for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) { lineWordWidthSum += wordWidths[idx]; } + const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - // Calculate spacing const int spareSpace = pageWidth - lineWordWidthSum; int spacing = spaceWidth; - const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { spacing = spareSpace / (lineWordCount - 1); } - // Calculate initial x position uint16_t xpos = 0; if (style == TextBlock::RIGHT_ALIGN) { xpos = spareSpace - (lineWordCount - 1) * spaceWidth; @@ -362,7 +327,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; } - // Pre-calculate X positions for words std::list lineXPos; for (size_t i = lastBreakAt; i < lineBreak; i++) { const uint16_t currentWordWidth = wordWidths[i]; @@ -370,13 +334,11 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const xpos += currentWordWidth + spacing; } - // Iterators always start at the beginning as we are moving content with splice below auto wordEndIt = words.begin(); auto wordStyleEndIt = wordStyles.begin(); std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); - // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index 4a2b641..f0e0405 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "blocks/TextBlock.h" @@ -14,23 +13,6 @@ class GfxRenderer; class ParsedText { - public: - using PrefixWidthCache = std::unordered_map>; - - explicit ParsedText(const TextBlock::BLOCK_STYLE style, bool extraParagraphSpacing, bool hyphenationEnabled) - : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} - ~ParsedText() = default; - - void addWord(std::string word, EpdFontStyle fontStyle); - void setStyle(TextBlock::BLOCK_STYLE style) { this->style = style; } - TextBlock::BLOCK_STYLE getStyle() const { return style; } - size_t size() const { return words.size(); } - bool isEmpty() const { return words.empty(); } - void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin, - const std::function)>& processLine, - bool includeLastLine = true); - - private: std::list words; std::list wordStyles; TextBlock::BLOCK_STYLE style; @@ -38,10 +20,24 @@ class ParsedText { bool hyphenationEnabled; std::vector computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth, int spaceWidth, - std::vector& wordWidths, PrefixWidthCache& cache); + std::vector& wordWidths); void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector& wordWidths, const std::vector& lineBreakIndices, const std::function)>& processLine); - std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth, - PrefixWidthCache& cache); + std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth); + + public: + explicit ParsedText(const TextBlock::BLOCK_STYLE style, const bool extraParagraphSpacing, + const bool hyphenationEnabled) + : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} + ~ParsedText() = default; + + void addWord(std::string word, EpdFontStyle fontStyle); + void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; } + TextBlock::BLOCK_STYLE getStyle() const { return style; } + size_t size() const { return words.size(); } + bool isEmpty() const { return words.empty(); } + void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin, + const std::function)>& processLine, + bool includeLastLine = true); };