From 3696794591446d69aba740134e5aeaba14374e98 Mon Sep 17 00:00:00 2001 From: Zach Nelson Date: Sat, 21 Feb 2026 22:28:56 -0600 Subject: [PATCH] perf: Replace std::list with std::vector in text layout (#1038) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary _Revision to @blindbat's #802. Description comes from the original PR._ - Replace `std::list` with `std::vector` for word storage in `TextBlock` and `ParsedText` - Use index-based access (`words[i]`) instead of iterator advancement (`std::advance(it, n)`) - Remove the separate `continuesVec` copy that was built from `wordContinues` for O(1) access — now unnecessary since `std::vector` already provides O(1) indexing ## Why `std::list` allocates each node individually on the heap with 16 bytes of prev/next pointer overhead per node. For text layout with many small words, this means: - Scattered heap allocations instead of contiguous memory - Poor cache locality during iteration (each node can be anywhere in memory) - Per-node malloc/free overhead during construction and destruction `std::vector` stores elements contiguously, giving better cache performance during the tight rendering and layout loops. The `extractLine` function also benefits: list splice was O(1) but required maintaining three parallel iterators, while vector range construction with move iterators is simpler and still efficient for the small line-sized chunks involved. ## Files changed - `lib/Epub/Epub/blocks/TextBlock.h` / `.cpp` - `lib/Epub/Epub/ParsedText.h` / `.cpp` ## AI Usage YES ## Test plan - [ ] Open an EPUB with mixed formatting (bold, italic, underline) — verify text renders correctly - [ ] Open a book with justified text — verify word spacing is correct - [ ] Open a book with hyphenation enabled — verify words break correctly at hyphens - [ ] Navigate through pages rapidly — verify no rendering glitches or crashes - [ ] Open a book with long paragraphs — verify text layout matches pre-change behavior --------- Co-authored-by: Kuanysh Bekkulov --- lib/Epub/Epub/ParsedText.cpp | 98 ++++++++++-------------------- lib/Epub/Epub/ParsedText.h | 10 ++- lib/Epub/Epub/blocks/TextBlock.cpp | 23 +++---- lib/Epub/Epub/blocks/TextBlock.h | 12 ++-- 4 files changed, 50 insertions(+), 93 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 867b5515..2de3256c 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -82,37 +81,34 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo const int spaceWidth = renderer.getSpaceWidth(fontId); auto wordWidths = calculateWordWidths(renderer, fontId); - // Build indexed continues vector from the parallel list for O(1) access during layout - std::vector continuesVec(wordContinues.begin(), wordContinues.end()); - std::vector lineBreakIndices; if (hyphenationEnabled) { // Use greedy layout that can split words mid-loop when a hyphenated prefix fits. - lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, continuesVec); + lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, wordContinues); } else { - lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, continuesVec); + lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, wordContinues); } const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1; for (size_t i = 0; i < lineCount; ++i) { - extractLine(i, pageWidth, spaceWidth, wordWidths, continuesVec, lineBreakIndices, processLine); + extractLine(i, pageWidth, spaceWidth, wordWidths, wordContinues, lineBreakIndices, processLine); + } + + // Remove consumed words so size() reflects only remaining words + if (lineCount > 0) { + const size_t consumed = lineBreakIndices[lineCount - 1]; + words.erase(words.begin(), words.begin() + consumed); + wordStyles.erase(wordStyles.begin(), wordStyles.begin() + consumed); + wordContinues.erase(wordContinues.begin(), wordContinues.begin() + consumed); } } std::vector ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) { - const size_t totalWordCount = words.size(); - std::vector wordWidths; - wordWidths.reserve(totalWordCount); + wordWidths.reserve(words.size()); - auto wordsIt = words.begin(); - auto wordStylesIt = wordStyles.begin(); - - while (wordsIt != words.end()) { - wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt)); - - std::advance(wordsIt, 1); - std::advance(wordStylesIt, 1); + for (size_t i = 0; i < words.size(); ++i) { + wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i])); } return wordWidths; @@ -137,8 +133,7 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c // First word needs to fit in reduced width if there's an indent const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; while (wordWidths[i] > effectiveWidth) { - if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true, - &continuesVec)) { + if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { break; } } @@ -284,8 +279,8 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r const int availableWidth = effectivePageWidth - lineWidth - spacing; const bool allowFallbackBreaks = isFirstWord; // Only for first word on line - if (availableWidth > 0 && hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, - allowFallbackBreaks, &continuesVec)) { + if (availableWidth > 0 && + hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) { // Prefix now fits; append it to this line and move to next line lineWidth += spacing + wordWidths[currentIndex]; ++currentIndex; @@ -317,20 +312,14 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r // available width. bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer, const int fontId, std::vector& wordWidths, - const bool allowFallbackBreaks, std::vector* continuesVec) { + const bool allowFallbackBreaks) { // Guard against invalid indices or zero available width before attempting to split. if (availableWidth <= 0 || wordIndex >= words.size()) { return false; } - // Get iterators to target word and style. - auto wordIt = words.begin(); - auto styleIt = wordStyles.begin(); - std::advance(wordIt, wordIndex); - std::advance(styleIt, wordIndex); - - const std::string& word = *wordIt; - const auto style = *styleIt; + const std::string& word = words[wordIndex]; + const auto style = wordStyles[wordIndex]; // Collect candidate breakpoints (byte offsets and hyphen requirements). auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks); @@ -367,16 +356,14 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl // Split the word at the selected breakpoint and append a hyphen if required. std::string remainder = word.substr(chosenOffset); - wordIt->resize(chosenOffset); + words[wordIndex].resize(chosenOffset); if (chosenNeedsHyphen) { - wordIt->push_back('-'); + words[wordIndex].push_back('-'); } // Insert the remainder word (with matching style and continuation flag) directly after the prefix. - auto insertWordIt = std::next(wordIt); - auto insertStyleIt = std::next(styleIt); - words.insert(insertWordIt, remainder); - wordStyles.insert(insertStyleIt, style); + words.insert(words.begin() + wordIndex + 1, remainder); + wordStyles.insert(wordStyles.begin() + wordIndex + 1, style); // Continuation flag handling after splitting a word into prefix + remainder. // @@ -397,17 +384,8 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl // // This lets the backtracking loop keep the entire prefix group ("200 Quadrat-") on one // line, while "kilometer" moves to the next line. - auto continuesIt = wordContinues.begin(); - std::advance(continuesIt, wordIndex); - // *continuesIt is intentionally left unchanged — the prefix keeps its original attachment. - const auto insertContinuesIt = std::next(continuesIt); - wordContinues.insert(insertContinuesIt, false); - - // Keep the indexed vector in sync if provided. - if (continuesVec) { - // (*continuesVec)[wordIndex] stays unchanged — prefix keeps its attachment. - continuesVec->insert(continuesVec->begin() + wordIndex + 1, false); - } + // wordContinues[wordIndex] is intentionally left unchanged — the prefix keeps its original attachment. + wordContinues.insert(wordContinues.begin() + wordIndex + 1, false); // Update cached widths to reflect the new prefix/remainder pairing. wordWidths[wordIndex] = static_cast(chosenWidth); @@ -467,7 +445,8 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const // Pre-calculate X positions for words // Continuation words attach to the previous word with no space before them - std::list lineXPos; + std::vector lineXPos; + lineXPos.reserve(lineWordCount); for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx]; @@ -480,23 +459,10 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const xpos += currentWordWidth + (nextIsContinuation ? 0 : spacing); } - // Iterators always start at the beginning as we are moving content with splice below - auto wordEndIt = words.begin(); - auto wordStyleEndIt = wordStyles.begin(); - auto wordContinuesEndIt = wordContinues.begin(); - std::advance(wordEndIt, lineWordCount); - std::advance(wordStyleEndIt, lineWordCount); - std::advance(wordContinuesEndIt, lineWordCount); - - // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** - std::list lineWords; - lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); - std::list lineWordStyles; - lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); - - // Consume continues flags (not passed to TextBlock, but must be consumed to stay in sync) - std::list lineContinues; - lineContinues.splice(lineContinues.begin(), wordContinues, wordContinues.begin(), wordContinuesEndIt); + // Build line data by moving from the original vectors using index range + std::vector lineWords(std::make_move_iterator(words.begin() + lastBreakAt), + std::make_move_iterator(words.begin() + lineBreak)); + std::vector lineWordStyles(wordStyles.begin() + lastBreakAt, wordStyles.begin() + lineBreak); for (auto& word : lineWords) { if (containsSoftHyphen(word)) { diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index 8dd040cb..39e6bfe8 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -14,9 +13,9 @@ class GfxRenderer; class ParsedText { - std::list words; - std::list wordStyles; - std::list wordContinues; // true = word attaches to previous (no space before it) + std::vector words; + std::vector wordStyles; + std::vector wordContinues; // true = word attaches to previous (no space before it) BlockStyle blockStyle; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -28,8 +27,7 @@ class ParsedText { int spaceWidth, std::vector& wordWidths, std::vector& continuesVec); bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId, - std::vector& wordWidths, bool allowFallbackBreaks, - std::vector* continuesVec = nullptr); + std::vector& wordWidths, bool allowFallbackBreaks); void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector& wordWidths, const std::vector& continuesVec, const std::vector& lineBreakIndices, const std::function)>& processLine); diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 1ace7283..1671e59a 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -12,16 +12,13 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int return; } - auto wordIt = words.begin(); - auto wordStylesIt = wordStyles.begin(); - auto wordXposIt = wordXpos.begin(); for (size_t i = 0; i < words.size(); i++) { - const int wordX = *wordXposIt + x; - const EpdFontFamily::Style currentStyle = *wordStylesIt; - renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle); + const int wordX = wordXpos[i] + x; + const EpdFontFamily::Style currentStyle = wordStyles[i]; + renderer.drawText(fontId, wordX, y, words[i].c_str(), true, currentStyle); if ((currentStyle & EpdFontFamily::UNDERLINE) != 0) { - const std::string& w = *wordIt; + const std::string& w = words[i]; const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle); // y is the top of the text line; add ascender to reach baseline, then offset 2px below const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; @@ -41,10 +38,6 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true); } - - std::advance(wordIt, 1); - std::advance(wordStylesIt, 1); - std::advance(wordXposIt, 1); } } @@ -80,15 +73,15 @@ bool TextBlock::serialize(FsFile& file) const { std::unique_ptr TextBlock::deserialize(FsFile& file) { uint16_t wc; - std::list words; - std::list wordXpos; - std::list wordStyles; + std::vector words; + std::vector wordXpos; + std::vector wordStyles; BlockStyle blockStyle; // Word count serialization::readPod(file, wc); - // Sanity check: prevent allocation of unreasonably large lists (max 10000 words per block) + // Sanity check: prevent allocation of unreasonably large vectors (max 10000 words per block) if (wc > 10000) { LOG_ERR("TXB", "Deserialization failed: word count %u exceeds maximum", wc); return nullptr; diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 3c110de9..11539044 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -2,9 +2,9 @@ #include #include -#include #include #include +#include #include "Block.h" #include "BlockStyle.h" @@ -12,14 +12,14 @@ // Represents a line of text on a page class TextBlock final : public Block { private: - std::list words; - std::list wordXpos; - std::list wordStyles; + std::vector words; + std::vector wordXpos; + std::vector wordStyles; BlockStyle blockStyle; public: - explicit TextBlock(std::list words, std::list word_xpos, - std::list word_styles, const BlockStyle& blockStyle = BlockStyle()) + explicit TextBlock(std::vector words, std::vector word_xpos, + std::vector word_styles, const BlockStyle& blockStyle = BlockStyle()) : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)),