From 3696794591446d69aba740134e5aeaba14374e98 Mon Sep 17 00:00:00 2001
From: Zach Nelson <zach@zdnelson.com>
Date: Sat, 21 Feb 2026 22:28:56 -0600
Subject: [PATCH] perf: Replace std::list with std::vector in text layout
 (#1038)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

_Revision to @blindbat's #802. Description comes from the original PR._

- Replace `std::list` with `std::vector` for word storage in `TextBlock`
and `ParsedText`
- Use index-based access (`words[i]`) instead of iterator advancement
(`std::advance(it, n)`)
- Remove the separate `continuesVec` copy that was built from
`wordContinues` for O(1) access — now unnecessary since
`std::vector<bool>` already provides O(1) indexing

## Why

`std::list` allocates each node individually on the heap with 16 bytes
of prev/next pointer overhead per node. For text layout with many small
words, this means:
- Scattered heap allocations instead of contiguous memory
- Poor cache locality during iteration (each node can be anywhere in
memory)
- Per-node malloc/free overhead during construction and destruction

`std::vector` stores elements contiguously, giving better cache
performance during the tight rendering and layout loops. The
`extractLine` function also benefits: list splice was O(1) but required
maintaining three parallel iterators, while vector range construction
with move iterators is simpler and still efficient for the small
line-sized chunks involved.

## Files changed

- `lib/Epub/Epub/blocks/TextBlock.h` / `.cpp`
- `lib/Epub/Epub/ParsedText.h` / `.cpp`

## AI Usage

YES

## Test plan

- [ ] Open an EPUB with mixed formatting (bold, italic, underline) —
verify text renders correctly
- [ ] Open a book with justified text — verify word spacing is correct
- [ ] Open a book with hyphenation enabled — verify words break
correctly at hyphens
- [ ] Navigate through pages rapidly — verify no rendering glitches or
crashes
- [ ] Open a book with long paragraphs — verify text layout matches
pre-change behavior

---------

Co-authored-by: Kuanysh Bekkulov <kbekkulov@gmail.com>
---
 lib/Epub/Epub/ParsedText.cpp       | 98 ++++++++++--------------------
 lib/Epub/Epub/ParsedText.h         | 10 ++-
 lib/Epub/Epub/blocks/TextBlock.cpp | 23 +++----
 lib/Epub/Epub/blocks/TextBlock.h   | 12 ++--
 4 files changed, 50 insertions(+), 93 deletions(-)
diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
index 867b5515..2de3256c 100644
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@@ -5,7 +5,6 @@
 #include <algorithm>
 #include <cmath>
 #include <functional>
-#include <iterator>
 #include <limits>
 #include <vector>
 
@@ -82,37 +81,34 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
   const int spaceWidth = renderer.getSpaceWidth(fontId);
   auto wordWidths = calculateWordWidths(renderer, fontId);
 
-  // Build indexed continues vector from the parallel list for O(1) access during layout
-  std::vector<bool> continuesVec(wordContinues.begin(), wordContinues.end());
-
   std::vector<size_t> lineBreakIndices;
   if (hyphenationEnabled) {
     // Use greedy layout that can split words mid-loop when a hyphenated prefix fits.
-    lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, continuesVec);
+    lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, wordContinues);
   } else {
-    lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, continuesVec);
+    lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, wordContinues);
   }
   const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
 
   for (size_t i = 0; i < lineCount; ++i) {
-    extractLine(i, pageWidth, spaceWidth, wordWidths, continuesVec, lineBreakIndices, processLine);
+    extractLine(i, pageWidth, spaceWidth, wordWidths, wordContinues, lineBreakIndices, processLine);
+  }
+
+  // Remove consumed words so size() reflects only remaining words
+  if (lineCount > 0) {
+    const size_t consumed = lineBreakIndices[lineCount - 1];
+    words.erase(words.begin(), words.begin() + consumed);
+    wordStyles.erase(wordStyles.begin(), wordStyles.begin() + consumed);
+    wordContinues.erase(wordContinues.begin(), wordContinues.begin() + consumed);
   }
 }
 
 std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
-  const size_t totalWordCount = words.size();
-
   std::vector<uint16_t> wordWidths;
-  wordWidths.reserve(totalWordCount);
+  wordWidths.reserve(words.size());
 
-  auto wordsIt = words.begin();
-  auto wordStylesIt = wordStyles.begin();
-
-  while (wordsIt != words.end()) {
-    wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt));
-
-    std::advance(wordsIt, 1);
-    std::advance(wordStylesIt, 1);
+  for (size_t i = 0; i < words.size(); ++i) {
+    wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i]));
   }
 
   return wordWidths;
@@ -137,8 +133,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
     // First word needs to fit in reduced width if there's an indent
     const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
     while (wordWidths[i] > effectiveWidth) {
-      if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true,
-                                &continuesVec)) {
+      if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) {
         break;
       }
     }
@@ -284,8 +279,8 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
       const int availableWidth = effectivePageWidth - lineWidth - spacing;
       const bool allowFallbackBreaks = isFirstWord;  // Only for first word on line
 
-      if (availableWidth > 0 && hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths,
-                                                     allowFallbackBreaks, &continuesVec)) {
+      if (availableWidth > 0 &&
+          hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) {
         // Prefix now fits; append it to this line and move to next line
         lineWidth += spacing + wordWidths[currentIndex];
         ++currentIndex;
@@ -317,20 +312,14 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
 // available width.
 bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
                                       const int fontId, std::vector<uint16_t>& wordWidths,
-                                      const bool allowFallbackBreaks, std::vector<bool>* continuesVec) {
+                                      const bool allowFallbackBreaks) {
   // Guard against invalid indices or zero available width before attempting to split.
   if (availableWidth <= 0 || wordIndex >= words.size()) {
     return false;
   }
 
-  // Get iterators to target word and style.
-  auto wordIt = words.begin();
-  auto styleIt = wordStyles.begin();
-  std::advance(wordIt, wordIndex);
-  std::advance(styleIt, wordIndex);
-
-  const std::string& word = *wordIt;
-  const auto style = *styleIt;
+  const std::string& word = words[wordIndex];
+  const auto style = wordStyles[wordIndex];
 
   // Collect candidate breakpoints (byte offsets and hyphen requirements).
   auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
@@ -367,16 +356,14 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
 
   // Split the word at the selected breakpoint and append a hyphen if required.
   std::string remainder = word.substr(chosenOffset);
-  wordIt->resize(chosenOffset);
+  words[wordIndex].resize(chosenOffset);
   if (chosenNeedsHyphen) {
-    wordIt->push_back('-');
+    words[wordIndex].push_back('-');
   }
 
   // Insert the remainder word (with matching style and continuation flag) directly after the prefix.
-  auto insertWordIt = std::next(wordIt);
-  auto insertStyleIt = std::next(styleIt);
-  words.insert(insertWordIt, remainder);
-  wordStyles.insert(insertStyleIt, style);
+  words.insert(words.begin() + wordIndex + 1, remainder);
+  wordStyles.insert(wordStyles.begin() + wordIndex + 1, style);
 
   // Continuation flag handling after splitting a word into prefix + remainder.
   //
@@ -397,17 +384,8 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
   //
   // This lets the backtracking loop keep the entire prefix group ("200 Quadrat-") on one
   // line, while "kilometer" moves to the next line.
-  auto continuesIt = wordContinues.begin();
-  std::advance(continuesIt, wordIndex);
-  // *continuesIt is intentionally left unchanged — the prefix keeps its original attachment.
-  const auto insertContinuesIt = std::next(continuesIt);
-  wordContinues.insert(insertContinuesIt, false);
-
-  // Keep the indexed vector in sync if provided.
-  if (continuesVec) {
-    // (*continuesVec)[wordIndex] stays unchanged — prefix keeps its attachment.
-    continuesVec->insert(continuesVec->begin() + wordIndex + 1, false);
-  }
+  // wordContinues[wordIndex] is intentionally left unchanged — the prefix keeps its original attachment.
+  wordContinues.insert(wordContinues.begin() + wordIndex + 1, false);
 
   // Update cached widths to reflect the new prefix/remainder pairing.
   wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
@@ -467,7 +445,8 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
 
   // Pre-calculate X positions for words
   // Continuation words attach to the previous word with no space before them
-  std::list<uint16_t> lineXPos;
+  std::vector<uint16_t> lineXPos;
+  lineXPos.reserve(lineWordCount);
 
   for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
     const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
@@ -480,23 +459,10 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
     xpos += currentWordWidth + (nextIsContinuation ? 0 : spacing);
   }
 
-  // Iterators always start at the beginning as we are moving content with splice below
-  auto wordEndIt = words.begin();
-  auto wordStyleEndIt = wordStyles.begin();
-  auto wordContinuesEndIt = wordContinues.begin();
-  std::advance(wordEndIt, lineWordCount);
-  std::advance(wordStyleEndIt, lineWordCount);
-  std::advance(wordContinuesEndIt, lineWordCount);
-
-  // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
-  std::list<std::string> lineWords;
-  lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
-  std::list<EpdFontFamily::Style> lineWordStyles;
-  lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
-
-  // Consume continues flags (not passed to TextBlock, but must be consumed to stay in sync)
-  std::list<bool> lineContinues;
-  lineContinues.splice(lineContinues.begin(), wordContinues, wordContinues.begin(), wordContinuesEndIt);
+  // Build line data by moving from the original vectors using index range
+  std::vector<std::string> lineWords(std::make_move_iterator(words.begin() + lastBreakAt),
+                                     std::make_move_iterator(words.begin() + lineBreak));
+  std::vector<EpdFontFamily::Style> lineWordStyles(wordStyles.begin() + lastBreakAt, wordStyles.begin() + lineBreak);
 
   for (auto& word : lineWords) {
     if (containsSoftHyphen(word)) {
diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h
index 8dd040cb..39e6bfe8 100644
--- a/lib/Epub/Epub/ParsedText.h
+++ b/lib/Epub/Epub/ParsedText.h
@@ -3,7 +3,6 @@
 #include <EpdFontFamily.h>
 
 #include <functional>
-#include <list>
 #include <memory>
 #include <string>
 #include <vector>
@@ -14,9 +13,9 @@
 class GfxRenderer;
 
 class ParsedText {
-  std::list<std::string> words;
-  std::list<EpdFontFamily::Style> wordStyles;
-  std::list<bool> wordContinues;  // true = word attaches to previous (no space before it)
+  std::vector<std::string> words;
+  std::vector<EpdFontFamily::Style> wordStyles;
+  std::vector<bool> wordContinues;  // true = word attaches to previous (no space before it)
   BlockStyle blockStyle;
   bool extraParagraphSpacing;
   bool hyphenationEnabled;
@@ -28,8 +27,7 @@ class ParsedText {
                                                   int spaceWidth, std::vector<uint16_t>& wordWidths,
                                                   std::vector<bool>& continuesVec);
   bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId,
-                            std::vector<uint16_t>& wordWidths, bool allowFallbackBreaks,
-                            std::vector<bool>* continuesVec = nullptr);
+                            std::vector<uint16_t>& wordWidths, bool allowFallbackBreaks);
   void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
                    const std::vector<bool>& continuesVec, const std::vector<size_t>& lineBreakIndices,
                    const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp
index 1ace7283..1671e59a 100644
--- a/lib/Epub/Epub/blocks/TextBlock.cpp
+++ b/lib/Epub/Epub/blocks/TextBlock.cpp
@@ -12,16 +12,13 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
     return;
   }
 
-  auto wordIt = words.begin();
-  auto wordStylesIt = wordStyles.begin();
-  auto wordXposIt = wordXpos.begin();
   for (size_t i = 0; i < words.size(); i++) {
-    const int wordX = *wordXposIt + x;
-    const EpdFontFamily::Style currentStyle = *wordStylesIt;
-    renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle);
+    const int wordX = wordXpos[i] + x;
+    const EpdFontFamily::Style currentStyle = wordStyles[i];
+    renderer.drawText(fontId, wordX, y, words[i].c_str(), true, currentStyle);
 
     if ((currentStyle & EpdFontFamily::UNDERLINE) != 0) {
-      const std::string& w = *wordIt;
+      const std::string& w = words[i];
       const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle);
       // y is the top of the text line; add ascender to reach baseline, then offset 2px below
       const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2;
@@ -41,10 +38,6 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
 
       renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true);
     }
-
-    std::advance(wordIt, 1);
-    std::advance(wordStylesIt, 1);
-    std::advance(wordXposIt, 1);
   }
 }
 
@@ -80,15 +73,15 @@ bool TextBlock::serialize(FsFile& file) const {
 
 std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
   uint16_t wc;
-  std::list<std::string> words;
-  std::list<uint16_t> wordXpos;
-  std::list<EpdFontFamily::Style> wordStyles;
+  std::vector<std::string> words;
+  std::vector<uint16_t> wordXpos;
+  std::vector<EpdFontFamily::Style> wordStyles;
   BlockStyle blockStyle;
 
   // Word count
   serialization::readPod(file, wc);
 
-  // Sanity check: prevent allocation of unreasonably large lists (max 10000 words per block)
+  // Sanity check: prevent allocation of unreasonably large vectors (max 10000 words per block)
   if (wc > 10000) {
     LOG_ERR("TXB", "Deserialization failed: word count %u exceeds maximum", wc);
     return nullptr;
diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h
index 3c110de9..11539044 100644
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@@ -2,9 +2,9 @@
 #include <EpdFontFamily.h>
 #include <HalStorage.h>
 
-#include <list>
 #include <memory>
 #include <string>
+#include <vector>
 
 #include "Block.h"
 #include "BlockStyle.h"
@@ -12,14 +12,14 @@
 // Represents a line of text on a page
 class TextBlock final : public Block {
  private:
-  std::list<std::string> words;
-  std::list<uint16_t> wordXpos;
-  std::list<EpdFontFamily::Style> wordStyles;
+  std::vector<std::string> words;
+  std::vector<uint16_t> wordXpos;
+  std::vector<EpdFontFamily::Style> wordStyles;
   BlockStyle blockStyle;
 
  public:
-  explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos,
-                     std::list<EpdFontFamily::Style> word_styles, const BlockStyle& blockStyle = BlockStyle())
+  explicit TextBlock(std::vector<std::string> words, std::vector<uint16_t> word_xpos,
+                     std::vector<EpdFontFamily::Style> word_styles, const BlockStyle& blockStyle = BlockStyle())
       : words(std::move(words)),
         wordXpos(std::move(word_xpos)),
         wordStyles(std::move(word_styles)),