From 6ceba56620706b15bb4477965f563af4c6aa7ff9 Mon Sep 17 00:00:00 2001
From: cottongin <cottongin@users.noreply.github.com>
Date: Thu, 29 Jan 2026 09:52:30 -0500
Subject: [PATCH] checkpoint: refactor TextBlock/ParsedText from std::list to
 std::vector

Reduces heap fragmentation by making ~12x fewer allocations per TextBlock.
This fixes crashes when repeatedly navigating dictionary pages.

- Replace std::list with std::vector in TextBlock members
- Replace splice() with move+erase in ParsedText::extractLine()
- Use index-based access in hyphenateWordAtIndex()
---
 lib/Epub/Epub/ParsedText.cpp       | 57 ++++++++++++++----------------
 lib/Epub/Epub/ParsedText.h         |  7 ++--
 lib/Epub/Epub/blocks/TextBlock.cpp | 20 +++++------
 lib/Epub/Epub/blocks/TextBlock.h   | 22 ++++++------
 4 files changed, 50 insertions(+), 56 deletions(-)

diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
index 6ae1896..b7cb606 100644
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@@ -281,14 +281,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
     return false;
   }
 
-  // Get iterators to target word and style.
-  auto wordIt = words.begin();
-  auto styleIt = wordStyles.begin();
-  std::advance(wordIt, wordIndex);
-  std::advance(styleIt, wordIndex);
-
-  const std::string& word = *wordIt;
-  const auto style = *styleIt;
+  // Direct index access for vectors (more efficient than iterator + advance)
+  const std::string& word = words[wordIndex];
+  const auto wordStyle = wordStyles[wordIndex];
 
   // Collect candidate breakpoints (byte offsets and hyphen requirements).
   auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
@@ -308,7 +303,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
     }
 
     const bool needsHyphen = info.requiresInsertedHyphen;
-    const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen);
+    const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), wordStyle, needsHyphen);
     if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) {
       continue;  // Skip if too wide or not an improvement
     }
@@ -325,20 +320,18 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
 
   // Split the word at the selected breakpoint and append a hyphen if required.
   std::string remainder = word.substr(chosenOffset);
-  wordIt->resize(chosenOffset);
+  words[wordIndex].resize(chosenOffset);
   if (chosenNeedsHyphen) {
-    wordIt->push_back('-');
+    words[wordIndex].push_back('-');
   }
 
   // Insert the remainder word (with matching style) directly after the prefix.
-  auto insertWordIt = std::next(wordIt);
-  auto insertStyleIt = std::next(styleIt);
-  words.insert(insertWordIt, remainder);
-  wordStyles.insert(insertStyleIt, style);
+  words.insert(words.begin() + wordIndex + 1, remainder);
+  wordStyles.insert(wordStyles.begin() + wordIndex + 1, wordStyle);
 
   // Update cached widths to reflect the new prefix/remainder pairing.
   wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
-  const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style);
+  const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, wordStyle);
   wordWidths.insert(wordWidths.begin() + wordIndex + 1, remainderWidth);
   return true;
 }
@@ -375,28 +368,30 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
   }
 
   // Pre-calculate X positions for words
-  std::list<uint16_t> lineXPos;
+  std::vector<uint16_t> lineXPos;
+  lineXPos.reserve(lineWordCount);
   for (size_t i = lastBreakAt; i < lineBreak; i++) {
     const uint16_t currentWordWidth = wordWidths[i];
     lineXPos.push_back(xpos);
     xpos += currentWordWidth + spacing;
   }
 
-  // Iterators always start at the beginning as we are moving content with splice below
-  auto wordEndIt = words.begin();
-  auto wordStyleEndIt = wordStyles.begin();
-  auto wordUnderlineEndIt = wordUnderlines.begin();
-  std::advance(wordEndIt, lineWordCount);
-  std::advance(wordStyleEndIt, lineWordCount);
-  std::advance(wordUnderlineEndIt, lineWordCount);
+  // *** CRITICAL STEP: CONSUME DATA USING MOVE + ERASE ***
+  // Move first lineWordCount elements from words into lineWords
+  std::vector<std::string> lineWords(
+      std::make_move_iterator(words.begin()),
+      std::make_move_iterator(words.begin() + lineWordCount));
+  words.erase(words.begin(), words.begin() + lineWordCount);
 
-  // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
-  std::list<std::string> lineWords;
-  lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
-  std::list<EpdFontFamily::Style> lineWordStyles;
-  lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
-  std::list<bool> lineWordUnderlines;
-  lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt);
+  std::vector<EpdFontFamily::Style> lineWordStyles(
+      std::make_move_iterator(wordStyles.begin()),
+      std::make_move_iterator(wordStyles.begin() + lineWordCount));
+  wordStyles.erase(wordStyles.begin(), wordStyles.begin() + lineWordCount);
+
+  std::vector<bool> lineWordUnderlines(
+      wordUnderlines.begin(),
+      wordUnderlines.begin() + lineWordCount);
+  wordUnderlines.erase(wordUnderlines.begin(), wordUnderlines.begin() + lineWordCount);
 
   for (auto& word : lineWords) {
     if (containsSoftHyphen(word)) {
diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h
index cc2596c..053cf49 100644
--- a/lib/Epub/Epub/ParsedText.h
+++ b/lib/Epub/Epub/ParsedText.h
@@ -3,7 +3,6 @@
 #include <EpdFontFamily.h>
 
 #include <functional>
-#include <list>
 #include <memory>
 #include <string>
 #include <vector>
@@ -14,9 +13,9 @@
 class GfxRenderer;
 
 class ParsedText {
-  std::list<std::string> words;
-  std::list<EpdFontFamily::Style> wordStyles;
-  std::list<bool> wordUnderlines;  // Track underline per word
+  std::vector<std::string> words;
+  std::vector<EpdFontFamily::Style> wordStyles;
+  std::vector<bool> wordUnderlines;  // Track underline per word
   TextBlock::Style style;
   BlockStyle blockStyle;
   bool extraParagraphSpacing;
diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp
index 4fa7da7..c6bdc8f 100644
--- a/lib/Epub/Epub/blocks/TextBlock.cpp
+++ b/lib/Epub/Epub/blocks/TextBlock.cpp
@@ -98,23 +98,23 @@ bool TextBlock::serialize(FsFile& file) const {
 
 std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
   uint16_t wc;
-  std::list<std::string> words;
-  std::list<uint16_t> wordXpos;
-  std::list<EpdFontFamily::Style> wordStyles;
-  std::list<bool> wordUnderlines;
+  std::vector<std::string> words;
+  std::vector<uint16_t> wordXpos;
+  std::vector<EpdFontFamily::Style> wordStyles;
+  std::vector<bool> wordUnderlines;
   Style style;
   BlockStyle blockStyle;
 
   // Word count
   serialization::readPod(file, wc);
 
-  // Sanity check: prevent allocation of unreasonably large lists (max 10000 words per block)
+  // Sanity check: prevent allocation of unreasonably large vectors (max 10000 words per block)
   if (wc > 10000) {
     Serial.printf("[%lu] [TXB] Deserialization failed: word count %u exceeds maximum\n", millis(), wc);
     return nullptr;
   }
 
-  // Word data
+  // Word data - size each vector to the word count up front
   words.resize(wc);
   wordXpos.resize(wc);
   wordStyles.resize(wc);
@@ -124,14 +124,14 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
 
   // Underline flags (packed as bytes, 8 words per byte)
   wordUnderlines.resize(wc, false);
-  auto underlineIt = wordUnderlines.begin();
+  size_t underlineIdx = 0;
   const int bytesNeeded = (wc + 7) / 8;
   for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) {
     uint8_t underlineByte;
     serialization::readPod(file, underlineByte);
-    for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) {
-      *underlineIt = (underlineByte & 1 << bit) != 0;
-      ++underlineIt;
+    for (int bit = 0; bit < 8 && underlineIdx < wc; bit++) {
+      wordUnderlines[underlineIdx] = (underlineByte & (1 << bit)) != 0;
+      ++underlineIdx;
     }
   }
 
diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h
index a6f1b0f..e94e9af 100644
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@@ -2,7 +2,7 @@
 #include <EpdFontFamily.h>
 #include <SdFat.h>
 
-#include <list>
+#include <vector>
 #include <memory>
 #include <string>
 
@@ -20,17 +20,17 @@ class TextBlock final : public Block {
   };
 
  private:
-  std::list<std::string> words;
-  std::list<uint16_t> wordXpos;
-  std::list<EpdFontFamily::Style> wordStyles;
-  std::list<bool> wordUnderlines;  // Track underline per word
+  std::vector<std::string> words;
+  std::vector<uint16_t> wordXpos;
+  std::vector<EpdFontFamily::Style> wordStyles;
+  std::vector<bool> wordUnderlines;  // Track underline per word
   Style style;
   BlockStyle blockStyle;
 
  public:
-  explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos,
-                     std::list<EpdFontFamily::Style> word_styles, const Style style,
-                     const BlockStyle& blockStyle = BlockStyle(), std::list<bool> word_underlines = std::list<bool>())
+  explicit TextBlock(std::vector<std::string> words, std::vector<uint16_t> word_xpos,
+                     std::vector<EpdFontFamily::Style> word_styles, const Style style,
+                     const BlockStyle& blockStyle = BlockStyle(), std::vector<bool> word_underlines = std::vector<bool>())
       : words(std::move(words)),
         wordXpos(std::move(word_xpos)),
         wordStyles(std::move(word_styles)),
@@ -50,9 +50,9 @@ class TextBlock final : public Block {
   bool isEmpty() override { return words.empty(); }
 
   // Getters for word selection support
-  const std::list<std::string>& getWords() const { return words; }
-  const std::list<uint16_t>& getWordXPositions() const { return wordXpos; }
-  const std::list<EpdFontFamily::Style>& getWordStyles() const { return wordStyles; }
+  const std::vector<std::string>& getWords() const { return words; }
+  const std::vector<uint16_t>& getWordXPositions() const { return wordXpos; }
+  const std::vector<EpdFontFamily::Style>& getWordStyles() const { return wordStyles; }
   size_t getWordCount() const { return words.size(); }
   void layout(GfxRenderer& renderer) override {};
   // given a renderer works out where to break the words into lines