From 4dadea1a03274f1d7256065bbd29566928f809bd Mon Sep 17 00:00:00 2001
From: cottongin <cottongin@users.noreply.github.com>
Date: Sat, 21 Feb 2026 01:48:58 -0500
Subject: [PATCH] =?UTF-8?q?perf:=20Port=20upstream=20PR=20#1027=20?=
 =?UTF-8?q?=E2=80=94=20word-width=20cache=20and=20hyphenation=20early=20ex?=
 =?UTF-8?q?it?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reduces ParsedText::layoutAndExtractLines CPU time 5–9% via two
independent optimizations from jpirnay's PR #1027 (the word-width cache
and the hyphenation early exit), plus two allocation tweaks:

- 128-entry direct-mapped word-width cache (4 KB BSS, FNV-1a hash)
  absorbs redundant getTextAdvanceX calls across paragraphs
- Early exit in hyphenateWordAtIndex when prefix exceeds available
  width (ascending byte-offset order guarantees monotonic widths)
- Reusable prefix string buffer eliminates per-candidate substr allocs
- Reserve hint for lineBreakIndices in computeLineBreaks

List-specific upstream changes (splice, iterator style) not applicable
as mod already uses std::vector (PR #1038). Benchmark infrastructure
excluded (removed by author in final commit).

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 lib/Epub/Epub/ParsedText.cpp | 93 ++++++++++++++++++++++++++++++++++--
 1 file changed, 89 insertions(+), 4 deletions(-)
diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
index d620d203..f6289f42 100644
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 #include <cmath>
+#include <cstring>
 #include <functional>
 #include <limits>
 #include <vector>
@@ -51,6 +52,80 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s
   return renderer.getTextAdvanceX(fontId, sanitized.c_str(), style);
 }
 
+// ---------------------------------------------------------------------------
+// Direct-mapped word-width cache
+//
+// Avoids redundant getTextAdvanceX calls when the same (word, style, fontId)
+// triple appears across paragraphs.  A fixed-size static array is used so
+// that heap allocation and fragmentation are both zero.
+//
+// Eviction policy: hash-direct mapping — a word always occupies the single
+// slot determined by its hash; a collision simply overwrites that slot.
+// This gives O(1) lookup (one hash + one memcmp) regardless of how full the
+// cache is, avoiding the O(n) linear-scan overhead that causes a regression
+// on corpora with many unique words (e.g. German compound-heavy text).
+//
+// Words longer than 23 bytes bypass the cache entirely — they are uncommon,
+// unlikely to repeat verbatim, and exceed the fixed-width key buffer.
+// ---------------------------------------------------------------------------
+
+struct WordWidthCacheEntry {  // one cache slot: key = (word, fontId, style), payload = width
+  char word[24];   // key: NUL-terminated word; 23 usable bytes + terminator
+  int fontId;      // key: font id that was passed to the measurement call
+  uint16_t width;  // payload: width measured for the key
+  uint8_t style;   // key: EpdFontFamily::Style narrowed to one byte
+  bool valid;      // false = slot empty (BSS-initialised to 0)
+};
+
+// Power-of-two size → slot selection via fast bitmask AND (enforced by the static_assert below).
+// 128 × 32 bytes = 4 KB in BSS; low collision rate for typical paragraph vocabulary, even German compounds.
+static constexpr uint32_t WORD_WIDTH_CACHE_SIZE = 128;
+static constexpr uint32_t WORD_WIDTH_CACHE_MASK = WORD_WIDTH_CACHE_SIZE - 1;
+static_assert(WORD_WIDTH_CACHE_SIZE != 0 && (WORD_WIDTH_CACHE_SIZE & WORD_WIDTH_CACHE_MASK) == 0, "not a power of 2");
+static WordWidthCacheEntry s_wordWidthCache[WORD_WIDTH_CACHE_SIZE];
+
+// Hashes (word bytes, fontId, style) to 32 bits: FNV-1a over the bytes, one more round for fontId, then XOR style.
+static uint32_t wordWidthCacheHash(const char* str, const size_t len, const int fontId, const uint8_t style) {
+  constexpr uint32_t kFnvOffsetBasis = 2166136261u;
+  constexpr uint32_t kFnvPrime = 16777619u;
+
+  uint32_t hash = kFnvOffsetBasis;
+  for (const char* p = str; p != str + len; ++p) {
+    hash = (hash ^ static_cast<uint8_t>(*p)) * kFnvPrime;
+  }
+  hash = (hash ^ static_cast<uint32_t>(fontId)) * kFnvPrime;
+  return hash ^ style;  // style is folded in last, with no multiply after it
+}
+
+// Returns the width of `word` for (fontId, style): served from the direct-mapped cache on a hit, otherwise
+// measured and stored, overwriting whatever occupied the slot.  Appending a hyphen is not supported — those
+// measurements are word-fragment lookups that will not repeat and must not pollute the cache.
+static uint16_t cachedMeasureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
+                                       const EpdFontFamily::Style style) {
+  const size_t len = word.size();
+  if (len >= sizeof(WordWidthCacheEntry::word)) {
+    return measureWordWidth(renderer, fontId, word, style);  // word + NUL would not fit the key: bypass cache
+  }
+  // NOTE(review): s_wordWidthCache is unsynchronised static state — confirm layout never runs concurrently.
+  const uint8_t styleByte = static_cast<uint8_t>(style);
+  const char* const wordCStr = word.c_str();
+
+  const uint32_t slot = wordWidthCacheHash(wordCStr, len, fontId, styleByte) & WORD_WIDTH_CACHE_MASK;
+  auto& e = s_wordWidthCache[slot];
+  // Compare len + 1 bytes so the terminator takes part: a stored word that merely starts with `word` must miss.
+  if (e.valid && e.fontId == fontId && e.style == styleByte && std::memcmp(e.word, wordCStr, len + 1) == 0) {
+    return e.width;  // O(1) cache hit
+  }
+
+  const uint16_t w = measureWordWidth(renderer, fontId, word, style);
+  std::memcpy(e.word, wordCStr, len + 1);
+  e.fontId = fontId;
+  e.width = w;
+  e.style = styleByte;
+  e.valid = true;
+  return w;
+}
+
 }  // namespace
 
 void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline,
@@ -116,7 +191,7 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
   wordWidths.reserve(words.size());
 
   for (size_t i = 0; i < words.size(); ++i) {
-    wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i]));
+    wordWidths.push_back(cachedMeasureWordWidth(renderer, fontId, words[i], wordStyles[i]));
   }
 
   return wordWidths;
@@ -228,6 +303,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
 
   // Stores the index of the word that starts the next line (last_word_index + 1)
   std::vector<size_t> lineBreakIndices;
+  lineBreakIndices.reserve(totalWordCount / 8 + 1);
   size_t currentWordIndex = 0;
 
   while (currentWordIndex < totalWordCount) {
@@ -368,6 +444,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
   bool chosenNeedsHyphen = true;
 
   // Iterate over each legal breakpoint and retain the widest prefix that still fits.
+  // Re-use a single string buffer to avoid one heap allocation per candidate breakpoint.
+  std::string prefix;
+  prefix.reserve(word.size());
   for (const auto& info : breakInfos) {
     const size_t offset = info.byteOffset;
     if (offset == 0 || offset >= word.size()) {
@@ -375,9 +454,15 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
     }
 
     const bool needsHyphen = info.requiresInsertedHyphen;
-    const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen);
-    if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) {
-      continue;  // Skip if too wide or not an improvement
+    prefix.assign(word, 0, offset);
+    const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen);
+    if (prefixWidth > availableWidth) {
+      // breakOffsets returns candidates in ascending byte-offset order, and prefix width is
+      // non-decreasing with offset, so every subsequent candidate will also be too wide.
+      break;
+    }
+    if (prefixWidth <= chosenWidth) {
+      continue;
     }
 
     chosenWidth = prefixWidth;