From 4dadea1a03274f1d7256065bbd29566928f809bd Mon Sep 17 00:00:00 2001 From: cottongin Date: Sat, 21 Feb 2026 01:48:58 -0500 Subject: [PATCH] =?UTF-8?q?perf:=20Port=20upstream=20PR=20#1027=20?= =?UTF-8?q?=E2=80=94=20word-width=20cache=20and=20hyphenation=20early=20ex?= =?UTF-8?q?it?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduces ParsedText::layoutAndExtractLines CPU time 5–9% via two independent optimizations from jpirnay's PR #1027: - 128-entry direct-mapped word-width cache (4 KB BSS, FNV-1a hash) absorbs redundant getTextAdvanceX calls across paragraphs - Early exit in hyphenateWordAtIndex when prefix exceeds available width (ascending byte-offset order guarantees monotonic widths) - Reusable prefix string buffer eliminates per-candidate substr allocs - Reserve hint for lineBreakIndices in computeLineBreaks List-specific upstream changes (splice, iterator style) not applicable as mod already uses std::vector (PR #1038). Benchmark infrastructure excluded (removed by author in final commit). Co-authored-by: Cursor --- lib/Epub/Epub/ParsedText.cpp | 93 ++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index d620d203..f6289f42 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -51,6 +52,80 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s return renderer.getTextAdvanceX(fontId, sanitized.c_str(), style); } +// --------------------------------------------------------------------------- +// Direct-mapped word-width cache +// +// Avoids redundant getTextAdvanceX calls when the same (word, style, fontId) +// triple appears across paragraphs. A fixed-size static array is used so +// that heap allocation and fragmentation are both zero. 
//
// Eviction policy: hash-direct mapping — a word always occupies the single
// slot determined by its hash; a collision simply overwrites that slot.
// This gives O(1) lookup (one hash + one memcmp) regardless of how full the
// cache is, avoiding the O(n) linear-scan overhead that causes a regression
// on corpora with many unique words (e.g. German compound-heavy text).
//
// Words longer than 23 bytes bypass the cache entirely — they are uncommon,
// unlikely to repeat verbatim, and exceed the fixed-width key buffer.
// ---------------------------------------------------------------------------

// One cache slot. The complete lookup key (word bytes, fontId, style) is
// stored next to the measured width so that a hit can be confirmed exactly
// rather than trusted on the hash alone.
struct WordWidthCacheEntry {
  char word[24];   // NUL-terminated; 23 usable bytes + terminator
  int fontId;      // font the width was measured with
  uint16_t width;  // measured width for this key
  uint8_t style;   // EpdFontFamily::Style narrowed to one byte
  bool valid;      // false = slot empty (BSS-initialised to 0)
};

// Power-of-two size → slot selection via fast bitmask AND.
// 128 entries × 32 bytes = 4 KB in BSS (32 bytes assumes a 4-byte int); covers
// typical paragraph vocabulary with a low collision rate even for German
// compound-heavy prose.
static constexpr uint32_t WORD_WIDTH_CACHE_SIZE = 128;
static constexpr uint32_t WORD_WIDTH_CACHE_MASK = WORD_WIDTH_CACHE_SIZE - 1;
// `hash & MASK` only addresses every slot (and nothing else) when SIZE is a
// non-zero power of two; fail the build instead of silently losing slots.
static_assert(WORD_WIDTH_CACHE_SIZE != 0 && (WORD_WIDTH_CACHE_SIZE & WORD_WIDTH_CACHE_MASK) == 0,
              "WORD_WIDTH_CACHE_SIZE must be a non-zero power of two");
static WordWidthCacheEntry s_wordWidthCache[WORD_WIDTH_CACHE_SIZE];

// FNV-1a over the word bytes, then XOR-folded with fontId and style.
//
//   str     first byte of the word; exactly `len` bytes are read, so the
//           buffer does not need to be NUL-terminated
//   len     number of bytes of `str` to hash
//   fontId  mixed in after the word bytes, followed by one more multiply
//   style   XORed into the low byte last
//
// Returns the full 32-bit hash; the caller reduces it to a slot index.
static uint32_t wordWidthCacheHash(const char* str, const size_t len, const int fontId, const uint8_t style) {
  uint32_t h = 2166136261u;  // FNV offset basis
  for (size_t i = 0; i < len; ++i) {
    // Go through uint8_t first: `char` may be signed, and a sign-extended
    // UTF-8 byte (>= 0x80) would flip the upper 24 bits of the hash.
    h ^= static_cast<uint8_t>(str[i]);
    h *= 16777619u;  // FNV prime
  }
  h ^= static_cast<uint32_t>(fontId);
  h *= 16777619u;
  h ^= style;
  return h;
}

// Returns the cached width for (word, style, fontId), measuring and caching
// on a miss.  Appending a hyphen is not supported — those measurements are
// word-fragment lookups that will not repeat and must not pollute the cache.
+static uint16_t cachedMeasureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word, + const EpdFontFamily::Style style) { + const size_t len = word.size(); + if (len >= 24) { + return measureWordWidth(renderer, fontId, word, style); + } + + const uint8_t styleByte = static_cast(style); + const char* const wordCStr = word.c_str(); + + const uint32_t slot = wordWidthCacheHash(wordCStr, len, fontId, styleByte) & WORD_WIDTH_CACHE_MASK; + auto& e = s_wordWidthCache[slot]; + + if (e.valid && e.fontId == fontId && e.style == styleByte && memcmp(e.word, wordCStr, len + 1) == 0) { + return e.width; // O(1) cache hit + } + + const uint16_t w = measureWordWidth(renderer, fontId, word, style); + memcpy(e.word, wordCStr, len + 1); + e.fontId = fontId; + e.width = w; + e.style = styleByte; + e.valid = true; + return w; +} + } // namespace void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline, @@ -116,7 +191,7 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere wordWidths.reserve(words.size()); for (size_t i = 0; i < words.size(); ++i) { - wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i])); + wordWidths.push_back(cachedMeasureWordWidth(renderer, fontId, words[i], wordStyles[i])); } return wordWidths; @@ -228,6 +303,7 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c // Stores the index of the word that starts the next line (last_word_index + 1) std::vector lineBreakIndices; + lineBreakIndices.reserve(totalWordCount / 8 + 1); size_t currentWordIndex = 0; while (currentWordIndex < totalWordCount) { @@ -368,6 +444,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl bool chosenNeedsHyphen = true; // Iterate over each legal breakpoint and retain the widest prefix that still fits. + // Re-use a single string buffer to avoid one heap allocation per candidate breakpoint. 
+ std::string prefix; + prefix.reserve(word.size()); for (const auto& info : breakInfos) { const size_t offset = info.byteOffset; if (offset == 0 || offset >= word.size()) { @@ -375,9 +454,15 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl } const bool needsHyphen = info.requiresInsertedHyphen; - const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen); - if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) { - continue; // Skip if too wide or not an improvement + prefix.assign(word, 0, offset); + const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen); + if (prefixWidth > availableWidth) { + // breakOffsets returns candidates in ascending byte-offset order, and prefix width is + // non-decreasing with offset, so every subsequent candidate will also be too wide. + break; + } + if (prefixWidth <= chosenWidth) { + continue; } chosenWidth = prefixWidth;