perf: Port upstream PR #1027 — word-width cache and hyphenation early exit
Reduces ParsedText::layoutAndExtractLines CPU time by 5–9% via two independent optimizations from jpirnay's PR #1027 (the word-width cache and the hyphenation early exit), plus two smaller allocation tweaks:
- 128-entry direct-mapped word-width cache (4 KB BSS, FNV-1a hash) absorbs redundant getTextAdvanceX calls across paragraphs
- Early exit in hyphenateWordAtIndex when the prefix exceeds the available width (ascending byte-offset order guarantees monotonic widths)
- Reusable prefix string buffer eliminates per-candidate substr allocations
- Reserve hint for lineBreakIndices in computeLineBreaks

List-specific upstream changes (splice, iterator style) are not applicable because the mod already uses std::vector (PR #1038). Benchmark infrastructure is excluded (removed by the author in the final commit).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -51,6 +52,80 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s
|
|||||||
return renderer.getTextAdvanceX(fontId, sanitized.c_str(), style);
|
return renderer.getTextAdvanceX(fontId, sanitized.c_str(), style);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Direct-mapped word-width cache
|
||||||
|
//
|
||||||
|
// Avoids redundant getTextAdvanceX calls when the same (word, style, fontId)
|
||||||
|
// triple appears across paragraphs. A fixed-size static array is used so
|
||||||
|
// that heap allocation and fragmentation are both zero.
|
||||||
|
//
|
||||||
|
// Eviction policy: hash-direct mapping — a word always occupies the single
|
||||||
|
// slot determined by its hash; a collision simply overwrites that slot.
|
||||||
|
// This gives O(1) lookup (one hash + one memcmp) regardless of how full the
|
||||||
|
// cache is, avoiding the O(n) linear-scan overhead that causes a regression
|
||||||
|
// on corpora with many unique words (e.g. German compound-heavy text).
|
||||||
|
//
|
||||||
|
// Words longer than 23 bytes bypass the cache entirely — they are uncommon,
|
||||||
|
// unlikely to repeat verbatim, and exceed the fixed-width key buffer.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// One slot of the direct-mapped word-width cache.
//
// The key is the (word, fontId, style) triple; the payload is the width that
// was measured for it. Members are ordered largest-alignment-first after the
// key buffer so the entry packs without padding holes.
struct WordWidthCacheEntry {
  char word[24];   // Key text, NUL-terminated; 23 usable bytes + terminator
  int fontId;      // Key: font the width was measured with
  uint16_t width;  // Payload: measured width of `word`
  uint8_t style;   // Key: EpdFontFamily::Style narrowed to one byte
  bool valid;      // false = slot empty (BSS-initialised to 0)
};

// The cache's memory budget is derived from this entry size, so fail the build
// instead of silently growing if a field is added or the target's `int` differs.
static_assert(sizeof(WordWidthCacheEntry) == 32, "WordWidthCacheEntry is expected to pack to exactly 32 bytes");
|
||||||
|
|
||||||
|
// Power-of-two size → slot selection via fast bitmask AND.
|
||||||
|
// 128 entries × 32 bytes = 4 KB in BSS; covers typical paragraph vocabulary
|
||||||
|
// with a low collision rate even for German compound-heavy prose.
|
||||||
|
static constexpr uint32_t WORD_WIDTH_CACHE_SIZE = 128;
|
||||||
|
static constexpr uint32_t WORD_WIDTH_CACHE_MASK = WORD_WIDTH_CACHE_SIZE - 1;
|
||||||
|
static WordWidthCacheEntry s_wordWidthCache[WORD_WIDTH_CACHE_SIZE];
|
||||||
|
|
||||||
|
// Hashes a cache key: 32-bit FNV-1a over the `len` bytes at `str`, followed by
// one more FNV-1a round that mixes in `fontId`, and a final XOR with `style`.
// The caller reduces the result to a slot index.
static uint32_t wordWidthCacheHash(const char* str, const size_t len, const int fontId, const uint8_t style) {
  constexpr uint32_t kFnvOffsetBasis = 2166136261u;
  constexpr uint32_t kFnvPrime = 16777619u;

  uint32_t hash = kFnvOffsetBasis;

  // FNV-1a round per byte: XOR the byte in first, then multiply by the prime.
  const char* const end = str + len;
  for (const char* cursor = str; cursor != end; ++cursor) {
    hash = (hash ^ static_cast<uint8_t>(*cursor)) * kFnvPrime;
  }

  // Fold the font id in with a full round; the style byte is XORed in last.
  hash = (hash ^ static_cast<uint32_t>(fontId)) * kFnvPrime;
  return hash ^ style;
}
|
||||||
|
|
||||||
|
// Returns the cached width for (word, style, fontId), measuring and caching
|
||||||
|
// on a miss. Appending a hyphen is not supported — those measurements are
|
||||||
|
// word-fragment lookups that will not repeat and must not pollute the cache.
|
||||||
|
static uint16_t cachedMeasureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
|
||||||
|
const EpdFontFamily::Style style) {
|
||||||
|
const size_t len = word.size();
|
||||||
|
if (len >= 24) {
|
||||||
|
return measureWordWidth(renderer, fontId, word, style);
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint8_t styleByte = static_cast<uint8_t>(style);
|
||||||
|
const char* const wordCStr = word.c_str();
|
||||||
|
|
||||||
|
const uint32_t slot = wordWidthCacheHash(wordCStr, len, fontId, styleByte) & WORD_WIDTH_CACHE_MASK;
|
||||||
|
auto& e = s_wordWidthCache[slot];
|
||||||
|
|
||||||
|
if (e.valid && e.fontId == fontId && e.style == styleByte && memcmp(e.word, wordCStr, len + 1) == 0) {
|
||||||
|
return e.width; // O(1) cache hit
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint16_t w = measureWordWidth(renderer, fontId, word, style);
|
||||||
|
memcpy(e.word, wordCStr, len + 1);
|
||||||
|
e.fontId = fontId;
|
||||||
|
e.width = w;
|
||||||
|
e.style = styleByte;
|
||||||
|
e.valid = true;
|
||||||
|
return w;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline,
|
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline,
|
||||||
@@ -116,7 +191,7 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
|
|||||||
wordWidths.reserve(words.size());
|
wordWidths.reserve(words.size());
|
||||||
|
|
||||||
for (size_t i = 0; i < words.size(); ++i) {
|
for (size_t i = 0; i < words.size(); ++i) {
|
||||||
wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i]));
|
wordWidths.push_back(cachedMeasureWordWidth(renderer, fontId, words[i], wordStyles[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
return wordWidths;
|
return wordWidths;
|
||||||
@@ -228,6 +303,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
|
|||||||
|
|
||||||
// Stores the index of the word that starts the next line (last_word_index + 1)
|
// Stores the index of the word that starts the next line (last_word_index + 1)
|
||||||
std::vector<size_t> lineBreakIndices;
|
std::vector<size_t> lineBreakIndices;
|
||||||
|
lineBreakIndices.reserve(totalWordCount / 8 + 1);
|
||||||
size_t currentWordIndex = 0;
|
size_t currentWordIndex = 0;
|
||||||
|
|
||||||
while (currentWordIndex < totalWordCount) {
|
while (currentWordIndex < totalWordCount) {
|
||||||
@@ -368,6 +444,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
|||||||
bool chosenNeedsHyphen = true;
|
bool chosenNeedsHyphen = true;
|
||||||
|
|
||||||
// Iterate over each legal breakpoint and retain the widest prefix that still fits.
|
// Iterate over each legal breakpoint and retain the widest prefix that still fits.
|
||||||
|
// Re-use a single string buffer to avoid one heap allocation per candidate breakpoint.
|
||||||
|
std::string prefix;
|
||||||
|
prefix.reserve(word.size());
|
||||||
for (const auto& info : breakInfos) {
|
for (const auto& info : breakInfos) {
|
||||||
const size_t offset = info.byteOffset;
|
const size_t offset = info.byteOffset;
|
||||||
if (offset == 0 || offset >= word.size()) {
|
if (offset == 0 || offset >= word.size()) {
|
||||||
@@ -375,9 +454,15 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
|||||||
}
|
}
|
||||||
|
|
||||||
const bool needsHyphen = info.requiresInsertedHyphen;
|
const bool needsHyphen = info.requiresInsertedHyphen;
|
||||||
const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen);
|
prefix.assign(word, 0, offset);
|
||||||
if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) {
|
const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen);
|
||||||
continue; // Skip if too wide or not an improvement
|
if (prefixWidth > availableWidth) {
|
||||||
|
// breakOffsets returns candidates in ascending byte-offset order, and prefix width is
|
||||||
|
// non-decreasing with offset, so every subsequent candidate will also be too wide.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (prefixWidth <= chosenWidth) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
chosenWidth = prefixWidth;
|
chosenWidth = prefixWidth;
|
||||||
|
|||||||
Reference in New Issue
Block a user