2 Commits

Author SHA1 Message Date
cottongin
4dadea1a03 perf: Port upstream PR #1027 — word-width cache and hyphenation early exit
Reduces ParsedText::layoutAndExtractLines CPU time 5–9% via two
independent optimizations from jpirnay's PR #1027:

- 128-entry direct-mapped word-width cache (4 KB BSS, FNV-1a hash)
  absorbs redundant getTextAdvanceX calls across paragraphs
- Early exit in hyphenateWordAtIndex when prefix exceeds available
  width (ascending byte-offset order guarantees monotonic widths)
- Reusable prefix string buffer eliminates per-candidate substr allocs
- Reserve hint for lineBreakIndices in computeLineBreaks

List-specific upstream changes (splice, iterator style) not applicable
as mod already uses std::vector (PR #1038). Benchmark infrastructure
excluded (removed by author in final commit).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 01:48:58 -05:00
cottongin
0d9a1f4f89 perf: Port upstream PR #1055 — byte-level framebuffer writes
Replace per-pixel drawPixel calls with byte-level framebuffer writes
for fillRect, axis-aligned drawLine, and fillRectDither. Adds
fillPhysicalHSpanByte/fillPhysicalHSpan helpers that write directly
to physical rows with memset and partial-byte masking.

Also applies coderabbit nitpick: fillPolygon scanline fill now uses
fillPhysicalHSpan for Landscape orientations.

Upstream: https://github.com/crosspoint-reader/crosspoint-reader/pull/1055
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 01:14:30 -05:00
3 changed files with 275 additions and 23 deletions

View File

@@ -4,6 +4,7 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <cstring>
#include <functional> #include <functional>
#include <limits> #include <limits>
#include <vector> #include <vector>
@@ -51,6 +52,80 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s
return renderer.getTextAdvanceX(fontId, sanitized.c_str(), style); return renderer.getTextAdvanceX(fontId, sanitized.c_str(), style);
} }
// ---------------------------------------------------------------------------
// Direct-mapped word-width cache
//
// Avoids redundant getTextAdvanceX calls when the same (word, style, fontId)
// triple appears across paragraphs. A fixed-size static array is used so
// that heap allocation and fragmentation are both zero.
//
// Eviction policy: hash-direct mapping — a word always occupies the single
// slot determined by its hash; a collision simply overwrites that slot.
// This gives O(1) lookup (one hash + one memcmp) regardless of how full the
// cache is, avoiding the O(n) linear-scan overhead that causes a regression
// on corpora with many unique words (e.g. German compound-heavy text).
//
// Words longer than 23 bytes bypass the cache entirely — they are uncommon,
// unlikely to repeat verbatim, and exceed the fixed-width key buffer.
// ---------------------------------------------------------------------------
// One slot of the word-width cache. The key is the (word, fontId, style) triple;
// the payload is the measured advance width for that triple.
struct WordWidthCacheEntry {
  char word[24];   // NUL-terminated key text; 23 usable bytes + terminator
  int fontId;      // font the width was measured with
  uint16_t width;  // cached measurement
  uint8_t style;   // EpdFontFamily::Style narrowed to one byte
  bool valid;      // false = slot empty (BSS-initialised to 0)
};

// Power-of-two size → slot selection via fast bitmask AND.
// 128 entries × 32 bytes = 4 KB in BSS; covers typical paragraph vocabulary
// with a low collision rate even for German compound-heavy prose.
static constexpr uint32_t WORD_WIDTH_CACHE_SIZE = 128;
static constexpr uint32_t WORD_WIDTH_CACHE_MASK = WORD_WIDTH_CACHE_SIZE - 1;
// `hash & MASK` only reaches every slot (and stays in range) when SIZE is a power of two,
// so make a bad resize a compile error rather than a silent cache degradation.
static_assert(WORD_WIDTH_CACHE_SIZE != 0 && (WORD_WIDTH_CACHE_SIZE & WORD_WIDTH_CACHE_MASK) == 0,
              "WORD_WIDTH_CACHE_SIZE must be a power of two for mask-based slot selection");
static WordWidthCacheEntry s_wordWidthCache[WORD_WIDTH_CACHE_SIZE];
// Hashes a cache key into 32 bits: 32-bit FNV-1a over the first `len` bytes of `str`,
// followed by one more FNV-1a round that mixes in fontId, and a final XOR with style.
// Only the `len` bytes are read, so `str` does not need to be NUL-terminated.
static uint32_t wordWidthCacheHash(const char* str, const size_t len, const int fontId, const uint8_t style) {
  constexpr uint32_t kFnvOffsetBasis = 2166136261u;
  constexpr uint32_t kFnvPrime = 16777619u;

  uint32_t hash = kFnvOffsetBasis;
  const char* const end = str + len;
  for (const char* cursor = str; cursor != end; ++cursor) {
    // Go through uint8_t so bytes >= 0x80 are not sign-extended before the XOR.
    hash = (hash ^ static_cast<uint8_t>(*cursor)) * kFnvPrime;
  }

  hash = (hash ^ static_cast<uint32_t>(fontId)) * kFnvPrime;
  return hash ^ style;
}
// Returns the width for (word, style, fontId), serving it from the direct-mapped cache when the
// slot selected by the key's hash holds exactly that key, and measuring + overwriting the slot
// otherwise. Appending a hyphen is not supported — those measurements are word-fragment lookups
// that will not repeat and must not pollute the cache.
//
// Words too long for the fixed-size key buffer are measured directly and never cached.
//
// NOTE(review): the key does not include the renderer, and style is narrowed to one byte; this
// presumes a single renderer, a stable fontId → font mapping, and Style values below 256 — confirm.
static uint16_t cachedMeasureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
                                       const EpdFontFamily::Style style) {
  // Derive the bypass threshold from the key buffer itself so it cannot drift from the struct
  // definition and let the memcpy below overrun the buffer.
  constexpr size_t keyCapacity = sizeof(WordWidthCacheEntry::word);

  const size_t len = word.size();
  if (len >= keyCapacity) {
    return measureWordWidth(renderer, fontId, word, style);
  }

  const uint8_t styleByte = static_cast<uint8_t>(style);
  const char* const wordCStr = word.c_str();
  const uint32_t slot = wordWidthCacheHash(wordCStr, len, fontId, styleByte) & WORD_WIDTH_CACHE_MASK;
  auto& entry = s_wordWidthCache[slot];

  // Compare len + 1 bytes so the terminator takes part: a stored key that merely starts with
  // `word` must not count as a hit.
  if (entry.valid && entry.fontId == fontId && entry.style == styleByte &&
      std::memcmp(entry.word, wordCStr, len + 1) == 0) {
    return entry.width;  // O(1) cache hit
  }

  // Miss (empty slot or a different key hashed here): measure, then overwrite the slot.
  const uint16_t measured = measureWordWidth(renderer, fontId, word, style);
  std::memcpy(entry.word, wordCStr, len + 1);
  entry.fontId = fontId;
  entry.width = measured;
  entry.style = styleByte;
  entry.valid = true;
  return measured;
}
} // namespace } // namespace
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline, void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline,
@@ -116,7 +191,7 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
wordWidths.reserve(words.size()); wordWidths.reserve(words.size());
for (size_t i = 0; i < words.size(); ++i) { for (size_t i = 0; i < words.size(); ++i) {
wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i])); wordWidths.push_back(cachedMeasureWordWidth(renderer, fontId, words[i], wordStyles[i]));
} }
return wordWidths; return wordWidths;
@@ -228,6 +303,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
// Stores the index of the word that starts the next line (last_word_index + 1) // Stores the index of the word that starts the next line (last_word_index + 1)
std::vector<size_t> lineBreakIndices; std::vector<size_t> lineBreakIndices;
lineBreakIndices.reserve(totalWordCount / 8 + 1);
size_t currentWordIndex = 0; size_t currentWordIndex = 0;
while (currentWordIndex < totalWordCount) { while (currentWordIndex < totalWordCount) {
@@ -368,6 +444,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
bool chosenNeedsHyphen = true; bool chosenNeedsHyphen = true;
// Iterate over each legal breakpoint and retain the widest prefix that still fits. // Iterate over each legal breakpoint and retain the widest prefix that still fits.
// Re-use a single string buffer to avoid one heap allocation per candidate breakpoint.
std::string prefix;
prefix.reserve(word.size());
for (const auto& info : breakInfos) { for (const auto& info : breakInfos) {
const size_t offset = info.byteOffset; const size_t offset = info.byteOffset;
if (offset == 0 || offset >= word.size()) { if (offset == 0 || offset >= word.size()) {
@@ -375,9 +454,15 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
} }
const bool needsHyphen = info.requiresInsertedHyphen; const bool needsHyphen = info.requiresInsertedHyphen;
const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen); prefix.assign(word, 0, offset);
if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) { const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen);
continue; // Skip if too wide or not an improvement if (prefixWidth > availableWidth) {
// breakOffsets returns candidates in ascending byte-offset order, and prefix width is
// non-decreasing with offset, so every subsequent candidate will also be too wide.
break;
}
if (prefixWidth <= chosenWidth) {
continue;
} }
chosenWidth = prefixWidth; chosenWidth = prefixWidth;

View File

@@ -3,6 +3,8 @@
#include <Logging.h> #include <Logging.h>
#include <Utf8.h> #include <Utf8.h>
#include <cstring>
const uint8_t* GfxRenderer::getGlyphBitmap(const EpdFontData* fontData, const EpdGlyph* glyph) const { const uint8_t* GfxRenderer::getGlyphBitmap(const EpdFontData* fontData, const EpdGlyph* glyph) const {
if (fontData->groups != nullptr) { if (fontData->groups != nullptr) {
if (!fontDecompressor) { if (!fontDecompressor) {
@@ -306,15 +308,34 @@ void GfxRenderer::drawLine(int x1, int y1, int x2, int y2, const bool state) con
if (y2 < y1) { if (y2 < y1) {
std::swap(y1, y2); std::swap(y1, y2);
} }
for (int y = y1; y <= y2; y++) { // In Portrait/PortraitInverted a logical vertical line maps to a physical horizontal span.
drawPixel(x1, y, state); switch (orientation) {
case Portrait:
fillPhysicalHSpan(HalDisplay::DISPLAY_HEIGHT - 1 - x1, y1, y2, state);
return;
case PortraitInverted:
fillPhysicalHSpan(x1, HalDisplay::DISPLAY_WIDTH - 1 - y2, HalDisplay::DISPLAY_WIDTH - 1 - y1, state);
return;
default:
for (int y = y1; y <= y2; y++) drawPixel(x1, y, state);
return;
} }
} else if (y1 == y2) { } else if (y1 == y2) {
if (x2 < x1) { if (x2 < x1) {
std::swap(x1, x2); std::swap(x1, x2);
} }
for (int x = x1; x <= x2; x++) { // In Landscape a logical horizontal line maps to a physical horizontal span.
drawPixel(x, y1, state); switch (orientation) {
case LandscapeCounterClockwise:
fillPhysicalHSpan(y1, x1, x2, state);
return;
case LandscapeClockwise:
fillPhysicalHSpan(HalDisplay::DISPLAY_HEIGHT - 1 - y1, HalDisplay::DISPLAY_WIDTH - 1 - x2,
HalDisplay::DISPLAY_WIDTH - 1 - x1, state);
return;
default:
for (int x = x1; x <= x2; x++) drawPixel(x, y1, state);
return;
} }
} else { } else {
// Bresenham's line algorithm — integer arithmetic only // Bresenham's line algorithm — integer arithmetic only
@@ -443,9 +464,80 @@ void GfxRenderer::drawRoundedRect(const int x, const int y, const int width, con
} }
} }
// Fills the inclusive physical column range [phyX_start, phyX_end] of physical row phyY with a
// repeating 8-pixel pattern, touching the framebuffer a byte at a time instead of per pixel.
// The range is clipped to the panel; an off-panel row or an empty clipped range writes nothing.
// Edge bytes that are only partly covered keep their uncovered bits; fully covered bytes are
// written with memset.
// Bit layout: MSB-first (bit 7 = phyX=0, bit 0 = phyX=7); 0 bits = dark pixel, 1 bits = white pixel.
void GfxRenderer::fillPhysicalHSpanByte(const int phyY, const int phyX_start, const int phyX_end,
                                        const uint8_t patternByte) const {
  if (phyY < 0 || phyY >= static_cast<int>(HalDisplay::DISPLAY_HEIGHT)) return;

  const int firstX = std::max(phyX_start, 0);
  const int lastX = std::min(phyX_end, static_cast<int>(HalDisplay::DISPLAY_WIDTH) - 1);
  if (firstX > lastX) return;

  uint8_t* const rowBytes = frameBuffer + phyY * HalDisplay::DISPLAY_WIDTH_BYTES;

  // firstX/lastX are non-negative after clipping, so / and % agree with shift and mask.
  const int firstByte = firstX / 8;
  const int lastByte = lastX / 8;
  const int firstBit = firstX % 8;
  const int lastBit = lastX % 8;

  // headMask selects bit positions firstBit..7 of a byte, tailMask selects positions 0..lastBit
  // (position 0 is the MSB).
  const uint8_t headMask = static_cast<uint8_t>(0xFF >> firstBit);
  const uint8_t tailMask = static_cast<uint8_t>(0xFF << (7 - lastBit));

  // Replace only the masked bits of one framebuffer byte with the pattern's bits.
  const auto blend = [rowBytes, patternByte](const int index, const uint8_t mask) {
    rowBytes[index] = static_cast<uint8_t>((rowBytes[index] & ~mask) | (patternByte & mask));
  };

  // Whole span lives inside a single byte: both edges constrain the same mask.
  if (firstByte == lastByte) {
    blend(firstByte, static_cast<uint8_t>(headMask & tailMask));
    return;
  }

  // Left partial byte
  int solidFirst = firstByte;
  if (firstBit != 0) {
    blend(firstByte, headMask);
    ++solidFirst;
  }

  // Full bytes in the middle
  const int solidLast = (lastBit == 7) ? lastByte : lastByte - 1;
  if (solidFirst <= solidLast) {
    memset(rowBytes + solidFirst, patternByte, solidLast - solidFirst + 1);
  }

  // Right partial byte
  if (lastBit != 7) {
    blend(lastByte, tailMask);
  }
}
// Solid-colour variant of fillPhysicalHSpanByte: picks the all-dark pattern (0x00) when state is
// true and the all-white pattern (0xFF) when it is false, then delegates the actual write.
void GfxRenderer::fillPhysicalHSpan(const int phyY, const int phyX_start, const int phyX_end, const bool state) const {
  const uint8_t solidPattern = state ? 0x00 : 0xFF;
  fillPhysicalHSpanByte(phyY, phyX_start, phyX_end, solidPattern);
}
void GfxRenderer::fillRect(const int x, const int y, const int width, const int height, const bool state) const { void GfxRenderer::fillRect(const int x, const int y, const int width, const int height, const bool state) const {
for (int fillY = y; fillY < y + height; fillY++) { if (width <= 0 || height <= 0) return;
drawLine(x, fillY, x + width - 1, fillY, state);
// For each orientation, one logical dimension maps to a constant physical row, allowing the
// perpendicular dimension to be written as a byte-level span — eliminating per-pixel overhead.
switch (orientation) {
case Portrait:
for (int lx = x; lx < x + width; lx++) {
fillPhysicalHSpan(HalDisplay::DISPLAY_HEIGHT - 1 - lx, y, y + height - 1, state);
}
return;
case PortraitInverted:
for (int lx = x; lx < x + width; lx++) {
fillPhysicalHSpan(lx, HalDisplay::DISPLAY_WIDTH - 1 - (y + height - 1), HalDisplay::DISPLAY_WIDTH - 1 - y,
state);
}
return;
case LandscapeCounterClockwise:
for (int ly = y; ly < y + height; ly++) {
fillPhysicalHSpan(ly, x, x + width - 1, state);
}
return;
case LandscapeClockwise:
for (int ly = y; ly < y + height; ly++) {
fillPhysicalHSpan(HalDisplay::DISPLAY_HEIGHT - 1 - ly, HalDisplay::DISPLAY_WIDTH - 1 - (x + width - 1),
HalDisplay::DISPLAY_WIDTH - 1 - x, state);
}
return;
} }
} }
@@ -482,17 +574,77 @@ void GfxRenderer::fillRectDither(const int x, const int y, const int width, cons
fillRect(x, y, width, height, true); fillRect(x, y, width, height, true);
} else if (color == Color::White) { } else if (color == Color::White) {
fillRect(x, y, width, height, false); fillRect(x, y, width, height, false);
} else if (color == Color::LightGray) {
for (int fillY = y; fillY < y + height; fillY++) {
for (int fillX = x; fillX < x + width; fillX++) {
drawPixelDither<Color::LightGray>(fillX, fillY);
}
}
} else if (color == Color::DarkGray) { } else if (color == Color::DarkGray) {
for (int fillY = y; fillY < y + height; fillY++) { // Pattern: dark where (phyX + phyY) % 2 == 0 (alternating checkerboard).
for (int fillX = x; fillX < x + width; fillX++) { // Byte patterns (phyY even / phyY odd):
drawPixelDither<Color::DarkGray>(fillX, fillY); // Portrait / PortraitInverted: 0xAA / 0x55
// LandscapeCW / LandscapeCCW: 0x55 / 0xAA
switch (orientation) {
case Portrait:
for (int lx = x; lx < x + width; lx++) {
const int phyY = HalDisplay::DISPLAY_HEIGHT - 1 - lx;
const uint8_t pb = (phyY % 2 == 0) ? 0xAA : 0x55;
fillPhysicalHSpanByte(phyY, y, y + height - 1, pb);
} }
return;
case PortraitInverted:
for (int lx = x; lx < x + width; lx++) {
const int phyY = lx;
const uint8_t pb = (phyY % 2 == 0) ? 0xAA : 0x55;
fillPhysicalHSpanByte(phyY, HalDisplay::DISPLAY_WIDTH - 1 - (y + height - 1),
HalDisplay::DISPLAY_WIDTH - 1 - y, pb);
}
return;
case LandscapeCounterClockwise:
for (int ly = y; ly < y + height; ly++) {
const int phyY = ly;
const uint8_t pb = (phyY % 2 == 0) ? 0x55 : 0xAA;
fillPhysicalHSpanByte(phyY, x, x + width - 1, pb);
}
return;
case LandscapeClockwise:
for (int ly = y; ly < y + height; ly++) {
const int phyY = HalDisplay::DISPLAY_HEIGHT - 1 - ly;
const uint8_t pb = (phyY % 2 == 0) ? 0x55 : 0xAA;
fillPhysicalHSpanByte(phyY, HalDisplay::DISPLAY_WIDTH - 1 - (x + width - 1),
HalDisplay::DISPLAY_WIDTH - 1 - x, pb);
}
return;
}
} else if (color == Color::LightGray) {
// Pattern: dark where phyX % 2 == 0 && phyY % 2 == 0 (1-in-4 pixels dark).
// Rows that would be all-white are skipped entirely.
switch (orientation) {
case Portrait:
for (int lx = x; lx < x + width; lx++) {
const int phyY = HalDisplay::DISPLAY_HEIGHT - 1 - lx;
if (phyY % 2 == 0) continue;
fillPhysicalHSpanByte(phyY, y, y + height - 1, 0x55);
}
return;
case PortraitInverted:
for (int lx = x; lx < x + width; lx++) {
const int phyY = lx;
if (phyY % 2 != 0) continue;
fillPhysicalHSpanByte(phyY, HalDisplay::DISPLAY_WIDTH - 1 - (y + height - 1),
HalDisplay::DISPLAY_WIDTH - 1 - y, 0xAA);
}
return;
case LandscapeCounterClockwise:
for (int ly = y; ly < y + height; ly++) {
const int phyY = ly;
if (phyY % 2 != 0) continue;
fillPhysicalHSpanByte(phyY, x, x + width - 1, 0x55);
}
return;
case LandscapeClockwise:
for (int ly = y; ly < y + height; ly++) {
const int phyY = HalDisplay::DISPLAY_HEIGHT - 1 - ly;
if (phyY % 2 == 0) continue;
fillPhysicalHSpanByte(phyY, HalDisplay::DISPLAY_WIDTH - 1 - (x + width - 1),
HalDisplay::DISPLAY_WIDTH - 1 - x, 0xAA);
}
return;
} }
} }
} }
@@ -890,12 +1042,19 @@ void GfxRenderer::fillPolygon(const int* xPoints, const int* yPoints, int numPoi
if (startX < 0) startX = 0; if (startX < 0) startX = 0;
if (endX >= getScreenWidth()) endX = getScreenWidth() - 1; if (endX >= getScreenWidth()) endX = getScreenWidth() - 1;
// Draw horizontal line // In Landscape orientations, horizontal scanlines map to physical horizontal spans.
if (orientation == LandscapeCounterClockwise) {
fillPhysicalHSpan(scanY, startX, endX, state);
} else if (orientation == LandscapeClockwise) {
fillPhysicalHSpan(HalDisplay::DISPLAY_HEIGHT - 1 - scanY, HalDisplay::DISPLAY_WIDTH - 1 - endX,
HalDisplay::DISPLAY_WIDTH - 1 - startX, state);
} else {
for (int x = startX; x <= endX; x++) { for (int x = startX; x <= endX; x++) {
drawPixel(x, scanY, state); drawPixel(x, scanY, state);
} }
} }
} }
}
free(nodeX); free(nodeX);
} }

View File

@@ -45,6 +45,14 @@ class GfxRenderer {
void drawPixelDither(int x, int y) const; void drawPixelDither(int x, int y) const;
template <Color color> template <Color color>
void fillArc(int maxRadius, int cx, int cy, int xDir, int yDir) const; void fillArc(int maxRadius, int cx, int cy, int xDir, int yDir) const;
// Write a patterned horizontal span directly to the physical framebuffer using byte-level operations.
// phyY: physical row; phyX_start/phyX_end: inclusive physical column range.
// The column range is clipped to the panel width; nothing is written when phyY is off-panel or the
// clipped range is empty.
// patternByte is repeated across the span; partial edge bytes are blended with existing content.
// The pattern is aligned to framebuffer byte boundaries, not to phyX_start.
// Bit layout: MSB-first (bit 7 = phyX=0); 0 bits = dark pixel, 1 bits = white pixel.
void fillPhysicalHSpanByte(int phyY, int phyX_start, int phyX_end, uint8_t patternByte) const;
// Write a solid horizontal span directly to the physical framebuffer using byte-level operations.
// phyY: physical row; phyX_start/phyX_end: inclusive physical column range.
// Thin wrapper around fillPhysicalHSpanByte: state=true → 0x00 (dark), false → 0xFF (white).
void fillPhysicalHSpan(int phyY, int phyX_start, int phyX_end, bool state) const;
public: public:
explicit GfxRenderer(HalDisplay& halDisplay) explicit GfxRenderer(HalDisplay& halDisplay)