From 0c9e8b3ece69b87fb33f7bfa9b1a0a4155b3842c Mon Sep 17 00:00:00 2001 From: Adrian Wilkins-Caruana <38579731+adriancaruana@users.noreply.github.com> Date: Sun, 22 Mar 2026 04:10:41 +1100 Subject: [PATCH] fix: Fix prewarm perf when a page contains many styles (#1451) ## Summary **What is the goal of this PR?** (e.g., Implements the new feature for file uploading.) Fix prewarm performance when a page contains multiple font styles. The prewarm page buffer was a single slot, so each `prewarmCache` call for a new font style freed the previous style's glyphs. On pages with multiple styles (regular + bold + italic), only the last style was prewarmed. The others fell through to the hot-group compaction path at ~2-3ms per glyph. This was most visible in richly formatted books (e.g. this [Czech prayer book](https://stahuj.kancional.cz/e-kniha/kancional.epub) with bold headings, italic liturgical text, and regular body), where page renders took 3-5 seconds instead of ~700ms. Fix: use up to 4 page buffer slots (one per font style) so all styles stay prewarmed simultaneously. Because `prewarmCache` no longer frees the previous buffer itself, callers must call `freePageBuffer`/`clearCache` to reset the slots before prewarming the next page. Fixes #1450. --- ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? PARTIALLY: to diagnose and brainstorm solutions. 
--- lib/EpdFont/FontDecompressor.cpp | 99 ++++++++++++++++++-------------- lib/EpdFont/FontDecompressor.h | 16 ++++-- 2 files changed, 67 insertions(+), 48 deletions(-) diff --git a/lib/EpdFont/FontDecompressor.cpp b/lib/EpdFont/FontDecompressor.cpp index 181bf8d0..10af14df 100644 --- a/lib/EpdFont/FontDecompressor.cpp +++ b/lib/EpdFont/FontDecompressor.cpp @@ -24,12 +24,12 @@ void FontDecompressor::clearCache() { } void FontDecompressor::freePageBuffer() { - free(pageBuffer); - pageBuffer = nullptr; - free(pageGlyphs); - pageGlyphs = nullptr; - pageFont = nullptr; - pageGlyphCount = 0; + for (uint8_t s = 0; s < pageSlotCount; s++) { + free(pageSlots[s].buffer); + free(pageSlots[s].glyphs); + pageSlots[s] = {}; + } + pageSlotCount = 0; } void FontDecompressor::freeHotGroup() { @@ -137,24 +137,28 @@ const uint8_t* FontDecompressor::getBitmap(const EpdFontData* fontData, const Ep return &fontData->bitmap[glyph->dataOffset]; } - // Check page buffer first (populated by prewarmCache) - if (pageBuffer && pageFont == fontData && pageGlyphCount > 0) { - int left = 0, right = pageGlyphCount - 1; + // Check page buffer slots (populated by prewarmCache — one slot per font style) + for (uint8_t s = 0; s < pageSlotCount; s++) { + const auto& slot = pageSlots[s]; + if (slot.fontData != fontData || slot.glyphCount == 0) continue; + + int left = 0, right = slot.glyphCount - 1; while (left <= right) { int mid = left + (right - left) / 2; - if (pageGlyphs[mid].glyphIndex == glyphIndex) { - if (pageGlyphs[mid].bufferOffset != UINT32_MAX) { + if (slot.glyphs[mid].glyphIndex == glyphIndex) { + if (slot.glyphs[mid].bufferOffset != UINT32_MAX) { stats.cacheHits++; stats.getBitmapTimeUs += micros() - tStart; - return &pageBuffer[pageGlyphs[mid].bufferOffset]; + return &slot.buffer[slot.glyphs[mid].bufferOffset]; } break; // Not extracted during prewarm; fall through to hot-group path } - if (pageGlyphs[mid].glyphIndex < glyphIndex) + if (slot.glyphs[mid].glyphIndex < glyphIndex) left = 
mid + 1; else right = mid - 1; } + break; // Found the right slot but glyph wasn't in it; don't check other slots } // Fallback: hot group slot @@ -239,9 +243,15 @@ int32_t FontDecompressor::findGlyphIndex(const EpdFontData* fontData, uint32_t c } int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8Text) { - freePageBuffer(); if (!fontData || !fontData->groups || !utf8Text) return 0; + // Allocate the next available slot (caller must call freePageBuffer/clearCache to reset) + if (pageSlotCount >= MAX_PAGE_SLOTS) { + LOG_ERR("FDC", "All %u page buffer slots full, cannot prewarm fontData=%p", MAX_PAGE_SLOTS, (void*)fontData); + return -1; + } + PageSlot& slot = pageSlots[pageSlotCount]; + // Step 1: Collect unique glyph indices needed for this page uint32_t neededGlyphs[MAX_PAGE_GLYPHS]; uint16_t glyphCount = 0; @@ -304,34 +314,37 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8 stats.uniqueGroupsAccessed = groupCount; - // Step 3: Allocate page buffer and lookup table - pageBuffer = static_cast(malloc(totalBytes)); - pageGlyphs = static_cast(malloc(glyphCount * sizeof(PageGlyphEntry))); - if (!pageBuffer || !pageGlyphs) { + // Step 3: Allocate page buffer and lookup table for this slot + slot.buffer = static_cast(malloc(totalBytes)); + slot.glyphs = static_cast(malloc(glyphCount * sizeof(PageGlyphEntry))); + if (!slot.buffer || !slot.glyphs) { LOG_ERR("FDC", "Failed to allocate page buffer (%u bytes, %u glyphs)", totalBytes, glyphCount); - freePageBuffer(); + free(slot.buffer); + free(slot.glyphs); + slot = {}; return glyphCount; } - stats.pageBufferBytes = totalBytes; - stats.pageGlyphsBytes = glyphCount * sizeof(PageGlyphEntry); + stats.pageBufferBytes += totalBytes; + stats.pageGlyphsBytes += glyphCount * sizeof(PageGlyphEntry); - pageFont = fontData; - pageGlyphCount = glyphCount; + slot.fontData = fontData; + slot.glyphCount = glyphCount; + pageSlotCount++; // Initialize lookup entries (bufferOffset 
= UINT32_MAX means not yet extracted) for (uint16_t i = 0; i < glyphCount; i++) { - pageGlyphs[i] = {neededGlyphs[i], UINT32_MAX, 0}; + slot.glyphs[i] = {neededGlyphs[i], UINT32_MAX, 0}; } // Sort by glyphIndex for binary search in getBitmap() for (uint16_t i = 1; i < glyphCount; i++) { - PageGlyphEntry key = pageGlyphs[i]; + PageGlyphEntry key = slot.glyphs[i]; int j = i - 1; - while (j >= 0 && pageGlyphs[j].glyphIndex > key.glyphIndex) { - pageGlyphs[j + 1] = pageGlyphs[j]; + while (j >= 0 && slot.glyphs[j].glyphIndex > key.glyphIndex) { + slot.glyphs[j + 1] = slot.glyphs[j]; j--; } - pageGlyphs[j + 1] = key; + slot.glyphs[j + 1] = key; } // Step 3b: Pre-scan to compute each needed glyph's byte-aligned offset within its group. @@ -357,15 +370,15 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8 const EpdGlyph& glyph = fontData->glyph[i]; - // Binary search in sorted pageGlyphs to find if glyph i is needed - int left = 0, right = (int)pageGlyphCount - 1; + // Binary search in sorted slot.glyphs to find if glyph i is needed + int left = 0, right = (int)slot.glyphCount - 1; while (left <= right) { const int mid = left + (right - left) / 2; - if (pageGlyphs[mid].glyphIndex == i) { - pageGlyphs[mid].alignedOffset = groupAlignedTracker[gpPos]; + if (slot.glyphs[mid].glyphIndex == i) { + slot.glyphs[mid].alignedOffset = groupAlignedTracker[gpPos]; break; } - if (pageGlyphs[mid].glyphIndex < i) + if (slot.glyphs[mid].glyphIndex < i) left = mid + 1; else right = mid - 1; @@ -384,14 +397,14 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8 const uint32_t glyphI = group.firstGlyphIndex + j; const EpdGlyph& glyph = fontData->glyph[glyphI]; - int left = 0, right = (int)pageGlyphCount - 1; + int left = 0, right = (int)slot.glyphCount - 1; while (left <= right) { const int mid = left + (right - left) / 2; - if (pageGlyphs[mid].glyphIndex == glyphI) { - pageGlyphs[mid].alignedOffset = alignedOff; + if 
(slot.glyphs[mid].glyphIndex == glyphI) { + slot.glyphs[mid].alignedOffset = alignedOff; break; } - if (pageGlyphs[mid].glyphIndex < glyphI) + if (slot.glyphs[mid].glyphIndex < glyphI) left = mid + 1; else right = mid - 1; @@ -430,13 +443,13 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8 // Extract needed glyphs directly from the byte-aligned temp buffer, compacting on the fly. // alignedOffset was pre-computed in step 3b — no full-group compact scan needed. - for (uint16_t i = 0; i < pageGlyphCount; i++) { - if (pageGlyphs[i].bufferOffset != UINT32_MAX) continue; // already extracted - if (getGroupIndex(fontData, pageGlyphs[i].glyphIndex) != groupIdx) continue; + for (uint16_t i = 0; i < slot.glyphCount; i++) { + if (slot.glyphs[i].bufferOffset != UINT32_MAX) continue; // already extracted + if (getGroupIndex(fontData, slot.glyphs[i].glyphIndex) != groupIdx) continue; - const EpdGlyph& glyph = fontData->glyph[pageGlyphs[i].glyphIndex]; - compactSingleGlyph(&tempBuf[pageGlyphs[i].alignedOffset], &pageBuffer[writeOffset], glyph.width, glyph.height); - pageGlyphs[i].bufferOffset = writeOffset; + const EpdGlyph& glyph = fontData->glyph[slot.glyphs[i].glyphIndex]; + compactSingleGlyph(&tempBuf[slot.glyphs[i].alignedOffset], &slot.buffer[writeOffset], glyph.width, glyph.height); + slot.glyphs[i].bufferOffset = writeOffset; writeOffset += glyph.dataLength; } diff --git a/lib/EpdFont/FontDecompressor.h b/lib/EpdFont/FontDecompressor.h index 7c70cc6e..54e75a86 100644 --- a/lib/EpdFont/FontDecompressor.h +++ b/lib/EpdFont/FontDecompressor.h @@ -9,6 +9,7 @@ class FontDecompressor { public: static constexpr uint16_t MAX_PAGE_GLYPHS = 512; + static constexpr uint8_t MAX_PAGE_SLOTS = 4; // One per font style (R/B/I/BI) FontDecompressor() = default; ~FontDecompressor(); @@ -48,16 +49,21 @@ class FontDecompressor { Stats stats; InflateReader inflateReader; - // Page buffer: flat array of prewarmed glyph bitmaps with sorted lookup + // Page 
buffer slots: each style gets its own flat glyph buffer with sorted lookup. + // Up to MAX_PAGE_SLOTS (4) styles can be prewarmed simultaneously. struct PageGlyphEntry { uint32_t glyphIndex; uint32_t bufferOffset; uint32_t alignedOffset; // byte-aligned offset within its decompressed group (set during prewarm pre-scan) }; - uint8_t* pageBuffer = nullptr; - const EpdFontData* pageFont = nullptr; - PageGlyphEntry* pageGlyphs = nullptr; - uint16_t pageGlyphCount = 0; + struct PageSlot { + uint8_t* buffer = nullptr; + const EpdFontData* fontData = nullptr; + PageGlyphEntry* glyphs = nullptr; + uint16_t glyphCount = 0; + }; + PageSlot pageSlots[MAX_PAGE_SLOTS] = {}; + uint8_t pageSlotCount = 0; // Hot group: last decompressed group (byte-aligned) for non-prewarmed fallback path. // Kept in byte-aligned format; individual glyphs are compacted on demand into hotGlyphBuf.