fix: Fix prewarm perf when a page contains many styles (#1451)

## Summary

**What is the goal of this PR?** (e.g., Implements the new feature for
file uploading.)

Fix prewarm perf when a page contains many styles.

The prewarm page buffer was a single slot, so each `prewarmCache` call
for a new font style freed the previous style's glyphs. On pages with
multiple styles (regular + bold + italic), only the most recently
prewarmed style remained in the buffer. Glyphs of the other styles fell
through to the hot-group compaction path at ~2-3ms per glyph.

This was most visible in richly formatted books (e.g. this [Czech prayer
book](https://stahuj.kancional.cz/e-kniha/kancional.epub) with bold
headings, italic liturgical text, and regular body), where page renders
took 3-5 seconds instead of ~700ms.

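Fix: use up to 4 page buffer slots (one per font style) so all styles
stay prewarmed simultaneously. `prewarmCache` no longer frees the page
buffer itself: callers must call `freePageBuffer`/`clearCache` to reset
the slots, and `prewarmCache` returns -1 if all slots are already in use.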

Fixes #1450.

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? PARTIALLY: to diagnose the
issue and brainstorm solutions.
This commit is contained in:
Adrian Wilkins-Caruana
2026-03-22 04:10:41 +11:00
committed by GitHub
parent 53beeeed2b
commit 0c9e8b3ece
2 changed files with 67 additions and 48 deletions

View File

@@ -24,12 +24,12 @@ void FontDecompressor::clearCache() {
} }
void FontDecompressor::freePageBuffer() { void FontDecompressor::freePageBuffer() {
free(pageBuffer); for (uint8_t s = 0; s < pageSlotCount; s++) {
pageBuffer = nullptr; free(pageSlots[s].buffer);
free(pageGlyphs); free(pageSlots[s].glyphs);
pageGlyphs = nullptr; pageSlots[s] = {};
pageFont = nullptr; }
pageGlyphCount = 0; pageSlotCount = 0;
} }
void FontDecompressor::freeHotGroup() { void FontDecompressor::freeHotGroup() {
@@ -137,24 +137,28 @@ const uint8_t* FontDecompressor::getBitmap(const EpdFontData* fontData, const Ep
return &fontData->bitmap[glyph->dataOffset]; return &fontData->bitmap[glyph->dataOffset];
} }
// Check page buffer first (populated by prewarmCache) // Check page buffer slots (populated by prewarmCache — one slot per font style)
if (pageBuffer && pageFont == fontData && pageGlyphCount > 0) { for (uint8_t s = 0; s < pageSlotCount; s++) {
int left = 0, right = pageGlyphCount - 1; const auto& slot = pageSlots[s];
if (slot.fontData != fontData || slot.glyphCount == 0) continue;
int left = 0, right = slot.glyphCount - 1;
while (left <= right) { while (left <= right) {
int mid = left + (right - left) / 2; int mid = left + (right - left) / 2;
if (pageGlyphs[mid].glyphIndex == glyphIndex) { if (slot.glyphs[mid].glyphIndex == glyphIndex) {
if (pageGlyphs[mid].bufferOffset != UINT32_MAX) { if (slot.glyphs[mid].bufferOffset != UINT32_MAX) {
stats.cacheHits++; stats.cacheHits++;
stats.getBitmapTimeUs += micros() - tStart; stats.getBitmapTimeUs += micros() - tStart;
return &pageBuffer[pageGlyphs[mid].bufferOffset]; return &slot.buffer[slot.glyphs[mid].bufferOffset];
} }
break; // Not extracted during prewarm; fall through to hot-group path break; // Not extracted during prewarm; fall through to hot-group path
} }
if (pageGlyphs[mid].glyphIndex < glyphIndex) if (slot.glyphs[mid].glyphIndex < glyphIndex)
left = mid + 1; left = mid + 1;
else else
right = mid - 1; right = mid - 1;
} }
break; // Found the right slot but glyph wasn't in it; don't check other slots
} }
// Fallback: hot group slot // Fallback: hot group slot
@@ -239,9 +243,15 @@ int32_t FontDecompressor::findGlyphIndex(const EpdFontData* fontData, uint32_t c
} }
int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8Text) { int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8Text) {
freePageBuffer();
if (!fontData || !fontData->groups || !utf8Text) return 0; if (!fontData || !fontData->groups || !utf8Text) return 0;
// Allocate the next available slot (caller must call freePageBuffer/clearCache to reset)
if (pageSlotCount >= MAX_PAGE_SLOTS) {
LOG_ERR("FDC", "All %u page buffer slots full, cannot prewarm fontData=%p", MAX_PAGE_SLOTS, (void*)fontData);
return -1;
}
PageSlot& slot = pageSlots[pageSlotCount];
// Step 1: Collect unique glyph indices needed for this page // Step 1: Collect unique glyph indices needed for this page
uint32_t neededGlyphs[MAX_PAGE_GLYPHS]; uint32_t neededGlyphs[MAX_PAGE_GLYPHS];
uint16_t glyphCount = 0; uint16_t glyphCount = 0;
@@ -304,34 +314,37 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8
stats.uniqueGroupsAccessed = groupCount; stats.uniqueGroupsAccessed = groupCount;
// Step 3: Allocate page buffer and lookup table // Step 3: Allocate page buffer and lookup table for this slot
pageBuffer = static_cast<uint8_t*>(malloc(totalBytes)); slot.buffer = static_cast<uint8_t*>(malloc(totalBytes));
pageGlyphs = static_cast<PageGlyphEntry*>(malloc(glyphCount * sizeof(PageGlyphEntry))); slot.glyphs = static_cast<PageGlyphEntry*>(malloc(glyphCount * sizeof(PageGlyphEntry)));
if (!pageBuffer || !pageGlyphs) { if (!slot.buffer || !slot.glyphs) {
LOG_ERR("FDC", "Failed to allocate page buffer (%u bytes, %u glyphs)", totalBytes, glyphCount); LOG_ERR("FDC", "Failed to allocate page buffer (%u bytes, %u glyphs)", totalBytes, glyphCount);
freePageBuffer(); free(slot.buffer);
free(slot.glyphs);
slot = {};
return glyphCount; return glyphCount;
} }
stats.pageBufferBytes = totalBytes; stats.pageBufferBytes += totalBytes;
stats.pageGlyphsBytes = glyphCount * sizeof(PageGlyphEntry); stats.pageGlyphsBytes += glyphCount * sizeof(PageGlyphEntry);
pageFont = fontData; slot.fontData = fontData;
pageGlyphCount = glyphCount; slot.glyphCount = glyphCount;
pageSlotCount++;
// Initialize lookup entries (bufferOffset = UINT32_MAX means not yet extracted) // Initialize lookup entries (bufferOffset = UINT32_MAX means not yet extracted)
for (uint16_t i = 0; i < glyphCount; i++) { for (uint16_t i = 0; i < glyphCount; i++) {
pageGlyphs[i] = {neededGlyphs[i], UINT32_MAX, 0}; slot.glyphs[i] = {neededGlyphs[i], UINT32_MAX, 0};
} }
// Sort by glyphIndex for binary search in getBitmap() // Sort by glyphIndex for binary search in getBitmap()
for (uint16_t i = 1; i < glyphCount; i++) { for (uint16_t i = 1; i < glyphCount; i++) {
PageGlyphEntry key = pageGlyphs[i]; PageGlyphEntry key = slot.glyphs[i];
int j = i - 1; int j = i - 1;
while (j >= 0 && pageGlyphs[j].glyphIndex > key.glyphIndex) { while (j >= 0 && slot.glyphs[j].glyphIndex > key.glyphIndex) {
pageGlyphs[j + 1] = pageGlyphs[j]; slot.glyphs[j + 1] = slot.glyphs[j];
j--; j--;
} }
pageGlyphs[j + 1] = key; slot.glyphs[j + 1] = key;
} }
// Step 3b: Pre-scan to compute each needed glyph's byte-aligned offset within its group. // Step 3b: Pre-scan to compute each needed glyph's byte-aligned offset within its group.
@@ -357,15 +370,15 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8
const EpdGlyph& glyph = fontData->glyph[i]; const EpdGlyph& glyph = fontData->glyph[i];
// Binary search in sorted pageGlyphs to find if glyph i is needed // Binary search in sorted slot.glyphs to find if glyph i is needed
int left = 0, right = (int)pageGlyphCount - 1; int left = 0, right = (int)slot.glyphCount - 1;
while (left <= right) { while (left <= right) {
const int mid = left + (right - left) / 2; const int mid = left + (right - left) / 2;
if (pageGlyphs[mid].glyphIndex == i) { if (slot.glyphs[mid].glyphIndex == i) {
pageGlyphs[mid].alignedOffset = groupAlignedTracker[gpPos]; slot.glyphs[mid].alignedOffset = groupAlignedTracker[gpPos];
break; break;
} }
if (pageGlyphs[mid].glyphIndex < i) if (slot.glyphs[mid].glyphIndex < i)
left = mid + 1; left = mid + 1;
else else
right = mid - 1; right = mid - 1;
@@ -384,14 +397,14 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8
const uint32_t glyphI = group.firstGlyphIndex + j; const uint32_t glyphI = group.firstGlyphIndex + j;
const EpdGlyph& glyph = fontData->glyph[glyphI]; const EpdGlyph& glyph = fontData->glyph[glyphI];
int left = 0, right = (int)pageGlyphCount - 1; int left = 0, right = (int)slot.glyphCount - 1;
while (left <= right) { while (left <= right) {
const int mid = left + (right - left) / 2; const int mid = left + (right - left) / 2;
if (pageGlyphs[mid].glyphIndex == glyphI) { if (slot.glyphs[mid].glyphIndex == glyphI) {
pageGlyphs[mid].alignedOffset = alignedOff; slot.glyphs[mid].alignedOffset = alignedOff;
break; break;
} }
if (pageGlyphs[mid].glyphIndex < glyphI) if (slot.glyphs[mid].glyphIndex < glyphI)
left = mid + 1; left = mid + 1;
else else
right = mid - 1; right = mid - 1;
@@ -430,13 +443,13 @@ int FontDecompressor::prewarmCache(const EpdFontData* fontData, const char* utf8
// Extract needed glyphs directly from the byte-aligned temp buffer, compacting on the fly. // Extract needed glyphs directly from the byte-aligned temp buffer, compacting on the fly.
// alignedOffset was pre-computed in step 3b — no full-group compact scan needed. // alignedOffset was pre-computed in step 3b — no full-group compact scan needed.
for (uint16_t i = 0; i < pageGlyphCount; i++) { for (uint16_t i = 0; i < slot.glyphCount; i++) {
if (pageGlyphs[i].bufferOffset != UINT32_MAX) continue; // already extracted if (slot.glyphs[i].bufferOffset != UINT32_MAX) continue; // already extracted
if (getGroupIndex(fontData, pageGlyphs[i].glyphIndex) != groupIdx) continue; if (getGroupIndex(fontData, slot.glyphs[i].glyphIndex) != groupIdx) continue;
const EpdGlyph& glyph = fontData->glyph[pageGlyphs[i].glyphIndex]; const EpdGlyph& glyph = fontData->glyph[slot.glyphs[i].glyphIndex];
compactSingleGlyph(&tempBuf[pageGlyphs[i].alignedOffset], &pageBuffer[writeOffset], glyph.width, glyph.height); compactSingleGlyph(&tempBuf[slot.glyphs[i].alignedOffset], &slot.buffer[writeOffset], glyph.width, glyph.height);
pageGlyphs[i].bufferOffset = writeOffset; slot.glyphs[i].bufferOffset = writeOffset;
writeOffset += glyph.dataLength; writeOffset += glyph.dataLength;
} }

View File

@@ -9,6 +9,7 @@
class FontDecompressor { class FontDecompressor {
public: public:
static constexpr uint16_t MAX_PAGE_GLYPHS = 512; static constexpr uint16_t MAX_PAGE_GLYPHS = 512;
static constexpr uint8_t MAX_PAGE_SLOTS = 4; // One per font style (R/B/I/BI)
FontDecompressor() = default; FontDecompressor() = default;
~FontDecompressor(); ~FontDecompressor();
@@ -48,16 +49,21 @@ class FontDecompressor {
Stats stats; Stats stats;
InflateReader inflateReader; InflateReader inflateReader;
// Page buffer: flat array of prewarmed glyph bitmaps with sorted lookup // Page buffer slots: each style gets its own flat glyph buffer with sorted lookup.
// Up to MAX_PAGE_SLOTS (4) styles can be prewarmed simultaneously.
struct PageGlyphEntry { struct PageGlyphEntry {
uint32_t glyphIndex; uint32_t glyphIndex;
uint32_t bufferOffset; uint32_t bufferOffset;
uint32_t alignedOffset; // byte-aligned offset within its decompressed group (set during prewarm pre-scan) uint32_t alignedOffset; // byte-aligned offset within its decompressed group (set during prewarm pre-scan)
}; };
uint8_t* pageBuffer = nullptr; struct PageSlot {
const EpdFontData* pageFont = nullptr; uint8_t* buffer = nullptr;
PageGlyphEntry* pageGlyphs = nullptr; const EpdFontData* fontData = nullptr;
uint16_t pageGlyphCount = 0; PageGlyphEntry* glyphs = nullptr;
uint16_t glyphCount = 0;
};
PageSlot pageSlots[MAX_PAGE_SLOTS] = {};
uint8_t pageSlotCount = 0;
// Hot group: last decompressed group (byte-aligned) for non-prewarmed fallback path. // Hot group: last decompressed group (byte-aligned) for non-prewarmed fallback path.
// Kept in byte-aligned format; individual glyphs are compacted on demand into hotGlyphBuf. // Kept in byte-aligned format; individual glyphs are compacted on demand into hotGlyphBuf.