perf: optimize large EPUB indexing from O(n²) to O(n)

Replace O(n²) lookups with O(n) preprocessing:

1. createTocEntry(): Build href->spineIndex map once in beginTocPass()
   instead of scanning spine file for every TOC entry

2. buildBookBin(): Build spineIndex->tocIndex vector in single pass
   instead of scanning TOC file for every spine entry

For a 2768-chapter EPUB, this reduces:
- TOC pass: from ~7.6M file reads (≈ 2768²) to ~5.5K reads
- buildBookBin: from ~7.6M file reads (≈ 2768²) to ~5.5K reads

Memory impact: ~80KB for the href map, which is built in beginTocPass() and cleared in endTocPass() (an acceptable trade-off for a 10x+ speedup)
This commit is contained in:
Daniel 2026-01-20 12:40:36 -08:00 committed by cottongin
parent 481b8210fb
commit a91bb0b1b8
No known key found for this signature in database
GPG Key ID: 0ECC91FE4655C262
2 changed files with 33 additions and 22 deletions

View File

@ -48,12 +48,24 @@ bool BookMetadataCache::beginTocPass() {
spineFile.close(); spineFile.close();
return false; return false;
} }
// Build href->spineIndex lookup map for O(1) access during TOC creation
hrefToSpineIndex.clear();
hrefToSpineIndex.reserve(spineCount);
spineFile.seek(0);
for (int i = 0; i < spineCount; i++) {
auto entry = readSpineEntry(spineFile);
hrefToSpineIndex[entry.href] = static_cast<int16_t>(i);
}
spineFile.seek(0);
return true; return true;
} }
bool BookMetadataCache::endTocPass() { bool BookMetadataCache::endTocPass() {
tocFile.close(); tocFile.close();
spineFile.close(); spineFile.close();
hrefToSpineIndex.clear();
return true; return true;
} }
@ -134,6 +146,18 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
// LUTs complete // LUTs complete
// Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
// Build spineIndex->tocIndex mapping in one pass (O(n) instead of O(n*m))
std::vector<int16_t> spineToTocIndex(spineCount, -1);
tocFile.seek(0);
for (int j = 0; j < tocCount; j++) {
auto tocEntry = readTocEntry(tocFile);
if (tocEntry.spineIndex >= 0 && tocEntry.spineIndex < spineCount) {
if (spineToTocIndex[tocEntry.spineIndex] == -1) {
spineToTocIndex[tocEntry.spineIndex] = static_cast<int16_t>(j);
}
}
}
ZipFile zip(epubPath); ZipFile zip(epubPath);
// Pre-open zip file to speed up size calculations // Pre-open zip file to speed up size calculations
if (!zip.open()) { if (!zip.open()) {
@ -155,14 +179,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
for (int i = 0; i < spineCount; i++) { for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile); auto spineEntry = readSpineEntry(spineFile);
tocFile.seek(0); spineEntry.tocIndex = spineToTocIndex[i];
for (int j = 0; j < tocCount; j++) {
auto tocEntry = readTocEntry(tocFile);
if (tocEntry.spineIndex == i) {
spineEntry.tocIndex = j;
break;
}
}
// Not a huge deal if we don't find a TOC entry for the spine entry, this is expected behaviour for EPUBs // Not a huge deal if we don't find a TOC entry for the spine entry, this is expected behaviour for EPUBs
// Logging here is for debugging // Logging here is for debugging
@ -253,20 +270,11 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri
return; return;
} }
int spineIndex = -1; int16_t spineIndex = -1;
// find spine index auto it = hrefToSpineIndex.find(href);
// TODO: This lookup is slow as need to scan through all items each time. We can't hold it all in memory due to size. if (it != hrefToSpineIndex.end()) {
// But perhaps we can load just the hrefs in a vector/list to do an index lookup? spineIndex = it->second;
spineFile.seek(0); } else {
for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile);
if (spineEntry.href == href) {
spineIndex = i;
break;
}
}
if (spineIndex == -1) {
Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str()); Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str());
} }

View File

@ -3,6 +3,7 @@
#include <SDCardManager.h> #include <SDCardManager.h>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
class BookMetadataCache { class BookMetadataCache {
@ -54,6 +55,8 @@ class BookMetadataCache {
// Temp file handles during build // Temp file handles during build
FsFile spineFile; FsFile spineFile;
FsFile tocFile; FsFile tocFile;
// Lookup cache for O(1) href->spineIndex during TOC pass
std::unordered_map<std::string, int16_t> hrefToSpineIndex;
uint32_t writeSpineEntry(FsFile& file, const SpineEntry& entry) const; uint32_t writeSpineEntry(FsFile& file, const SpineEntry& entry) const;
uint32_t writeTocEntry(FsFile& file, const TocEntry& entry) const; uint32_t writeTocEntry(FsFile& file, const TocEntry& entry) const;