From a91bb0b1b8f204cbcfe6b9857f4d3a567f190ff1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 20 Jan 2026 12:40:36 -0800 Subject: [PATCH] =?UTF-8?q?perf:=20optimize=20large=20EPUB=20indexing=20fr?= =?UTF-8?q?om=20O(n=C2=B2)=20to=20O(n)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace O(n²) lookups with O(n) preprocessing: 1. createTocEntry(): Build href->spineIndex map once in beginTocPass() instead of scanning spine file for every TOC entry 2. buildBookBin(): Build spineIndex->tocIndex vector in single pass instead of scanning TOC file for every spine entry For 2768-chapter EPUBs, this reduces: - TOC pass: from ~7.6M file reads to ~5.5K reads - buildBookBin: from ~7.6M file reads to ~5.5K reads Memory impact: ~80KB for href map (acceptable trade-off for 10x+ speedup) --- lib/Epub/Epub/BookMetadataCache.cpp | 52 +++++++++++++++++------------ lib/Epub/Epub/BookMetadataCache.h | 3 ++ 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index bc5ff52..517d59d 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -48,12 +48,24 @@ bool BookMetadataCache::beginTocPass() { spineFile.close(); return false; } + + // href->spineIndex map for O(1) TOC lookups; emplace() keeps the first index per href, as the old scan did + hrefToSpineIndex.clear(); + hrefToSpineIndex.reserve(spineCount); + spineFile.seek(0); + for (int i = 0; i < spineCount; i++) { + auto entry = readSpineEntry(spineFile); + hrefToSpineIndex.emplace(entry.href, static_cast<int16_t>(i)); + } + spineFile.seek(0); + return true; } bool BookMetadataCache::endTocPass() { tocFile.close(); spineFile.close(); + decltype(hrefToSpineIndex)().swap(hrefToSpineIndex); // clear() may keep the bucket array allocated return true; } @@ -134,6 +146,18 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // LUTs complete // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin + // Build
spineIndex->tocIndex mapping in one pass (O(n) instead of O(n*m)) + std::vector spineToTocIndex(spineCount, -1); + tocFile.seek(0); + for (int j = 0; j < tocCount; j++) { + auto tocEntry = readTocEntry(tocFile); + if (tocEntry.spineIndex >= 0 && tocEntry.spineIndex < spineCount) { + if (spineToTocIndex[tocEntry.spineIndex] == -1) { + spineToTocIndex[tocEntry.spineIndex] = static_cast(j); + } + } + } + ZipFile zip(epubPath); // Pre-open zip file to speed up size calculations if (!zip.open()) { @@ -155,14 +179,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta for (int i = 0; i < spineCount; i++) { auto spineEntry = readSpineEntry(spineFile); - tocFile.seek(0); - for (int j = 0; j < tocCount; j++) { - auto tocEntry = readTocEntry(tocFile); - if (tocEntry.spineIndex == i) { - spineEntry.tocIndex = j; - break; - } - } + spineEntry.tocIndex = spineToTocIndex[i]; // Not a huge deal if we don't fine a TOC entry for the spine entry, this is expected behaviour for EPUBs // Logging here is for debugging @@ -253,20 +270,11 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri return; } - int spineIndex = -1; - // find spine index - // TODO: This lookup is slow as need to scan through all items each time. We can't hold it all in memory due to size. - // But perhaps we can load just the hrefs in a vector/list to do an index lookup? 
- spineFile.seek(0); - for (int i = 0; i < spineCount; i++) { - auto spineEntry = readSpineEntry(spineFile); - if (spineEntry.href == href) { - spineIndex = i; - break; - } - } - - if (spineIndex == -1) { + int16_t spineIndex = -1; + auto it = hrefToSpineIndex.find(href); + if (it != hrefToSpineIndex.end()) { + spineIndex = it->second; + } else { Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str()); } diff --git a/lib/Epub/Epub/BookMetadataCache.h b/lib/Epub/Epub/BookMetadataCache.h index c7e9590..e0efc36 100644 --- a/lib/Epub/Epub/BookMetadataCache.h +++ b/lib/Epub/Epub/BookMetadataCache.h @@ -3,6 +3,7 @@ #include #include +#include #include class BookMetadataCache { @@ -54,6 +55,8 @@ class BookMetadataCache { // Temp file handles during build FsFile spineFile; FsFile tocFile; + // Lookup cache for O(1) href->spineIndex during TOC pass + std::unordered_map hrefToSpineIndex; uint32_t writeSpineEntry(FsFile& file, const SpineEntry& entry) const; uint32_t writeTocEntry(FsFile& file, const TocEntry& entry) const;