diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index bc5ff52..517d59d 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -48,12 +48,24 @@ bool BookMetadataCache::beginTocPass() { spineFile.close(); return false; } + + // Build href->spineIndex lookup map for O(1) access during TOC creation + hrefToSpineIndex.clear(); + hrefToSpineIndex.reserve(spineCount); + spineFile.seek(0); + for (int i = 0; i < spineCount; i++) { + auto entry = readSpineEntry(spineFile); + hrefToSpineIndex[entry.href] = static_cast(i); + } + spineFile.seek(0); + return true; } bool BookMetadataCache::endTocPass() { tocFile.close(); spineFile.close(); + hrefToSpineIndex.clear(); return true; } @@ -134,6 +146,18 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // LUTs complete // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin + // Build spineIndex->tocIndex mapping in one pass (O(n) instead of O(n*m)) + std::vector spineToTocIndex(spineCount, -1); + tocFile.seek(0); + for (int j = 0; j < tocCount; j++) { + auto tocEntry = readTocEntry(tocFile); + if (tocEntry.spineIndex >= 0 && tocEntry.spineIndex < spineCount) { + if (spineToTocIndex[tocEntry.spineIndex] == -1) { + spineToTocIndex[tocEntry.spineIndex] = static_cast(j); + } + } + } + ZipFile zip(epubPath); // Pre-open zip file to speed up size calculations if (!zip.open()) { @@ -155,14 +179,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta for (int i = 0; i < spineCount; i++) { auto spineEntry = readSpineEntry(spineFile); - tocFile.seek(0); - for (int j = 0; j < tocCount; j++) { - auto tocEntry = readTocEntry(tocFile); - if (tocEntry.spineIndex == i) { - spineEntry.tocIndex = j; - break; - } - } + spineEntry.tocIndex = spineToTocIndex[i]; // Not a huge deal if we don't fine a TOC entry for the spine entry, this is expected behaviour for EPUBs // Logging here is for debugging @@ -253,20 +270,11 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri return; } - int spineIndex = -1; - // find spine index - // TODO: This lookup is slow as need to scan through all items each time. We can't hold it all in memory due to size. - // But perhaps we can load just the hrefs in a vector/list to do an index lookup? - spineFile.seek(0); - for (int i = 0; i < spineCount; i++) { - auto spineEntry = readSpineEntry(spineFile); - if (spineEntry.href == href) { - spineIndex = i; - break; - } - } - - if (spineIndex == -1) { + int16_t spineIndex = -1; + auto it = hrefToSpineIndex.find(href); + if (it != hrefToSpineIndex.end()) { + spineIndex = it->second; + } else { Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str()); } diff --git a/lib/Epub/Epub/BookMetadataCache.h b/lib/Epub/Epub/BookMetadataCache.h index c7e9590..e0efc36 100644 --- a/lib/Epub/Epub/BookMetadataCache.h +++ b/lib/Epub/Epub/BookMetadataCache.h @@ -3,6 +3,7 @@ #include #include +#include #include class BookMetadataCache { @@ -54,6 +55,8 @@ class BookMetadataCache { // Temp file handles during build FsFile spineFile; FsFile tocFile; + // Lookup cache for O(1) href->spineIndex during TOC pass + std::unordered_map hrefToSpineIndex; uint32_t writeSpineEntry(FsFile& file, const SpineEntry& entry) const; uint32_t writeTocEntry(FsFile& file, const TocEntry& entry) const;