diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 33f920b..7559e3b 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -226,6 +226,8 @@ bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Cache not found, building spine/TOC cache\n", millis()); setupCacheDir(); + const uint32_t indexingStart = millis(); + // Begin building cache - stream entries to disk immediately if (!bookMetadataCache->beginWrite()) { Serial.printf("[%lu] [EBP] Could not begin writing cache\n", millis()); @@ -233,6 +235,7 @@ bool Epub::load(const bool buildIfMissing) { } // OPF Pass + const uint32_t opfStart = millis(); BookMetadataCache::BookMetadata bookMetadata; if (!bookMetadataCache->beginContentOpfPass()) { Serial.printf("[%lu] [EBP] Could not begin writing content.opf pass\n", millis()); @@ -246,8 +249,10 @@ bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Could not end writing content.opf pass\n", millis()); return false; } + Serial.printf("[%lu] [EBP] OPF pass completed in %lu ms\n", millis(), millis() - opfStart); // TOC Pass - try EPUB 3 nav first, fall back to NCX + const uint32_t tocStart = millis(); if (!bookMetadataCache->beginTocPass()) { Serial.printf("[%lu] [EBP] Could not begin writing toc pass\n", millis()); return false; @@ -276,6 +281,7 @@ bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Could not end writing toc pass\n", millis()); return false; } + Serial.printf("[%lu] [EBP] TOC pass completed in %lu ms\n", millis(), millis() - tocStart); // Close the cache files if (!bookMetadataCache->endWrite()) { @@ -284,10 +290,13 @@ bool Epub::load(const bool buildIfMissing) { } // Build final book.bin + const uint32_t buildStart = millis(); if (!bookMetadataCache->buildBookBin(filepath, bookMetadata)) { Serial.printf("[%lu] [EBP] Could not update mappings and sizes\n", millis()); return false; } + Serial.printf("[%lu] [EBP] buildBookBin completed in %lu ms\n", millis(), millis() - buildStart); + Serial.printf("[%lu] [EBP] Total indexing completed in %lu ms\n", millis(), millis() - indexingStart); if (!bookMetadataCache->cleanupTmpFiles()) { Serial.printf("[%lu] [EBP] Could not cleanup tmp files - ignoring\n", millis()); diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 374cad2..e724213 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -40,7 +40,6 @@ bool BookMetadataCache::endContentOpfPass() { bool BookMetadataCache::beginTocPass() { Serial.printf("[%lu] [BMC] Beginning toc pass\n", millis()); - // Open spine file for reading if (!SdMan.openFileForRead("BMC", cachePath + tmpSpineBinFile, spineFile)) { return false; } @@ -48,12 +47,41 @@ bool BookMetadataCache::beginTocPass() { spineFile.close(); return false; } + + if (spineCount >= LARGE_SPINE_THRESHOLD) { + spineHrefIndex.clear(); + spineHrefIndex.reserve(spineCount); + spineFile.seek(0); + for (int i = 0; i < spineCount; i++) { + auto entry = readSpineEntry(spineFile); + SpineHrefIndexEntry idx; + idx.hrefHash = fnvHash64(entry.href); + idx.hrefLen = static_cast(entry.href.size()); + idx.spineIndex = static_cast(i); + spineHrefIndex.push_back(idx); + } + std::sort(spineHrefIndex.begin(), spineHrefIndex.end(), + [](const SpineHrefIndexEntry& a, const SpineHrefIndexEntry& b) { + return a.hrefHash < b.hrefHash || (a.hrefHash == b.hrefHash && a.hrefLen < b.hrefLen); + }); + spineFile.seek(0); + useSpineHrefIndex = true; + Serial.printf("[%lu] [BMC] Using fast index for %d spine items\n", millis(), spineCount); + } else { + useSpineHrefIndex = false; + } + return true; } bool BookMetadataCache::endTocPass() { tocFile.close(); spineFile.close(); + + spineHrefIndex.clear(); + spineHrefIndex.shrink_to_fit(); + useSpineHrefIndex = false; + return true; } @@ -124,6 +152,18 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // LUTs complete // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin + // Build spineIndex->tocIndex mapping in one pass (O(n) instead of O(n*m)) + std::vector spineToTocIndex(spineCount, -1); + tocFile.seek(0); + for (int j = 0; j < tocCount; j++) { + auto tocEntry = readTocEntry(tocFile); + if (tocEntry.spineIndex >= 0 && tocEntry.spineIndex < spineCount) { + if (spineToTocIndex[tocEntry.spineIndex] == -1) { + spineToTocIndex[tocEntry.spineIndex] = static_cast(j); + } + } + } + ZipFile zip(epubPath); // Pre-open zip file to speed up size calculations if (!zip.open()) { @@ -133,31 +173,56 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta tocFile.close(); return false; } - // TODO: For large ZIPs loading the all localHeaderOffsets will crash. - // However not having them loaded is extremely slow. Need a better solution here. - // Perhaps only a cache of spine items or a better way to speedup lookups? - if (!zip.loadAllFileStatSlims()) { - Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis()); - bookFile.close(); - spineFile.close(); - tocFile.close(); - zip.close(); - return false; + // NOTE: We intentionally skip calling loadAllFileStatSlims() here. + // For large EPUBs (2000+ chapters), pre-loading all ZIP central directory entries + // into memory causes OOM crashes on ESP32-C3's limited ~380KB RAM. + // Instead, for large books we use a one-pass batch lookup that scans the ZIP + // central directory once and matches against spine targets using hash comparison. + // This is O(n*log(m)) instead of O(n*m) while avoiding memory exhaustion. + // See: https://github.com/crosspoint-reader/crosspoint-reader/issues/134 + + std::vector spineSizes; + bool useBatchSizes = false; + + if (spineCount >= LARGE_SPINE_THRESHOLD) { + Serial.printf("[%lu] [BMC] Using batch size lookup for %d spine items\n", millis(), spineCount); + + std::vector targets; + targets.reserve(spineCount); + + spineFile.seek(0); + for (int i = 0; i < spineCount; i++) { + auto entry = readSpineEntry(spineFile); + std::string path = FsHelpers::normalisePath(entry.href); + + ZipFile::SizeTarget t; + t.hash = ZipFile::fnvHash64(path.c_str(), path.size()); + t.len = static_cast(path.size()); + t.index = static_cast(i); + targets.push_back(t); + } + + std::sort(targets.begin(), targets.end(), [](const ZipFile::SizeTarget& a, const ZipFile::SizeTarget& b) { + return a.hash < b.hash || (a.hash == b.hash && a.len < b.len); + }); + + spineSizes.resize(spineCount, 0); + int matched = zip.fillUncompressedSizes(targets, spineSizes); + Serial.printf("[%lu] [BMC] Batch lookup matched %d/%d spine items\n", millis(), matched, spineCount); + + targets.clear(); + targets.shrink_to_fit(); + + useBatchSizes = true; } + uint32_t cumSize = 0; spineFile.seek(0); int lastSpineTocIndex = -1; for (int i = 0; i < spineCount; i++) { auto spineEntry = readSpineEntry(spineFile); - tocFile.seek(0); - for (int j = 0; j < tocCount; j++) { - auto tocEntry = readTocEntry(tocFile); - if (tocEntry.spineIndex == i) { - spineEntry.tocIndex = j; - break; - } - } + spineEntry.tocIndex = spineToTocIndex[i]; // Not a huge deal if we don't fine a TOC entry for the spine entry, this is expected behaviour for EPUBs // Logging here is for debugging @@ -169,16 +234,25 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta } lastSpineTocIndex = spineEntry.tocIndex; - // Calculate size for cumulative size size_t itemSize = 0; - const std::string path = FsHelpers::normalisePath(spineEntry.href); - if (zip.getInflatedFileSize(path.c_str(), &itemSize)) { - cumSize += itemSize; - spineEntry.cumulativeSize = cumSize; + if (useBatchSizes) { + itemSize = spineSizes[i]; + if (itemSize == 0) { + const std::string path = FsHelpers::normalisePath(spineEntry.href); + if (!zip.getInflatedFileSize(path.c_str(), &itemSize)) { + Serial.printf("[%lu] [BMC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str()); + } + } } else { - Serial.printf("[%lu] [BMC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str()); + const std::string path = FsHelpers::normalisePath(spineEntry.href); + if (!zip.getInflatedFileSize(path.c_str(), &itemSize)) { + Serial.printf("[%lu] [BMC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str()); + } } + cumSize += itemSize; + spineEntry.cumulativeSize = cumSize; + // Write out spine data to book.bin writeSpineEntry(bookFile, spineEntry); } @@ -248,21 +322,38 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri return; } - int spineIndex = -1; - // find spine index - // TODO: This lookup is slow as need to scan through all items each time. We can't hold it all in memory due to size. - // But perhaps we can load just the hrefs in a vector/list to do an index lookup? - spineFile.seek(0); - for (int i = 0; i < spineCount; i++) { - auto spineEntry = readSpineEntry(spineFile); - if (spineEntry.href == href) { - spineIndex = i; + int16_t spineIndex = -1; + + if (useSpineHrefIndex) { + uint64_t targetHash = fnvHash64(href); + uint16_t targetLen = static_cast(href.size()); + + auto it = + std::lower_bound(spineHrefIndex.begin(), spineHrefIndex.end(), SpineHrefIndexEntry{targetHash, targetLen, 0}, + [](const SpineHrefIndexEntry& a, const SpineHrefIndexEntry& b) { + return a.hrefHash < b.hrefHash || (a.hrefHash == b.hrefHash && a.hrefLen < b.hrefLen); + }); + + while (it != spineHrefIndex.end() && it->hrefHash == targetHash && it->hrefLen == targetLen) { + spineIndex = it->spineIndex; break; } - } - if (spineIndex == -1) { - Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str()); + if (spineIndex == -1) { + Serial.printf("[%lu] [BMC] createTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str()); + } + } else { + spineFile.seek(0); + for (int i = 0; i < spineCount; i++) { + auto spineEntry = readSpineEntry(spineFile); + if (spineEntry.href == href) { + spineIndex = static_cast(i); + break; + } + } + if (spineIndex == -1) { + Serial.printf("[%lu] [BMC] createTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str()); + } } const TocEntry entry(title, href, anchor, level, spineIndex); diff --git a/lib/Epub/Epub/BookMetadataCache.h b/lib/Epub/Epub/BookMetadataCache.h index 29b2ae4..20ce655 100644 --- a/lib/Epub/Epub/BookMetadataCache.h +++ b/lib/Epub/Epub/BookMetadataCache.h @@ -2,7 +2,9 @@ #include +#include #include +#include class BookMetadataCache { public: @@ -53,6 +55,27 @@ class BookMetadataCache { FsFile spineFile; FsFile tocFile; + // Index for fast href→spineIndex lookup (used only for large EPUBs) + struct SpineHrefIndexEntry { + uint64_t hrefHash; // FNV-1a 64-bit hash + uint16_t hrefLen; // length for collision reduction + int16_t spineIndex; + }; + std::vector spineHrefIndex; + bool useSpineHrefIndex = false; + + static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400; + + // FNV-1a 64-bit hash function + static uint64_t fnvHash64(const std::string& s) { + uint64_t hash = 14695981039346656037ull; + for (char c : s) { + hash ^= static_cast(c); + hash *= 1099511628211ull; + } + return hash; + } + uint32_t writeSpineEntry(FsFile& file, const SpineEntry& entry) const; uint32_t writeTocEntry(FsFile& file, const TocEntry& entry) const; SpineEntry readSpineEntry(FsFile& file) const; diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index 9fbeb38..ce0e22e 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -38,6 +38,9 @@ ContentOpfParser::~ContentOpfParser() { if (SdMan.exists((cachePath + itemCacheFile).c_str())) { SdMan.remove((cachePath + itemCacheFile).c_str()); } + itemIndex.clear(); + itemIndex.shrink_to_fit(); + useItemIndex = false; } size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); } @@ -129,6 +132,15 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name "[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n", millis()); } + + // Sort item index for binary search if we have enough items + if (self->itemIndex.size() >= LARGE_SPINE_THRESHOLD) { + std::sort(self->itemIndex.begin(), self->itemIndex.end(), [](const ItemIndexEntry& a, const ItemIndexEntry& b) { + return a.idHash < b.idHash || (a.idHash == b.idHash && a.idLen < b.idLen); + }); + self->useItemIndex = true; + Serial.printf("[%lu] [COF] Using fast index for %zu manifest items\n", millis(), self->itemIndex.size()); + } return; } @@ -180,6 +192,15 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name } } + // Record index entry for fast lookup later + if (self->tempItemStore) { + ItemIndexEntry entry; + entry.idHash = fnvHash(itemId); + entry.idLen = static_cast(itemId.size()); + entry.fileOffset = static_cast(self->tempItemStore.position()); + self->itemIndex.push_back(entry); + } + // Write items down to SD card serialization::writeString(self->tempItemStore, itemId); serialization::writeString(self->tempItemStore, href); @@ -215,19 +236,50 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "idref") == 0) { const std::string idref = atts[i + 1]; - // Resolve the idref to href using items map - // TODO: This lookup is slow as need to scan through all items each time. - // It can take up to 200ms per item when getting to 1500 items. - self->tempItemStore.seek(0); - std::string itemId; std::string href; - while (self->tempItemStore.available()) { - serialization::readString(self->tempItemStore, itemId); - serialization::readString(self->tempItemStore, href); - if (itemId == idref) { - self->cache->createSpineEntry(href); - break; + bool found = false; + + if (self->useItemIndex) { + // Fast path: binary search + uint32_t targetHash = fnvHash(idref); + uint16_t targetLen = static_cast(idref.size()); + + auto it = std::lower_bound(self->itemIndex.begin(), self->itemIndex.end(), + ItemIndexEntry{targetHash, targetLen, 0}, + [](const ItemIndexEntry& a, const ItemIndexEntry& b) { + return a.idHash < b.idHash || (a.idHash == b.idHash && a.idLen < b.idLen); + }); + + // Check for match (may need to check a few due to hash collisions) + while (it != self->itemIndex.end() && it->idHash == targetHash) { + self->tempItemStore.seek(it->fileOffset); + std::string itemId; + serialization::readString(self->tempItemStore, itemId); + if (itemId == idref) { + serialization::readString(self->tempItemStore, href); + found = true; + break; + } + ++it; } + } else { + // Slow path: linear scan (for small manifests, keeps original behavior) + // TODO: This lookup is slow as need to scan through all items each time. + // It can take up to 200ms per item when getting to 1500 items. + self->tempItemStore.seek(0); + std::string itemId; + while (self->tempItemStore.available()) { + serialization::readString(self->tempItemStore, itemId); + serialization::readString(self->tempItemStore, href); + if (itemId == idref) { + found = true; + break; + } + } + } + + if (found && self->cache) { + self->cache->createSpineEntry(href); } } } diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index 8c56a86..b40a378 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -1,6 +1,9 @@ #pragma once #include +#include +#include + #include "Epub.h" #include "expat.h" @@ -28,6 +31,27 @@ class ContentOpfParser final : public Print { FsFile tempItemStore; std::string coverItemId; + // Index for fast idref→href lookup (used only for large EPUBs) + struct ItemIndexEntry { + uint32_t idHash; // FNV-1a hash of itemId + uint16_t idLen; // length for collision reduction + uint32_t fileOffset; // offset in .items.bin + }; + std::vector itemIndex; + bool useItemIndex = false; + + static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400; + + // FNV-1a hash function + static uint32_t fnvHash(const std::string& s) { + uint32_t hash = 2166136261u; + for (char c : s) { + hash ^= static_cast(c); + hash *= 16777619u; + } + return hash; + } + static void startElement(void* userData, const XML_Char* name, const XML_Char** atts); static void characterData(void* userData, const XML_Char* s, int len); static void endElement(void* userData, const XML_Char* name); diff --git a/lib/ZipFile/ZipFile.cpp b/lib/ZipFile/ZipFile.cpp index 2a97858..a5f65ea 100644 --- a/lib/ZipFile/ZipFile.cpp +++ b/lib/ZipFile/ZipFile.cpp @@ -4,6 +4,8 @@ #include #include +#include + bool inflateOneShot(const uint8_t* inputBuf, const size_t deflatedSize, uint8_t* outputBuf, const size_t inflatedSize) { // Setup inflator const auto inflator = static_cast(malloc(sizeof(tinfl_decompressor))); @@ -74,6 +76,10 @@ bool ZipFile::loadAllFileStatSlims() { file.seekCur(m + k); } + // Set cursor to start of central directory for sequential access + lastCentralDirPos = zipDetails.centralDirOffset; + lastCentralDirPosValid = true; + if (!wasOpen) { close(); } @@ -102,15 +108,35 @@ bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) { return false; } - file.seek(zipDetails.centralDirOffset); + // Phase 1: Try scanning from cursor position first + uint32_t startPos = lastCentralDirPosValid ? lastCentralDirPos : zipDetails.centralDirOffset; + uint32_t wrapPos = zipDetails.centralDirOffset; + bool wrapped = false; + bool found = false; + + file.seek(startPos); uint32_t sig; char itemName[256]; - bool found = false; - while (file.available()) { - file.read(&sig, 4); - if (sig != 0x02014b50) break; // End of list + while (true) { + uint32_t entryStart = file.position(); + + if (file.read(&sig, 4) != 4 || sig != 0x02014b50) { + // End of central directory + if (!wrapped && lastCentralDirPosValid && startPos != zipDetails.centralDirOffset) { + // Wrap around to beginning + file.seek(zipDetails.centralDirOffset); + wrapped = true; + continue; + } + break; + } + + // If we've wrapped and reached our start position, stop + if (wrapped && entryStart >= startPos) { + break; + } file.seekCur(6); file.read(&fileStat->method, 2); @@ -123,15 +149,25 @@ bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) { file.read(&k, 2); file.seekCur(8); file.read(&fileStat->localHeaderOffset, 4); - file.read(itemName, nameLen); - itemName[nameLen] = '\0'; - if (strcmp(itemName, filename) == 0) { - found = true; - break; + if (nameLen < 256) { + file.read(itemName, nameLen); + itemName[nameLen] = '\0'; + + if (strcmp(itemName, filename) == 0) { + // Found it! Update cursor to next entry + file.seekCur(m + k); + lastCentralDirPos = file.position(); + lastCentralDirPosValid = true; + found = true; + break; + } + } else { + // Name too long, skip it + file.seekCur(nameLen); } - // Skip the rest of this entry (extra field + comment) + // Skip extra field + comment file.seekCur(m + k); } @@ -253,6 +289,8 @@ bool ZipFile::close() { if (file) { file.close(); } + lastCentralDirPos = 0; + lastCentralDirPosValid = false; return true; } @@ -266,6 +304,80 @@ bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) { return true; } +int ZipFile::fillUncompressedSizes(std::vector& targets, std::vector& sizes) { + if (targets.empty()) { + return 0; + } + + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return 0; + } + + if (!loadZipDetails()) { + if (!wasOpen) { + close(); + } + return 0; + } + + file.seek(zipDetails.centralDirOffset); + + int matched = 0; + uint32_t sig; + char itemName[256]; + + while (file.available()) { + file.read(&sig, 4); + if (sig != 0x02014b50) break; + + file.seekCur(6); + uint16_t method; + file.read(&method, 2); + file.seekCur(8); + uint32_t compressedSize, uncompressedSize; + file.read(&compressedSize, 4); + file.read(&uncompressedSize, 4); + uint16_t nameLen, m, k; + file.read(&nameLen, 2); + file.read(&m, 2); + file.read(&k, 2); + file.seekCur(8); + uint32_t localHeaderOffset; + file.read(&localHeaderOffset, 4); + + if (nameLen < 256) { + file.read(itemName, nameLen); + itemName[nameLen] = '\0'; + + uint64_t hash = fnvHash64(itemName, nameLen); + SizeTarget key = {hash, nameLen, 0}; + + auto it = std::lower_bound(targets.begin(), targets.end(), key, [](const SizeTarget& a, const SizeTarget& b) { + return a.hash < b.hash || (a.hash == b.hash && a.len < b.len); + }); + + while (it != targets.end() && it->hash == hash && it->len == nameLen) { + if (it->index < sizes.size()) { + sizes[it->index] = uncompressedSize; + matched++; + } + ++it; + } + } else { + file.seekCur(nameLen); + } + + file.seekCur(m + k); + } + + if (!wasOpen) { + close(); + } + + return matched; +} + uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) { const bool wasOpen = isOpen(); if (!wasOpen && !open()) { diff --git a/lib/ZipFile/ZipFile.h b/lib/ZipFile/ZipFile.h index 0144ed4..0c82e5a 100644 --- a/lib/ZipFile/ZipFile.h +++ b/lib/ZipFile/ZipFile.h @@ -3,6 +3,7 @@ #include #include +#include class ZipFile { public: @@ -19,12 +20,33 @@ class ZipFile { bool isSet; }; + // Target for batch uncompressed size lookup (sorted by hash, then len) + struct SizeTarget { + uint64_t hash; // FNV-1a 64-bit hash of normalized path + uint16_t len; // Length of path for collision reduction + uint16_t index; // Caller's index (e.g. spine index) + }; + + // FNV-1a 64-bit hash computed from char buffer (no std::string allocation) + static uint64_t fnvHash64(const char* s, size_t len) { + uint64_t hash = 14695981039346656037ull; + for (size_t i = 0; i < len; i++) { + hash ^= static_cast(s[i]); + hash *= 1099511628211ull; + } + return hash; + } + private: const std::string& filePath; FsFile file; ZipDetails zipDetails = {0, 0, false}; std::unordered_map fileStatSlimCache; + // Cursor for sequential central-dir scanning optimization + uint32_t lastCentralDirPos = 0; + bool lastCentralDirPosValid = false; + bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat); long getDataOffset(const FileStatSlim& fileStat); bool loadZipDetails(); @@ -39,6 +61,10 @@ class ZipFile { bool close(); bool loadAllFileStatSlims(); bool getInflatedFileSize(const char* filename, size_t* size); + // Batch lookup: scan ZIP central dir once and fill sizes for matching targets. + // targets must be sorted by (hash, len). sizes[target.index] receives uncompressedSize. + // Returns number of targets matched. + int fillUncompressedSizes(std::vector& targets, std::vector& sizes); // Due to the memory required to run each of these, it is recommended to not preopen the zip file for multiple // These functions will open and close the zip as needed uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false);