From d0978167428ae4ce6a43e656409d28018a6cf56d Mon Sep 17 00:00:00 2001 From: Dave Allie Date: Mon, 29 Dec 2025 16:37:19 +1100 Subject: [PATCH] Use zip central directory as source of truth for file stats --- lib/Epub/Epub/BookMetadataCache.cpp | 2 +- lib/ZipFile/ZipFile.cpp | 100 +++++++--------------------- lib/ZipFile/ZipFile.h | 5 +- 3 files changed, 26 insertions(+), 81 deletions(-) diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index e89cce9..8fcee28 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -134,7 +134,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // TODO: For large ZIPs loading the all localHeaderOffsets will crash. // However not having them loaded is extremely slow. Need a better solution here. // Perhaps only a cache of spine items or a better way to speedup lookups? - if (!zip.loadAllLocalHeaderOffsets()) { + if (!zip.loadAllFileStatSlims()) { Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis()); bookFile.close(); spineFile.close(); diff --git a/lib/ZipFile/ZipFile.cpp b/lib/ZipFile/ZipFile.cpp index b26067b..23cf0e8 100644 --- a/lib/ZipFile/ZipFile.cpp +++ b/lib/ZipFile/ZipFile.cpp @@ -28,14 +28,13 @@ bool inflateOneShot(const uint8_t* inputBuf, const size_t deflatedSize, uint8_t* return true; } -bool ZipFile::loadAllLocalHeaderOffsets() { +bool ZipFile::loadAllFileStatSlims() { const bool wasOpen = isOpen(); if (!wasOpen && !open()) { return false; } if (!loadZipDetails()) { - Serial.printf("[%lu] [ZIP] loadAllLocalHeaderOffsets failed to load zip details\n", millis()); if (!wasOpen) { close(); } @@ -46,28 +45,30 @@ bool ZipFile::loadAllLocalHeaderOffsets() { uint32_t sig; char itemName[256]; - - localHeaderOffsets.clear(); - localHeaderOffsets.reserve(zipDetails.totalEntries); + fileStatSlimCache.clear(); + fileStatSlimCache.reserve(zipDetails.totalEntries); while (file.available()) { file.read(reinterpret_cast(&sig), 4); if (sig != 0x02014b50) break; // End of list - file.seek(24, SeekCur); + FileStatSlim fileStat = {}; + + file.seek(6, SeekCur); + file.read(reinterpret_cast(&fileStat.method), 2); + file.seek(8, SeekCur); + file.read(reinterpret_cast(&fileStat.compressedSize), 4); + file.read(reinterpret_cast(&fileStat.uncompressedSize), 4); uint16_t nameLen, m, k; file.read(reinterpret_cast(&nameLen), 2); file.read(reinterpret_cast(&m), 2); file.read(reinterpret_cast(&k), 2); - - uint32_t localHeaderOffset; file.seek(8, SeekCur); - file.read(reinterpret_cast(&localHeaderOffset), 4); - + file.read(reinterpret_cast(&fileStat.localHeaderOffset), 4); file.read(reinterpret_cast(itemName), nameLen); itemName[nameLen] = '\0'; - localHeaderOffsets.emplace(itemName, localHeaderOffset); + fileStatSlimCache.emplace(itemName, fileStat); // Skip the rest of this entry (extra field + comment) file.seek(m + k, SeekCur); @@ -79,14 +80,13 @@ bool ZipFile::loadAllLocalHeaderOffsets() { return true; } -bool ZipFile::loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset) { - // If we have saved any offset, assume they're all loaded - if (!localHeaderOffsets.empty()) { - if (localHeaderOffsets.count(filename) > 0) { - *localHeaderOffset = localHeaderOffsets.at(filename); +bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) { + if (!fileStatSlimCache.empty()) { + const auto it = fileStatSlimCache.find(filename); + if (it != fileStatSlimCache.end()) { + *fileStat = it->second; return true; } - return false; } @@ -112,15 +112,17 @@ bool ZipFile::loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderO file.read(reinterpret_cast(&sig), 4); if (sig != 0x02014b50) break; // End of list - file.seek(24, SeekCur); + file.seek(6, SeekCur); + file.read(reinterpret_cast(&fileStat->method), 2); + file.seek(8, SeekCur); + file.read(reinterpret_cast(&fileStat->compressedSize), 4); + file.read(reinterpret_cast(&fileStat->uncompressedSize), 4); uint16_t nameLen, m, k; file.read(reinterpret_cast(&nameLen), 2); file.read(reinterpret_cast(&m), 2); file.read(reinterpret_cast(&k), 2); - file.seek(8, SeekCur); - file.read(reinterpret_cast(localHeaderOffset), 4); - + file.read(reinterpret_cast(&fileStat->localHeaderOffset), 4); file.read(reinterpret_cast(itemName), nameLen); itemName[nameLen] = '\0'; @@ -139,62 +141,6 @@ bool ZipFile::loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderO return found; } -bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) { - const bool wasOpen = isOpen(); - if (!wasOpen && !open()) { - return false; - } - - if (!loadLocalHeaderOffset(filename, &fileStat->localHeaderOffset)) { - Serial.printf("[%lu] [ZIP] loadFileStatSlim could not find local header offset for file: %s\n", millis(), filename); - if (!wasOpen) { - close(); - } - return false; - } - - uint32_t sig; - file.seek(fileStat->localHeaderOffset); - file.read(reinterpret_cast(&sig), 4); - if (sig != 0x04034b50) { - Serial.printf("[%lu] [ZIP] Incorrect local file header\n", millis()); - if (!wasOpen) { - close(); - } - return false; - } - - file.seek(4, SeekCur); // Skip to method - if (file.read(reinterpret_cast(&fileStat->method), 2) != 2) { - Serial.printf("[%lu] [ZIP] Could not read compression method\n", millis()); - if (!wasOpen) { - close(); - } - return false; - } - - file.seek(8, SeekCur); // Skip to sizes - if (file.read(reinterpret_cast(&fileStat->compressedSize), 4) != 4) { - Serial.printf("[%lu] [ZIP] Could not read compressed size\n", millis()); - if (!wasOpen) { - close(); - } - return false; - } - if (file.read(reinterpret_cast(&fileStat->uncompressedSize), 4) != 4) { - Serial.printf("[%lu] [ZIP] Could not read uncompressed size\n", millis()); - if (!wasOpen) { - close(); - } - return false; - } - - if (!wasOpen) { - close(); - } - return true; -} - long ZipFile::getDataOffset(const FileStatSlim& fileStat) { const bool wasOpen = isOpen(); if (!wasOpen && !open()) { diff --git a/lib/ZipFile/ZipFile.h b/lib/ZipFile/ZipFile.h index bc86ea8..4758f16 100644 --- a/lib/ZipFile/ZipFile.h +++ b/lib/ZipFile/ZipFile.h @@ -23,8 +23,7 @@ class ZipFile { const std::string& filePath; File file; ZipDetails zipDetails = {0, 0, false}; - std::unordered_map localHeaderOffsets; - bool loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset); + std::unordered_map fileStatSlimCache; bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat); long getDataOffset(const FileStatSlim& fileStat); @@ -38,7 +37,7 @@ class ZipFile { bool isOpen() const { return !!file; } bool open(); bool close(); - bool loadAllLocalHeaderOffsets(); + bool loadAllFileStatSlims(); bool getInflatedFileSize(const char* filename, size_t* size); // Due to the memory required to run each of these, it is recommended to not preopen the zip file for multiple // These functions will open and close the zip as needed