From 134409023b8339f7b95f54c4edce9848538838fa Mon Sep 17 00:00:00 2001 From: Dave Allie Date: Mon, 29 Dec 2025 01:43:55 +1100 Subject: [PATCH] Use custom implementation of ZIP metadata parsing --- lib/Epub/Epub.cpp | 6 +- lib/Epub/Epub/BookMetadataCache.cpp | 10 +- lib/ZipFile/ZipFile.cpp | 332 ++++++++++++++++++++++------ lib/ZipFile/ZipFile.h | 39 +++- 4 files changed, 311 insertions(+), 76 deletions(-) diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index f1dc25e..941e11b 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -320,7 +320,7 @@ bool Epub::generateCoverBmp() const { uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const { const std::string path = FsHelpers::normalisePath(itemHref); - const auto content = ZipFile("/sd" + filepath).readFileToMemory(path.c_str(), size, trailingNullByte); + const auto content = ZipFile(filepath).readFileToMemory(path.c_str(), size, trailingNullByte); if (!content) { Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str()); return nullptr; @@ -331,12 +331,12 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const { const std::string path = FsHelpers::normalisePath(itemHref); - return ZipFile("/sd" + filepath).readFileToStream(path.c_str(), out, chunkSize); + return ZipFile(filepath).readFileToStream(path.c_str(), out, chunkSize); } bool Epub::getItemSize(const std::string& itemHref, size_t* size) const { const std::string path = FsHelpers::normalisePath(itemHref); - return ZipFile("/sd" + filepath).getInflatedFileSize(path.c_str(), size); + return ZipFile(filepath).getInflatedFileSize(path.c_str(), size); } int Epub::getSpineItemsCount() const { diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 9774dee..2231e18 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -122,7 +122,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // LUTs complete // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin - ZipFile zip("/sd" + epubPath); + ZipFile zip(epubPath); // Pre-open zip file to speed up size calculations if (!zip.open()) { Serial.printf("[%lu] [BMC] Could not open EPUB zip for size calculations\n", millis()); @@ -131,6 +131,14 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta tocFile.close(); return false; } + if (!zip.loadAllLocalHeaderOffsets()) { + Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis()); + bookFile.close(); + spineFile.close(); + tocFile.close(); + zip.close(); + return false; + } size_t cumSize = 0; spineFile.seek(0); for (int i = 0; i < spineCount; i++) { diff --git a/lib/ZipFile/ZipFile.cpp b/lib/ZipFile/ZipFile.cpp index d6451b9..a575aa4 100644 --- a/lib/ZipFile/ZipFile.cpp +++ b/lib/ZipFile/ZipFile.cpp @@ -1,5 +1,6 @@ #include "ZipFile.h" +#include #include #include @@ -27,27 +28,159 @@ bool inflateOneShot(const uint8_t* inputBuf, const size_t deflatedSize, uint8_t* return true; } -bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat) { +bool ZipFile::loadAllLocalHeaderOffsets() { const bool wasOpen = isOpen(); - if (!wasOpen) { - if (!open()) { - return false; - } + if (!wasOpen && !open()) { + return false; } - // find the file - mz_uint32 fileIndex = 0; - if (!mz_zip_reader_locate_file_v2(zipArchivePtr.get(), filename, nullptr, 0, &fileIndex)) { - Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis(), filename); + if (!loadZipDetails()) { + Serial.printf("[%lu] [ZIP] loadAllLocalHeaderOffsets failed to load zip details\n", millis()); if (!wasOpen) { close(); } return false; } - if (!mz_zip_reader_file_stat(zipArchivePtr.get(), fileIndex, fileStat)) { - Serial.printf("[%lu] [ZIP] mz_zip_reader_file_stat() failed! Error: %s\n", millis(), - mz_zip_get_error_string(zipArchivePtr->m_last_error)); + file.seek(zipDetails.centralDirOffset); + + uint32_t sig; + char itemName[256]; + + while (file.available()) { + file.read(reinterpret_cast(&sig), 4); + if (sig != 0x02014b50) break; // End of list + + file.seek(24, SeekCur); + uint16_t nameLen, m, k; + file.read(reinterpret_cast(&nameLen), 2); + file.read(reinterpret_cast(&m), 2); + file.read(reinterpret_cast(&k), 2); + + uint32_t localHeaderOffset; + file.seek(8, SeekCur); + file.read(reinterpret_cast(&localHeaderOffset), 4); + + file.read(reinterpret_cast(itemName), nameLen); + itemName[nameLen] = '\0'; + + localHeaderOffsets.emplace(itemName, localHeaderOffset); + + // Skip the rest of this entry (extra field + comment) + file.seek(m + k, SeekCur); + } + + if (!wasOpen) { + close(); + } + return true; +} + +bool ZipFile::loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset) { + if (localHeaderOffsets.count(filename) > 0) { + *localHeaderOffset = localHeaderOffsets.at(filename); + Serial.printf("[%lu] [ZIP] Found cached local header offset for file: %s (LHO: %lu)\n", millis(), filename, + static_cast(*localHeaderOffset)); + return true; + } + + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return false; + } + + if (!loadZipDetails()) { + Serial.printf("[%lu] [ZIP] loadFileStatV2 failed to load zip details\n", millis()); + if (!wasOpen) { + close(); + } + return false; + } + + file.seek(zipDetails.centralDirOffset); + + uint32_t sig; + char itemName[256]; + bool found = false; + + while (file.available()) { + file.read(reinterpret_cast(&sig), 4); + if (sig != 0x02014b50) break; // End of list + + file.seek(24, SeekCur); + uint16_t nameLen, m, k; + file.read(reinterpret_cast(&nameLen), 2); + file.read(reinterpret_cast(&m), 2); + file.read(reinterpret_cast(&k), 2); + + file.seek(8, SeekCur); + file.read(reinterpret_cast(localHeaderOffset), 4); + + file.read(reinterpret_cast(itemName), nameLen); + itemName[nameLen] = '\0'; + + Serial.printf("[%lu] [ZIP] Checking file in central dir: %s (LHO: %lu, LN: %d)\n", millis(), itemName, + static_cast(*localHeaderOffset), nameLen); + + if (strcmp(itemName, filename) == 0) { + found = true; + break; + } + + // Skip the rest of this entry (extra field + comment) + file.seek(m + k, SeekCur); + } + + if (!wasOpen) { + close(); + } + return found; +} + +bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return false; + } + + if (!loadLocalHeaderOffset(filename, &fileStat->localHeaderOffset)) { + Serial.printf("[%lu] [ZIP] loadFileStatSlim could not find local header offset for file: %s\n", millis(), filename); + if (!wasOpen) { + close(); + } + return false; + } + + uint32_t sig; + file.seek(fileStat->localHeaderOffset); + file.read(reinterpret_cast(&sig), 4); + if (sig != 0x04034b50) { + Serial.printf("[%lu] [ZIP] Incorrect local file header\n", millis()); + if (!wasOpen) { + close(); + } + return false; + } + + file.seek(4, SeekCur); // Skip to method + if (file.read(reinterpret_cast(&fileStat->method), 2) != 2) { + Serial.printf("[%lu] [ZIP] Could not read compression method\n", millis()); + if (!wasOpen) { + close(); + } + return false; + } + + file.seek(8, SeekCur); // Skip to sizes + if (file.read(reinterpret_cast(&fileStat->compressedSize), 4) != 4) { + Serial.printf("[%lu] [ZIP] Could not read compressed size\n", millis()); + if (!wasOpen) { + close(); + } + return false; + } + if (file.read(reinterpret_cast(&fileStat->uncompressedSize), 4) != 4) { + Serial.printf("[%lu] [ZIP] Could not read uncompressed size\n", millis()); if (!wasOpen) { close(); } @@ -60,20 +193,22 @@ bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileS return true; } -long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const { +long ZipFile::getDataOffset(const FileStatSlim& fileStat) { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return -1; + } + constexpr auto localHeaderSize = 30; uint8_t pLocalHeader[localHeaderSize]; - const uint64_t fileOffset = fileStat.m_local_header_ofs; + const uint64_t fileOffset = fileStat.localHeaderOffset; - FILE* file = fopen(filePath.c_str(), "r"); - if (!file) { - Serial.printf("[%lu] [ZIP] Failed to open file for reading local header\n", millis()); - return -1; + file.seek(fileOffset); + const size_t read = file.read(pLocalHeader, localHeaderSize); + if (!wasOpen) { + close(); } - fseek(file, fileOffset, SEEK_SET); - const size_t read = fread(pLocalHeader, 1, localHeaderSize, file); - fclose(file); if (read != localHeaderSize) { Serial.printf("[%lu] [ZIP] Something went wrong reading the local header\n", millis()); @@ -91,71 +226,133 @@ long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const { return fileOffset + localHeaderSize + filenameLength + extraOffset; } -bool ZipFile::open() { - zipArchivePtr.reset(new mz_zip_archive); - mz_zip_zero_struct(zipArchivePtr.get()); - const bool status = - mz_zip_reader_init_file(zipArchivePtr.get(), filePath.c_str(), MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY); +bool ZipFile::loadZipDetails() { + if (zipDetails.isSet) { + return true; + } - if (!status) { - Serial.printf("[%lu] [ZIP] mz_zip_reader_init_file() failed for %s! Error: %s\n", millis(), filePath.c_str(), - mz_zip_get_error_string(zipArchivePtr->m_last_error)); - zipArchivePtr.reset(); + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return false; + } + + const size_t fileSize = file.size(); + if (fileSize < 22) { + Serial.printf("[%lu] [ZIP] File too small to be a valid zip\n", millis()); + if (!wasOpen) { + close(); + } + return false; // Minimum EOCD size is 22 bytes + } + + // We scan the last 1KB (or the whole file if smaller) for the EOCD signature + // 0x06054b50 is stored as 0x50, 0x4b, 0x05, 0x06 in little-endian + const int scanRange = fileSize > 1024 ? 1024 : fileSize; + const auto buffer = static_cast(malloc(scanRange)); + + file.seek(fileSize - scanRange); + file.read(buffer, scanRange); + + // Scan backwards for the signature + int foundOffset = -1; + for (int i = scanRange - 22; i >= 0; i--) { + constexpr uint32_t signature = 0x06054b50; + if (*reinterpret_cast(&buffer[i]) == signature) { + foundOffset = i; + break; + } + } + + if (foundOffset == -1) { + Serial.printf("[%lu] [ZIP] EOCD signature not found in zip file\n", millis()); + free(buffer); + if (!wasOpen) { + close(); + } + return false; + } + + // Now extract the values we need from the EOCD record + // Relative positions within EOCD: + // Offset 10: Total number of entries (2 bytes) + // Offset 16: Offset of start of central directory with respect to the starting disk number (4 bytes) + zipDetails.totalEntries = *reinterpret_cast(&buffer[foundOffset + 10]); + zipDetails.centralDirOffset = *reinterpret_cast(&buffer[foundOffset + 16]); + zipDetails.isSet = true; + + free(buffer); + if (!wasOpen) { + close(); + } + return true; +} + +bool ZipFile::open() { + if (!FsHelpers::openFileForRead("ZIP", &filePath[3], file)) { return false; } return true; } bool ZipFile::close() { - if (zipArchivePtr) { - mz_zip_reader_end(zipArchivePtr.get()); - zipArchivePtr.reset(); + if (file) { + file.close(); } return true; } bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) { - mz_zip_archive_file_stat fileStat = {}; - if (!loadFileStat(filename, &fileStat)) { + FileStatSlim fileStat = {}; + if (!loadFileStatSlim(filename, &fileStat)) { return false; } - *size = static_cast(fileStat.m_uncomp_size); + *size = static_cast(fileStat.uncompressedSize); return true; } uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) { - mz_zip_archive_file_stat fileStat; - if (!loadFileStat(filename, &fileStat)) { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return nullptr; + } + + FileStatSlim fileStat = {}; + if (!loadFileStatSlim(filename, &fileStat)) { + if (!wasOpen) { + close(); + } return nullptr; } const long fileOffset = getDataOffset(fileStat); if (fileOffset < 0) { + if (!wasOpen) { + close(); + } return nullptr; } - FILE* file = fopen(filePath.c_str(), "rb"); - if (!file) { - Serial.printf("[%lu] [ZIP] Failed to open file for reading\n", millis()); - return nullptr; - } - fseek(file, fileOffset, SEEK_SET); + file.seek(fileOffset); - const auto deflatedDataSize = static_cast(fileStat.m_comp_size); - const auto inflatedDataSize = static_cast(fileStat.m_uncomp_size); + const auto deflatedDataSize = fileStat.compressedSize; + const auto inflatedDataSize = fileStat.uncompressedSize; const auto dataSize = trailingNullByte ? inflatedDataSize + 1 : inflatedDataSize; const auto data = static_cast(malloc(dataSize)); if (data == nullptr) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for output buffer (%zu bytes)\n", millis(), dataSize); - fclose(file); + if (!wasOpen) { + close(); + } return nullptr; } - if (fileStat.m_method == MZ_NO_COMPRESSION) { + if (fileStat.method == MZ_NO_COMPRESSION) { // no deflation, just read content - const size_t dataRead = fread(data, 1, inflatedDataSize, file); - fclose(file); + const size_t dataRead = file.read(data, inflatedDataSize); + if (!wasOpen) { + close(); + } if (dataRead != inflatedDataSize) { Serial.printf("[%lu] [ZIP] Failed to read data\n", millis()); @@ -164,17 +361,21 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo } // Continue out of block with data set - } else if (fileStat.m_method == MZ_DEFLATED) { + } else if (fileStat.method == MZ_DEFLATED) { // Read out deflated content from file const auto deflatedData = static_cast(malloc(deflatedDataSize)); if (deflatedData == nullptr) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for decompression buffer\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } return nullptr; } - const size_t dataRead = fread(deflatedData, 1, deflatedDataSize, file); - fclose(file); + const size_t dataRead = file.read(deflatedData, deflatedDataSize); + if (!wasOpen) { + close(); + } if (dataRead != deflatedDataSize) { Serial.printf("[%lu] [ZIP] Failed to read data, expected %d got %d\n", millis(), deflatedDataSize, dataRead); @@ -183,7 +384,7 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo return nullptr; } - bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize); + const bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize); free(deflatedData); if (!success) { @@ -195,7 +396,9 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo // Continue out of block with data set } else { Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } return nullptr; } @@ -205,8 +408,13 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo } bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t chunkSize) { - mz_zip_archive_file_stat fileStat; - if (!loadFileStat(filename, &fileStat)) { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return false; + } + + FileStatSlim fileStat = {}; + if (!loadFileStatSlim(filename, &fileStat)) { return false; } @@ -222,10 +430,10 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch } fseek(file, fileOffset, SEEK_SET); - const auto deflatedDataSize = static_cast(fileStat.m_comp_size); - const auto inflatedDataSize = static_cast(fileStat.m_uncomp_size); + const auto deflatedDataSize = fileStat.compressedSize; + const auto inflatedDataSize = fileStat.uncompressedSize; - if (fileStat.m_method == MZ_NO_COMPRESSION) { + if (fileStat.method == MZ_NO_COMPRESSION) { // no deflation, just read content const auto buffer = static_cast(malloc(chunkSize)); if (!buffer) { @@ -253,7 +461,7 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch return true; } - if (fileStat.m_method == MZ_DEFLATED) { + if (fileStat.method == MZ_DEFLATED) { // Setup inflator const auto inflator = static_cast(malloc(sizeof(tinfl_decompressor))); if (!inflator) { diff --git a/lib/ZipFile/ZipFile.h b/lib/ZipFile/ZipFile.h index a51cfcd..5d5dd4d 100644 --- a/lib/ZipFile/ZipFile.h +++ b/lib/ZipFile/ZipFile.h @@ -1,25 +1,44 @@ #pragma once -#include +#include -#include +#include #include -#include "miniz.h" - class ZipFile { - std::string filePath; - std::unique_ptr zipArchivePtr; - bool loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat); - long getDataOffset(const mz_zip_archive_file_stat& fileStat) const; + public: + struct FileStatSlim { + uint16_t method; // Compression method + uint32_t compressedSize; // Compressed size + uint32_t uncompressedSize; // Uncompressed size + uint32_t localHeaderOffset; // Offset of local file header + }; + + struct ZipDetails { + uint32_t centralDirOffset; + uint16_t totalEntries; + bool isSet; + }; + + private: + const std::string& filePath; + File file; + ZipDetails zipDetails = {0, 0, false}; + std::map localHeaderOffsets; + bool loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset); + + bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat); + long getDataOffset(const FileStatSlim& fileStat); + bool loadZipDetails(); public: - explicit ZipFile(std::string filePath) : filePath(std::move(filePath)) {} + explicit ZipFile(const std::string& filePath) : filePath(filePath) {} ~ZipFile() = default; // Zip file can be opened and closed by hand in order to allow for quick calculation of inflated file size // It is NOT recommended to pre-open it for any kind of inflation due to memory constraints - bool isOpen() const { return zipArchivePtr != nullptr; } + bool isOpen() const { return !!file; } bool open(); bool close(); + bool loadAllLocalHeaderOffsets(); bool getInflatedFileSize(const char* filename, size_t* size); // Due to the memory required to run each of these, it is recommended to not preopen the zip file for multiple // These functions will open and close the zip as needed