From a325f1265693d3554c2a44c7330ccbb8a9971964 Mon Sep 17 00:00:00 2001 From: Dave Allie Date: Mon, 22 Dec 2025 14:30:34 +1100 Subject: [PATCH] Use cache files for TOC and spine --- lib/Epub/Epub.cpp | 222 ++++++---- lib/Epub/Epub.h | 22 +- lib/Epub/Epub/EpubTocEntry.h | 10 - lib/Epub/Epub/Section.cpp | 2 +- lib/Epub/Epub/SpineTocCache.cpp | 388 ++++++++++++++++++ lib/Epub/Epub/SpineTocCache.h | 75 ++++ lib/Epub/Epub/parsers/ContentOpfParser.cpp | 10 +- lib/Epub/Epub/parsers/ContentOpfParser.h | 7 +- lib/Epub/Epub/parsers/TocNcxParser.cpp | 5 +- lib/Epub/Epub/parsers/TocNcxParser.h | 10 +- src/activities/reader/EpubReaderActivity.cpp | 2 +- .../EpubReaderChapterSelectionActivity.cpp | 2 +- 12 files changed, 635 insertions(+), 120 deletions(-) delete mode 100644 lib/Epub/Epub/EpubTocEntry.h create mode 100644 lib/Epub/Epub/SpineTocCache.cpp create mode 100644 lib/Epub/Epub/SpineTocCache.h diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index d959cb7..7e4cea3 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -53,7 +53,7 @@ bool Epub::parseContentOpf(const std::string& contentOpfFilePath) { return false; } - ContentOpfParser opfParser(getBasePath(), contentOpfSize); + ContentOpfParser opfParser(getBasePath(), contentOpfSize, spineTocCache.get()); if (!opfParser.setup()) { Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis()); @@ -75,17 +75,11 @@ bool Epub::parseContentOpf(const std::string& contentOpfFilePath) { tocNcxItem = opfParser.tocNcxPath; } - for (auto& spineRef : opfParser.spineRefs) { - if (opfParser.items.count(spineRef)) { - spine.emplace_back(spineRef, opfParser.items.at(spineRef)); - } - } - Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; } -bool Epub::parseTocNcxFile() { +bool Epub::parseTocNcxFile() const { // the ncx file should have been specified in the content.opf file if (tocNcxItem.empty()) { Serial.printf("[%lu] [EBP] No ncx file specified\n", millis()); @@ -101,7 +95,7 @@ bool Epub::parseTocNcxFile() { tempNcxFile = SD.open(tmpNcxPath.c_str(), FILE_READ); const auto ncxSize = tempNcxFile.size(); - TocNcxParser ncxParser(contentBasePath, ncxSize); + TocNcxParser ncxParser(contentBasePath, ncxSize, spineTocCache.get()); if (!ncxParser.setup()) { Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis()); @@ -130,9 +124,7 @@ bool Epub::parseTocNcxFile() { tempNcxFile.close(); SD.remove(tmpNcxPath.c_str()); - this->toc = std::move(ncxParser.toc); - - Serial.printf("[%lu] [EBP] Parsed %d TOC items\n", millis(), this->toc.size()); + Serial.printf("[%lu] [EBP] Parsed TOC items\n", millis()); return true; } @@ -140,6 +132,53 @@ bool Epub::parseTocNcxFile() { bool Epub::load() { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); + // Initialize spine/TOC cache + spineTocCache.reset(new SpineTocCache(cachePath)); + + // Try to load existing cache first + if (spineTocCache->load()) { + Serial.printf("[%lu] [EBP] Loaded spine/TOC from cache\n", millis()); + + // Still need to parse content.opf for title and cover + // TODO: Should this data go in the cache? + std::string contentOpfFilePath; + if (!findContentOpfFile(&contentOpfFilePath)) { + Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis()); + return false; + } + + contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1); + + // Parse content.opf but without cache (we already have it) + size_t contentOpfSize; + if (!getItemSize(contentOpfFilePath, &contentOpfSize)) { + Serial.printf("[%lu] [EBP] Could not get size of content.opf\n", millis()); + return false; + } + + ContentOpfParser opfParser(getBasePath(), contentOpfSize, nullptr); + if (!opfParser.setup()) { + Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis()); + return false; + } + + if (!readItemContentsToStream(contentOpfFilePath, opfParser, 1024)) { + Serial.printf("[%lu] [EBP] Could not read content.opf\n", millis()); + return false; + } + + title = opfParser.title; + if (!opfParser.coverItemId.empty() && opfParser.items.count(opfParser.coverItemId) > 0) { + coverImageItem = opfParser.items.at(opfParser.coverItemId); + } + + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); + return true; + } + + // Cache doesn't exist or is invalid, build it + Serial.printf("[%lu] [EBP] Cache not found, building spine/TOC cache\n", millis()); + std::string contentOpfFilePath; if (!findContentOpfFile(&contentOpfFilePath)) { Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis()); @@ -150,6 +189,17 @@ bool Epub::load() { contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1); + // Ensure cache directory exists + setupCacheDir(); + + Serial.printf("[%lu] [EBP] Cache path: %s\n", millis(), cachePath.c_str()); + + // Begin building cache - stream entries to disk immediately + if (!spineTocCache->beginWrite()) { + Serial.printf("[%lu] [EBP] Could not begin writing cache\n", millis()); + return false; + } + if (!parseContentOpf(contentOpfFilePath)) { Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis()); return false; @@ -160,30 +210,30 @@ bool Epub::load() { return false; } - initializeSpineItemSizes(); + // Close the cache files + if (!spineTocCache->endWrite()) { + Serial.printf("[%lu] [EBP] Could not end writing cache\n", millis()); + return false; + } + + // Now compute mappings and sizes (this loads entries temporarily, computes, then rewrites) + if (!spineTocCache->updateMappingsAndSizes(filepath)) { + Serial.printf("[%lu] [EBP] Could not update mappings and sizes\n", millis()); + return false; + } + + // Reload the cache from disk so it's in the correct state + spineTocCache.reset(new SpineTocCache(cachePath)); + if (!spineTocCache->load()) { + Serial.printf("[%lu] [EBP] Failed to reload cache after writing\n", millis()); + return false; + } + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } -void Epub::initializeSpineItemSizes() { - Serial.printf("[%lu] [EBP] Calculating book size\n", millis()); - - const size_t spineItemsCount = getSpineItemsCount(); - size_t cumSpineItemSize = 0; - const ZipFile zip("/sd" + filepath); - - for (size_t i = 0; i < spineItemsCount; i++) { - std::string spineItem = getSpineItem(i); - size_t s = 0; - getItemSize(zip, spineItem, &s); - cumSpineItemSize += s; - cumulativeSpineItemSize.emplace_back(cumSpineItemSize); - } - - Serial.printf("[%lu] [EBP] Book size: %lu\n", millis(), cumSpineItemSize); -} - bool Epub::clearCache() const { if (!SD.exists(cachePath.c_str())) { Serial.printf("[%lu] [EPB] Cache does not exist, no action needed\n", millis()); @@ -295,7 +345,7 @@ std::string normalisePath(const std::string& path) { return result; } -uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, bool trailingNullByte) const { +uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const { const ZipFile zip("/sd" + filepath); const std::string path = normalisePath(itemHref); @@ -325,99 +375,107 @@ bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* return zip.getInflatedFileSize(path.c_str(), size); } -int Epub::getSpineItemsCount() const { return spine.size(); } +int Epub::getSpineItemsCount() const { + if (!spineTocCache || !spineTocCache->isLoaded()) { + return 0; + } + return spineTocCache->getSpineCount(); +} size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { - if (spineIndex < 0 || spineIndex >= static_cast(cumulativeSpineItemSize.size())) { + if (!spineTocCache || !spineTocCache->isLoaded()) { + Serial.printf("[%lu] [EBP] getCumulativeSpineItemSize called but cache not loaded\n", millis()); + return 0; + } + if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) { Serial.printf("[%lu] [EBP] getCumulativeSpineItemSize index:%d is out of range\n", millis(), spineIndex); return 0; } - return cumulativeSpineItemSize.at(spineIndex); + return spineTocCache->getSpineEntry(spineIndex).cumulativeSize; } -std::string& Epub::getSpineItem(const int spineIndex) { - static std::string emptyString; - if (spine.empty()) { - Serial.printf("[%lu] [EBP] getSpineItem called but spine is empty\n", millis()); - return emptyString; +std::string Epub::getSpineHref(const int spineIndex) const { + if (!spineTocCache || !spineTocCache->isLoaded()) { + Serial.printf("[%lu] [EBP] getSpineItem called but cache not loaded\n", millis()); + return ""; } - if (spineIndex < 0 || spineIndex >= static_cast(spine.size())) { + if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) { Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex); - return spine.at(0).second; + return spineTocCache->getSpineEntry(0).href; } - return spine.at(spineIndex).second; + return spineTocCache->getSpineEntry(spineIndex).href; } -EpubTocEntry& Epub::getTocItem(const int tocTndex) { - static EpubTocEntry emptyEntry = {}; - if (toc.empty()) { - Serial.printf("[%lu] [EBP] getTocItem called but toc is empty\n", millis()); - return emptyEntry; - } - if (tocTndex < 0 || tocTndex >= static_cast(toc.size())) { - Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocTndex); - return toc.at(0); +SpineTocCache::TocEntry Epub::getTocItem(const int tocIndex) const { + if (!spineTocCache || !spineTocCache->isLoaded()) { + Serial.printf("[%lu] [EBP] getTocItem called but cache not loaded\n", millis()); + return {}; } - return toc.at(tocTndex); + if (tocIndex < 0 || tocIndex >= spineTocCache->getTocCount()) { + Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocIndex); + return {}; + } + + return spineTocCache->getTocEntry(tocIndex); } -int Epub::getTocItemsCount() const { return toc.size(); } +int Epub::getTocItemsCount() const { + if (!spineTocCache || !spineTocCache->isLoaded()) { + return 0; + } + return spineTocCache->getTocCount(); +} // work out the section index for a toc index int Epub::getSpineIndexForTocIndex(const int tocIndex) const { - if (tocIndex < 0 || tocIndex >= toc.size()) { + if (!spineTocCache || !spineTocCache->isLoaded()) { + Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex called but cache not loaded\n", millis()); + return 0; + } + if (tocIndex < 0 || tocIndex >= spineTocCache->getTocCount()) { Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex: tocIndex %d out of range\n", millis(), tocIndex); return 0; } - // the toc entry should have an href that matches the spine item - // so we can find the spine index by looking for the href - for (int i = 0; i < spine.size(); i++) { - if (spine[i].second == toc[tocIndex].href) { - return i; - } + const int spineIndex = spineTocCache->getTocEntry(tocIndex).spineIndex; + if (spineIndex < 0) { + Serial.printf("[%lu] [EBP] Section not found for TOC index %d\n", millis(), tocIndex); + return 0; } - - Serial.printf("[%lu] [EBP] Section not found\n", millis()); - // not found - default to the start of the book - return 0; + return spineIndex; } int Epub::getTocIndexForSpineIndex(const int spineIndex) const { - if (spineIndex < 0 || spineIndex >= spine.size()) { + if (!spineTocCache || !spineTocCache->isLoaded()) { + Serial.printf("[%lu] [EBP] getTocIndexForSpineIndex called but cache not loaded\n", millis()); + return -1; + } + + if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) { Serial.printf("[%lu] [EBP] getTocIndexForSpineIndex: spineIndex %d out of range\n", millis(), spineIndex); return -1; } - // the toc entry should have an href that matches the spine item - // so we can find the toc index by looking for the href - for (int i = 0; i < toc.size(); i++) { - if (toc[i].href == spine[spineIndex].second) { - return i; - } - } - - Serial.printf("[%lu] [EBP] TOC item not found\n", millis()); - return -1; + return spineTocCache->getSpineEntry(spineIndex).tocIndex; } size_t Epub::getBookSize() const { - if (spine.empty()) { + if (!spineTocCache || !spineTocCache->isLoaded() || spineTocCache->getSpineCount() == 0) { return 0; } return getCumulativeSpineItemSize(getSpineItemsCount() - 1); } // Calculate progress in book -uint8_t Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) { - size_t bookSize = getBookSize(); +uint8_t Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) const { + const size_t bookSize = getBookSize(); if (bookSize == 0) { return 0; } - size_t prevChapterSize = (currentSpineIndex >= 1) ? getCumulativeSpineItemSize(currentSpineIndex - 1) : 0; - size_t curChapterSize = getCumulativeSpineItemSize(currentSpineIndex) - prevChapterSize; - size_t sectionProgSize = currentSpineRead * curChapterSize; + const size_t prevChapterSize = (currentSpineIndex >= 1) ? getCumulativeSpineItemSize(currentSpineIndex - 1) : 0; + const size_t curChapterSize = getCumulativeSpineItemSize(currentSpineIndex) - prevChapterSize; + const size_t sectionProgSize = currentSpineRead * curChapterSize; return round(static_cast(prevChapterSize + sectionProgSize) / bookSize * 100.0); } diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 381379c..6c0e411 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -1,11 +1,12 @@ #pragma once #include +#include #include #include #include -#include "Epub/EpubTocEntry.h" +#include "Epub/SpineTocCache.h" class ZipFile; @@ -18,21 +19,16 @@ class Epub { std::string tocNcxItem; // where is the EPUBfile? std::string filepath; - // the spine of the EPUB file - std::vector> spine; - // the file size of the spine items (proxy to book progress) - std::vector cumulativeSpineItemSize; - // the toc of the EPUB file - std::vector toc; // the base path for items in the EPUB file std::string contentBasePath; // Uniq cache key based on filepath std::string cachePath; + // Spine and TOC cache + std::unique_ptr spineTocCache; bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(const std::string& contentOpfFilePath); - bool parseTocNcxFile(); - void initializeSpineItemSizes(); + bool parseTocNcxFile() const; static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size); public: @@ -54,14 +50,14 @@ class Epub { bool trailingNullByte = false) const; bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const; bool getItemSize(const std::string& itemHref, size_t* size) const; - std::string& getSpineItem(int spineIndex); + std::string getSpineHref(int spineIndex) const; int getSpineItemsCount() const; - size_t getCumulativeSpineItemSize(const int spineIndex) const; - EpubTocEntry& getTocItem(int tocIndex); + size_t getCumulativeSpineItemSize(int spineIndex) const; + SpineTocCache::TocEntry getTocItem(int tocIndex) const; int getTocItemsCount() const; int getSpineIndexForTocIndex(int tocIndex) const; int getTocIndexForSpineIndex(int spineIndex) const; size_t getBookSize() const; - uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead); + uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead) const; }; diff --git a/lib/Epub/Epub/EpubTocEntry.h b/lib/Epub/Epub/EpubTocEntry.h deleted file mode 100644 index 94f0c90..0000000 --- a/lib/Epub/Epub/EpubTocEntry.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include - -struct EpubTocEntry { - std::string title; - std::string href; - std::string anchor; - uint8_t level; -}; diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 7c9d241..b3c2d26 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -117,7 +117,7 @@ bool Section::clearCache() const { bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const int marginTop, const int marginRight, const int marginBottom, const int marginLeft, const bool extraParagraphSpacing) { - const auto localPath = epub->getSpineItem(spineIndex); + const auto localPath = epub->getSpineHref(spineIndex); // TODO: Should we get rid of this file all together? // It currently saves us a bit of memory by allowing for all the inflation bits to be released diff --git a/lib/Epub/Epub/SpineTocCache.cpp b/lib/Epub/Epub/SpineTocCache.cpp new file mode 100644 index 0000000..b8fec22 --- /dev/null +++ b/lib/Epub/Epub/SpineTocCache.cpp @@ -0,0 +1,388 @@ +#include "SpineTocCache.h" + +#include +#include +#include +#include + +#include + +namespace { +constexpr uint8_t SPINE_TOC_CACHE_VERSION = 1; + +// TODO: Centralize this? +std::string normalisePath(const std::string& path) { + std::vector components; + std::string component; + + for (const auto c : path) { + if (c == '/') { + if (!component.empty()) { + if (component == "..") { + if (!components.empty()) { + components.pop_back(); + } + } else { + components.push_back(component); + } + component.clear(); + } + } else { + component += c; + } + } + + if (!component.empty()) { + components.push_back(component); + } + + std::string result; + for (const auto& c : components) { + if (!result.empty()) { + result += "/"; + } + result += c; + } + + return result; +} +} // namespace + +bool SpineTocCache::beginWrite() { + buildMode = true; + spineCount = 0; + tocCount = 0; + + Serial.printf("[%lu] [STC] Beginning write to cache path: %s\n", millis(), cachePath.c_str()); + + // Open spine file for writing + const std::string spineFilePath = cachePath + "/spine.bin"; + Serial.printf("[%lu] [STC] Opening spine file: %s\n", millis(), spineFilePath.c_str()); + spineFile = SD.open(spineFilePath.c_str(), FILE_WRITE, true); + if (!spineFile) { + Serial.printf("[%lu] [STC] Failed to open spine file for writing: %s\n", millis(), spineFilePath.c_str()); + return false; + } + + // Open TOC file for writing + const std::string tocFilePath = cachePath + "/toc.bin"; + Serial.printf("[%lu] [STC] Opening toc file: %s\n", millis(), tocFilePath.c_str()); + tocFile = SD.open(tocFilePath.c_str(), FILE_WRITE, true); + if (!tocFile) { + Serial.printf("[%lu] [STC] Failed to open toc file for writing: %s\n", millis(), tocFilePath.c_str()); + spineFile.close(); + return false; + } + + Serial.printf("[%lu] [STC] Began writing cache files\n", millis()); + return true; +} + +void SpineTocCache::writeString(File& file, const std::string& s) const { + const auto len = static_cast(s.size()); + file.write(reinterpret_cast(&len), sizeof(len)); + file.write(reinterpret_cast(s.data()), len); +} + +void SpineTocCache::writeSpineEntry(File& file, const SpineEntry& entry) const { + writeString(file, entry.href); + file.write(reinterpret_cast(&entry.cumulativeSize), sizeof(entry.cumulativeSize)); + file.write(reinterpret_cast(&entry.tocIndex), sizeof(entry.tocIndex)); +} + +void SpineTocCache::writeTocEntry(File& file, const TocEntry& entry) const { + writeString(file, entry.title); + writeString(file, entry.href); + writeString(file, entry.anchor); + file.write(&entry.level, 1); + file.write(reinterpret_cast(&entry.spineIndex), sizeof(entry.spineIndex)); +} + +void SpineTocCache::addSpineEntry(const std::string& href) { + if (!buildMode || !spineFile) { + Serial.printf("[%lu] [STC] addSpineEntry called but not in build mode\n", millis()); + return; + } + + const SpineEntry entry(href, 0, -1); + writeSpineEntry(spineFile, entry); + spineCount++; +} + +void SpineTocCache::addTocEntry(const std::string& title, const std::string& href, const std::string& anchor, + const uint8_t level) { + if (!buildMode || !tocFile) { + Serial.printf("[%lu] [STC] addTocEntry called but not in build mode\n", millis()); + return; + } + + const TocEntry entry(title, href, anchor, level, -1); + writeTocEntry(tocFile, entry); + tocCount++; +} + +bool SpineTocCache::endWrite() { + if (!buildMode) { + Serial.printf("[%lu] [STC] endWrite called but not in build mode\n", millis()); + return false; + } + + spineFile.close(); + tocFile.close(); + + // Write metadata files with counts + const auto spineMetaPath = cachePath + "/spine_meta.bin"; + File metaFile = SD.open(spineMetaPath.c_str(), FILE_WRITE, true); + if (!metaFile) { + Serial.printf("[%lu] [STC] Failed to write spine metadata\n", millis()); + return false; + } + metaFile.write(&SPINE_TOC_CACHE_VERSION, 1); + metaFile.write(reinterpret_cast(&spineCount), sizeof(spineCount)); + metaFile.write(reinterpret_cast(&tocCount), sizeof(tocCount)); + metaFile.close(); + + buildMode = false; + Serial.printf("[%lu] [STC] Wrote %d spine, %d TOC entries\n", millis(), spineCount, tocCount); + return true; +} + +void SpineTocCache::readString(std::ifstream& is, std::string& s) const { + uint32_t len; + is.read(reinterpret_cast(&len), sizeof(len)); + s.resize(len); + is.read(&s[0], len); +} + +SpineTocCache::SpineEntry SpineTocCache::readSpineEntry(std::ifstream& is) const { + SpineEntry entry; + readString(is, entry.href); + is.read(reinterpret_cast(&entry.cumulativeSize), sizeof(entry.cumulativeSize)); + is.read(reinterpret_cast(&entry.tocIndex), sizeof(entry.tocIndex)); + return entry; +} + +SpineTocCache::TocEntry SpineTocCache::readTocEntry(std::ifstream& is) const { + TocEntry entry; + readString(is, entry.title); + readString(is, entry.href); + readString(is, entry.anchor); + is.read(reinterpret_cast(&entry.level), 1); + is.read(reinterpret_cast(&entry.spineIndex), sizeof(entry.spineIndex)); + return entry; +} + +bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const { + Serial.printf("[%lu] [STC] Computing mappings and sizes for %d spine, %d TOC entries\n", millis(), spineCount, + tocCount); + + // Read all spine and TOC entries into temporary arrays (we need them all to compute mappings) + // TODO: can we do this a bit smarter and avoid loading everything? + std::vector spineEntries; + std::vector tocEntries; + + spineEntries.reserve(spineCount); + tocEntries.reserve(tocCount); + + // Read spine entries + { + const auto spineFilePath = "/sd" + cachePath + "/spine.bin"; + std::ifstream spineStream(spineFilePath.c_str(), std::ios::binary); + if (!spineStream) { + Serial.printf("[%lu] [STC] Failed to open spine file for reading\n", millis()); + return false; + } + + for (int i = 0; i < spineCount; i++) { + spineEntries.push_back(readSpineEntry(spineStream)); + } + spineStream.close(); + } + + // Read TOC entries + { + const auto tocFilePath = "/sd" + cachePath + "/toc.bin"; + std::ifstream tocStream(tocFilePath.c_str(), std::ios::binary); + if (!tocStream) { + Serial.printf("[%lu] [STC] Failed to open toc file for reading\n", millis()); + return false; + } + + for (int i = 0; i < tocCount; i++) { + tocEntries.push_back(readTocEntry(tocStream)); + } + tocStream.close(); + } + + // Compute cumulative sizes + const ZipFile zip("/sd" + epubPath); + size_t cumSize = 0; + + for (int i = 0; i < spineCount; i++) { + size_t itemSize = 0; + const std::string path = normalisePath(spineEntries[i].href); + if (zip.getInflatedFileSize(path.c_str(), &itemSize)) { + cumSize += itemSize; + spineEntries[i].cumulativeSize = cumSize; + } else { + Serial.printf("[%lu] [STC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str()); + } + } + + Serial.printf("[%lu] [STC] Book size: %lu\n", millis(), cumSize); + + // Compute spine → TOC mappings + for (int i = 0; i < spineCount; i++) { + for (int j = 0; j < tocCount; j++) { + if (tocEntries[j].href == spineEntries[i].href) { + spineEntries[i].tocIndex = static_cast(j); + break; + } + } + } + + // Compute TOC → spine mappings + for (int i = 0; i < tocCount; i++) { + for (int j = 0; j < spineCount; j++) { + if (spineEntries[j].href == tocEntries[i].href) { + tocEntries[i].spineIndex = static_cast(j); + break; + } + } + } + + // Rewrite spine file with updated data + { + const auto spineFilePath = cachePath + "/spine.bin"; + File spineFile = SD.open(spineFilePath.c_str(), FILE_WRITE, true); + if (!spineFile) { + Serial.printf("[%lu] [STC] Failed to reopen spine file for writing\n", millis()); + return false; + } + + for (const auto& entry : spineEntries) { + writeSpineEntry(spineFile, entry); + } + spineFile.close(); + } + + // Rewrite TOC file with updated data + { + const auto tocFilePath = cachePath + "/toc.bin"; + File tocFile = SD.open(tocFilePath.c_str(), FILE_WRITE, true); + if (!tocFile) { + Serial.printf("[%lu] [STC] Failed to reopen toc file for writing\n", millis()); + return false; + } + + for (const auto& entry : tocEntries) { + writeTocEntry(tocFile, entry); + } + tocFile.close(); + } + + // Clear vectors to free memory + spineEntries.clear(); + spineEntries.shrink_to_fit(); + tocEntries.clear(); + tocEntries.shrink_to_fit(); + + Serial.printf("[%lu] [STC] Updated cache with mappings and sizes\n", millis()); + return true; +} + +bool SpineTocCache::load() { + // Load metadata + const auto metaPath = cachePath + "/spine_meta.bin"; + if (!SD.exists(metaPath.c_str())) { + Serial.printf("[%lu] [STC] Cache metadata does not exist: %s\n", millis(), metaPath.c_str()); + return false; + } + + File metaFile = SD.open(metaPath.c_str(), FILE_READ); + if (!metaFile) { + Serial.printf("[%lu] [STC] Failed to open cache metadata\n", millis()); + return false; + } + + uint8_t version; + metaFile.read(&version, 1); + if (version != SPINE_TOC_CACHE_VERSION) { + Serial.printf("[%lu] [STC] Cache version mismatch: expected %d, got %d\n", millis(), SPINE_TOC_CACHE_VERSION, + version); + metaFile.close(); + return false; + } + + metaFile.read(reinterpret_cast(&spineCount), sizeof(spineCount)); + metaFile.read(reinterpret_cast(&tocCount), sizeof(tocCount)); + metaFile.close(); + + loaded = true; + Serial.printf("[%lu] [STC] Loaded cache metadata: %d spine, %d TOC entries\n", millis(), spineCount, tocCount); + return true; +} + +SpineTocCache::SpineEntry SpineTocCache::getSpineEntry(const int index) const { + if (!loaded) { + Serial.printf("[%lu] [STC] getSpineEntry called but cache not loaded\n", millis()); + return SpineEntry(); + } + + if (index < 0 || index >= static_cast(spineCount)) { + Serial.printf("[%lu] [STC] getSpineEntry index %d out of range\n", millis(), index); + return SpineEntry(); + } + + const auto spineFilePath = "/sd" + cachePath + "/spine.bin"; + std::ifstream spineStream(spineFilePath.c_str(), std::ios::binary); + if (!spineStream) { + Serial.printf("[%lu] [STC] Failed to open spine file for reading entry\n", millis()); + return SpineEntry(); + } + + // Seek to the correct entry - need to read entries sequentially until we reach the index + // TODO: This could/should be based on a look up table/fixed sizes + for (int i = 0; i < index; i++) { + readSpineEntry(spineStream); // Skip entries + } + + auto entry = readSpineEntry(spineStream); + spineStream.close(); + return entry; +} + +SpineTocCache::TocEntry SpineTocCache::getTocEntry(const int index) const { + if (!loaded) { + Serial.printf("[%lu] [STC] getTocEntry called but cache not loaded\n", millis()); + return TocEntry(); + } + + if (index < 0 || index >= static_cast(tocCount)) { + Serial.printf("[%lu] [STC] getTocEntry index %d out of range\n", millis(), index); + return TocEntry(); + } + + const auto tocFilePath = "/sd" + cachePath + "/toc.bin"; + std::ifstream tocStream(tocFilePath.c_str(), std::ios::binary); + if (!tocStream) { + Serial.printf("[%lu] [STC] Failed to open toc file for reading entry\n", millis()); + return TocEntry(); + } + + // Seek to the correct entry - need to read entries sequentially until we reach the index + // TODO: This could/should be based on a look up table/fixed sizes + for (int i = 0; i < index; i++) { + readTocEntry(tocStream); // Skip entries + } + + auto entry = readTocEntry(tocStream); + tocStream.close(); + return entry; +} + +int SpineTocCache::getSpineCount() const { return spineCount; } + +int SpineTocCache::getTocCount() const { return tocCount; } + +bool SpineTocCache::isLoaded() const { return loaded; } diff --git a/lib/Epub/Epub/SpineTocCache.h b/lib/Epub/Epub/SpineTocCache.h new file mode 100644 index 0000000..7ae9d7b --- /dev/null +++ b/lib/Epub/Epub/SpineTocCache.h @@ -0,0 +1,75 @@ +#pragma once + +#include + +#include +#include + +class SpineTocCache { + public: + struct SpineEntry { + std::string href; + size_t cumulativeSize; + int16_t tocIndex; + + SpineEntry() : cumulativeSize(0), tocIndex(-1) {} + SpineEntry(std::string href, size_t cumulativeSize, int16_t tocIndex) + : href(std::move(href)), cumulativeSize(cumulativeSize), tocIndex(tocIndex) {} + }; + + struct TocEntry { + std::string title; + std::string href; + std::string anchor; + uint8_t level; + int16_t spineIndex; + + TocEntry() : level(0), spineIndex(-1) {} + TocEntry(std::string title, std::string href, std::string anchor, uint8_t level, int16_t spineIndex) + : title(std::move(title)), + href(std::move(href)), + anchor(std::move(anchor)), + level(level), + spineIndex(spineIndex) {} + }; + + private: + std::string cachePath; + uint16_t spineCount; + uint16_t tocCount; + bool loaded; + bool buildMode; + + // Temp file handles during build + File spineFile; + File tocFile; + + void writeString(File& file, const std::string& s) const; + void readString(std::ifstream& is, std::string& s) const; + void writeSpineEntry(File& file, const SpineEntry& entry) const; + void writeTocEntry(File& file, const TocEntry& entry) const; + SpineEntry readSpineEntry(std::ifstream& is) const; + TocEntry readTocEntry(std::ifstream& is) const; + + public: + explicit SpineTocCache(std::string cachePath) + : cachePath(std::move(cachePath)), spineCount(0), tocCount(0), loaded(false), buildMode(false) {} + ~SpineTocCache() = default; + + // Building phase (stream to disk immediately) + bool beginWrite(); + void addSpineEntry(const std::string& href); + void addTocEntry(const std::string& title, const std::string& href, const std::string& anchor, uint8_t level); + bool endWrite(); + + // Post-processing to update mappings and sizes + bool updateMappingsAndSizes(const std::string& epubPath) const; + + // Reading phase (read mode) + bool load(); + SpineEntry getSpineEntry(int index) const; + TocEntry getTocEntry(int index) const; + int getSpineCount() const; + int getTocCount() const; + bool isLoaded() const; +}; diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index 4d3d776..6384935 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -148,10 +148,18 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name return; } + // NOTE: This relies on spine appearing after item manifest if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) { for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "idref") == 0) { - self->spineRefs.emplace_back(atts[i + 1]); + const std::string idref = atts[i + 1]; + // Resolve the idref to href using items map + if (self->items.count(idref) > 0) { + const std::string& href = self->items.at(idref); + if (self->cache) { + self->cache->addSpineEntry(href); + } + } break; } } diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index a3070fc..90e2921 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -4,6 +4,7 @@ #include #include "Epub.h" +#include "Epub/SpineTocCache.h" #include "expat.h" class ContentOpfParser final : public Print { @@ -20,6 +21,7 @@ class ContentOpfParser final : public Print { size_t remainingSize; XML_Parser parser = nullptr; ParserState state = START; + SpineTocCache* cache; static void startElement(void* userData, const XML_Char* name, const XML_Char** atts); static void characterData(void* userData, const XML_Char* s, int len); @@ -30,10 +32,9 @@ class ContentOpfParser final : public Print { std::string tocNcxPath; std::string coverItemId; std::map items; - std::vector spineRefs; - explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize) - : baseContentPath(baseContentPath), remainingSize(xmlSize) {} + explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize, SpineTocCache* cache) + : baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {} ~ContentOpfParser() override; bool setup(); diff --git a/lib/Epub/Epub/parsers/TocNcxParser.cpp b/lib/Epub/Epub/parsers/TocNcxParser.cpp index f470055..463501a 100644 --- a/lib/Epub/Epub/parsers/TocNcxParser.cpp +++ b/lib/Epub/Epub/parsers/TocNcxParser.cpp @@ -167,8 +167,9 @@ void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) { href = href.substr(0, pos); } - // Push to vector - self->toc.push_back({std::move(self->currentLabel), std::move(href), std::move(anchor), self->currentDepth}); + if (self->cache) { + self->cache->addTocEntry(self->currentLabel, href, anchor, self->currentDepth); + } // Clear them so we don't re-add them if there are weird XML structures self->currentLabel.clear(); diff --git a/lib/Epub/Epub/parsers/TocNcxParser.h b/lib/Epub/Epub/parsers/TocNcxParser.h index 2f3601a..7c5e48f 100644 --- a/lib/Epub/Epub/parsers/TocNcxParser.h +++ b/lib/Epub/Epub/parsers/TocNcxParser.h @@ -2,9 +2,8 @@ #include #include -#include -#include "Epub/EpubTocEntry.h" +#include "Epub/SpineTocCache.h" #include "expat.h" class TocNcxParser final : public Print { @@ -14,6 +13,7 @@ class TocNcxParser final : public Print { size_t remainingSize; XML_Parser parser = nullptr; ParserState state = START; + SpineTocCache* cache; std::string currentLabel; std::string currentSrc; @@ -24,10 +24,8 @@ class TocNcxParser final : public Print { static void endElement(void* userData, const XML_Char* name); public: - std::vector toc; - - explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize) - : baseContentPath(baseContentPath), remainingSize(xmlSize) {} + explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize, SpineTocCache* cache) + : baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {} ~TocNcxParser() override; bool setup(); diff --git a/src/activities/reader/EpubReaderActivity.cpp b/src/activities/reader/EpubReaderActivity.cpp index fd9a813..245ee95 100644 --- a/src/activities/reader/EpubReaderActivity.cpp +++ b/src/activities/reader/EpubReaderActivity.cpp @@ -212,7 +212,7 @@ void EpubReaderActivity::renderScreen() { } if (!section) { - const auto filepath = epub->getSpineItem(currentSpineIndex); + const auto filepath = epub->getSpineHref(currentSpineIndex); Serial.printf("[%lu] [ERS] Loading file: %s, index: %d\n", millis(), filepath.c_str(), currentSpineIndex); section = std::unique_ptr
(new Section(epub, currentSpineIndex, renderer)); if (!section->loadCacheMetadata(READER_FONT_ID, lineCompression, marginTop, marginRight, marginBottom, marginLeft, diff --git a/src/activities/reader/EpubReaderChapterSelectionActivity.cpp b/src/activities/reader/EpubReaderChapterSelectionActivity.cpp index 1cda06e..3754fa0 100644 --- a/src/activities/reader/EpubReaderChapterSelectionActivity.cpp +++ b/src/activities/reader/EpubReaderChapterSelectionActivity.cpp @@ -29,7 +29,7 @@ void EpubReaderChapterSelectionActivity::onEnter() { // Trigger first update updateRequired = true; xTaskCreate(&EpubReaderChapterSelectionActivity::taskTrampoline, "EpubReaderChapterSelectionActivityTask", - 2048, // Stack size + 4096, // Stack size this, // Parameters 1, // Priority &displayTaskHandle // Task handle