Rebuild of SpineTocCache as BookMetadataCache using new file format

This commit is contained in:
Dave Allie 2025-12-23 15:51:24 +11:00
parent 09e73b34b5
commit 75fd818c93
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F
12 changed files with 483 additions and 427 deletions

View File

@ -42,7 +42,7 @@ bool Epub::findContentOpfFile(std::string* contentOpfFile) const {
return true;
}
bool Epub::parseContentOpf(bool useCache) {
bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
std::string contentOpfFilePath;
if (!findContentOpfFile(&contentOpfFilePath)) {
Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
@ -59,7 +59,9 @@ bool Epub::parseContentOpf(bool useCache) {
return false;
}
ContentOpfParser opfParser(getBasePath(), contentOpfSize, useCache ? spineTocCache.get() : nullptr);
ContentOpfParser opfParser(getCachePath(), getBasePath(), contentOpfSize, bookMetadataCache.get());
Serial.printf("[%lu] [MEM] Free: %d bytes, Total: %d bytes, Min Free: %d bytes\n", millis(), ESP.getFreeHeap(),
ESP.getHeapSize(), ESP.getMinFreeHeap());
if (!opfParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
@ -72,10 +74,10 @@ bool Epub::parseContentOpf(bool useCache) {
}
// Grab data from opfParser into epub
title = opfParser.title;
// if (!opfParser.coverItemId.empty() && opfParser.items.count(opfParser.coverItemId) > 0) {
// coverImageItem = opfParser.items.at(opfParser.coverItemId);
// }
bookMetadata.title = opfParser.title;
// TODO: Parse author
bookMetadata.author = "";
bookMetadata.coverItemHref = opfParser.coverItemHref;
if (!opfParser.tocNcxPath.empty()) {
tocNcxItem = opfParser.tocNcxPath;
@ -106,7 +108,7 @@ bool Epub::parseTocNcxFile() const {
}
const auto ncxSize = tempNcxFile.size();
TocNcxParser ncxParser(contentBasePath, ncxSize, spineTocCache.get());
TocNcxParser ncxParser(contentBasePath, ncxSize, bookMetadataCache.get());
if (!ncxParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
@ -144,18 +146,10 @@ bool Epub::load() {
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
// Initialize spine/TOC cache
spineTocCache.reset(new SpineTocCache(cachePath));
bookMetadataCache.reset(new BookMetadataCache(cachePath));
// Try to load existing cache first
if (spineTocCache->load()) {
Serial.printf("[%lu] [EBP] Loaded spine/TOC from cache\n", millis());
// Still need to parse content.opf for title and cover
if (!parseContentOpf(false)) {
Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis());
return false;
}
if (bookMetadataCache->load()) {
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true;
}
@ -165,33 +159,59 @@ bool Epub::load() {
setupCacheDir();
// Begin building cache - stream entries to disk immediately
if (!spineTocCache->beginWrite()) {
if (!bookMetadataCache->beginWrite()) {
Serial.printf("[%lu] [EBP] Could not begin writing cache\n", millis());
return false;
}
if (!parseContentOpf(true)) {
// OPF Pass
BookMetadataCache::BookMetadata bookMetadata;
if (!bookMetadataCache->beginContentOpfPass()) {
Serial.printf("[%lu] [EBP] Could not begin writing content.opf pass\n", millis());
return false;
}
if (!parseContentOpf(bookMetadata)) {
Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis());
return false;
}
if (!bookMetadataCache->endContentOpfPass()) {
Serial.printf("[%lu] [EBP] Could not end writing content.opf pass\n", millis());
return false;
}
// TOC Pass
if (!bookMetadataCache->beginTocPass()) {
Serial.printf("[%lu] [EBP] Could not begin writing toc pass\n", millis());
return false;
}
if (!parseTocNcxFile()) {
Serial.printf("[%lu] [EBP] Could not parse toc\n", millis());
return false;
}
if (!bookMetadataCache->endTocPass()) {
Serial.printf("[%lu] [EBP] Could not end writing toc pass\n", millis());
return false;
}
// Close the cache files
if (!spineTocCache->endWrite()) {
if (!bookMetadataCache->endWrite()) {
Serial.printf("[%lu] [EBP] Could not end writing cache\n", millis());
return false;
}
// Now compute mappings and sizes (this loads entries temporarily, computes, then rewrites)
if (!spineTocCache->updateMapsAndSizes(filepath)) {
// Build final book.bin
if (!bookMetadataCache->buildBookBin(filepath, bookMetadata)) {
Serial.printf("[%lu] [EBP] Could not update mappings and sizes\n", millis());
return false;
}
if (!bookMetadataCache->cleanupTmpFiles()) {
Serial.printf("[%lu] [EBP] Could not cleanup tmp files - ignoring\n", millis());
}
// Reload the cache from disk so it's in the correct state
spineTocCache.reset(new SpineTocCache(cachePath));
if (!spineTocCache->load()) {
bookMetadataCache.reset(new BookMetadataCache(cachePath));
if (!bookMetadataCache->load()) {
Serial.printf("[%lu] [EBP] Failed to reload cache after writing\n", millis());
return false;
}
@ -233,7 +253,14 @@ const std::string& Epub::getCachePath() const { return cachePath; }
const std::string& Epub::getPath() const { return filepath; }
const std::string& Epub::getTitle() const { return title; }
const std::string& Epub::getTitle() const {
static std::string blank;
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
return blank;
}
return bookMetadataCache->coreMetadata.title;
}
std::string Epub::getCoverBmpPath() const { return cachePath + "/cover.bmp"; }
@ -243,13 +270,19 @@ bool Epub::generateCoverBmp() const {
return true;
}
if (coverImageItem.empty()) {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
Serial.printf("[%lu] [EBP] Cannot generate cover BMP, cache not loaded\n", millis());
return false;
}
const auto coverImageHref = bookMetadataCache->coreMetadata.coverItemHref;
if (coverImageHref.empty()) {
Serial.printf("[%lu] [EBP] No known cover image\n", millis());
return false;
}
if (coverImageItem.substr(coverImageItem.length() - 4) == ".jpg" ||
coverImageItem.substr(coverImageItem.length() - 5) == ".jpeg") {
if (coverImageHref.substr(coverImageHref.length() - 4) == ".jpg" ||
coverImageHref.substr(coverImageHref.length() - 5) == ".jpeg") {
Serial.printf("[%lu] [EBP] Generating BMP from JPG cover image\n", millis());
const auto coverJpgTempPath = getCachePath() + "/.cover.jpg";
@ -257,7 +290,7 @@ bool Epub::generateCoverBmp() const {
if (!FsHelpers::openFileForWrite("EBP", coverJpgTempPath, coverJpg)) {
return false;
}
readItemContentsToStream(coverImageItem, coverJpg, 1024);
readItemContentsToStream(coverImageHref, coverJpg, 1024);
coverJpg.close();
if (!FsHelpers::openFileForRead("EBP", coverJpgTempPath, coverJpg)) {
@ -318,75 +351,63 @@ bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t*
}
int Epub::getSpineItemsCount() const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
return 0;
}
return spineTocCache->getSpineCount();
return bookMetadataCache->getSpineCount();
}
size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
Serial.printf("[%lu] [EBP] getCumulativeSpineItemSize called but cache not loaded\n", millis());
return 0;
}
size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { return getSpineItem(spineIndex).cumulativeSize; }
if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) {
Serial.printf("[%lu] [EBP] getCumulativeSpineItemSize index:%d is out of range\n", millis(), spineIndex);
return 0;
}
return spineTocCache->getSpineEntry(spineIndex).cumulativeSize;
}
std::string Epub::getSpineHref(const int spineIndex) const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
BookMetadataCache::SpineEntry Epub::getSpineItem(const int spineIndex) const {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
Serial.printf("[%lu] [EBP] getSpineItem called but cache not loaded\n", millis());
return "";
return {};
}
if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) {
if (spineIndex < 0 || spineIndex >= bookMetadataCache->getSpineCount()) {
Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex);
return spineTocCache->getSpineEntry(0).href;
return bookMetadataCache->getSpineEntry(0);
}
return spineTocCache->getSpineEntry(spineIndex).href;
return bookMetadataCache->getSpineEntry(spineIndex);
}
SpineTocCache::TocEntry Epub::getTocItem(const int tocIndex) const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
BookMetadataCache::TocEntry Epub::getTocItem(const int tocIndex) const {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
Serial.printf("[%lu] [EBP] getTocItem called but cache not loaded\n", millis());
return {};
}
if (tocIndex < 0 || tocIndex >= spineTocCache->getTocCount()) {
if (tocIndex < 0 || tocIndex >= bookMetadataCache->getTocCount()) {
Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocIndex);
return {};
}
return spineTocCache->getTocEntry(tocIndex);
return bookMetadataCache->getTocEntry(tocIndex);
}
int Epub::getTocItemsCount() const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
return 0;
}
return spineTocCache->getTocCount();
return bookMetadataCache->getTocCount();
}
// work out the section index for a toc index
int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex called but cache not loaded\n", millis());
return 0;
}
if (tocIndex < 0 || tocIndex >= spineTocCache->getTocCount()) {
if (tocIndex < 0 || tocIndex >= bookMetadataCache->getTocCount()) {
Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex: tocIndex %d out of range\n", millis(), tocIndex);
return 0;
}
const int spineIndex = spineTocCache->getTocEntry(tocIndex).spineIndex;
const int spineIndex = bookMetadataCache->getTocEntry(tocIndex).spineIndex;
if (spineIndex < 0) {
Serial.printf("[%lu] [EBP] Section not found for TOC index %d\n", millis(), tocIndex);
return 0;
@ -395,22 +416,10 @@ int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
return spineIndex;
}
int Epub::getTocIndexForSpineIndex(const int spineIndex) const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
Serial.printf("[%lu] [EBP] getTocIndexForSpineIndex called but cache not loaded\n", millis());
return -1;
}
if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) {
Serial.printf("[%lu] [EBP] getTocIndexForSpineIndex: spineIndex %d out of range\n", millis(), spineIndex);
return -1;
}
return spineTocCache->getSpineEntry(spineIndex).tocIndex;
}
int Epub::getTocIndexForSpineIndex(const int spineIndex) const { return getSpineItem(spineIndex).tocIndex; }
size_t Epub::getBookSize() const {
if (!spineTocCache || !spineTocCache->isLoaded() || spineTocCache->getSpineCount() == 0) {
if (!bookMetadataCache || !bookMetadataCache->isLoaded() || bookMetadataCache->getSpineCount() == 0) {
return 0;
}
return getCumulativeSpineItemSize(getSpineItemsCount() - 1);

View File

@ -6,15 +6,11 @@
#include <unordered_map>
#include <vector>
#include "Epub/SpineTocCache.h"
#include "Epub/BookMetadataCache.h"
class ZipFile;
class Epub {
// the title read from the EPUB meta data
std::string title;
// the cover image
std::string coverImageItem;
// the ncx file
std::string tocNcxItem;
// where is the EPUBfile?
@ -24,10 +20,10 @@ class Epub {
// Uniq cache key based on filepath
std::string cachePath;
// Spine and TOC cache
std::unique_ptr<SpineTocCache> spineTocCache;
std::unique_ptr<BookMetadataCache> bookMetadataCache;
bool findContentOpfFile(std::string* contentOpfFile) const;
bool parseContentOpf(bool useCache);
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
bool parseTocNcxFile() const;
static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size);
@ -50,13 +46,13 @@ class Epub {
bool trailingNullByte = false) const;
bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const;
bool getItemSize(const std::string& itemHref, size_t* size) const;
std::string getSpineHref(int spineIndex) const;
BookMetadataCache::SpineEntry getSpineItem(int spineIndex) const;
BookMetadataCache::TocEntry getTocItem(int tocIndex) const;
int getSpineItemsCount() const;
size_t getCumulativeSpineItemSize(int spineIndex) const;
SpineTocCache::TocEntry getTocItem(int tocIndex) const;
int getTocItemsCount() const;
int getSpineIndexForTocIndex(int tocIndex) const;
int getTocIndexForSpineIndex(int spineIndex) const;
size_t getCumulativeSpineItemSize(int spineIndex) const;
size_t getBookSize() const;
uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead) const;

View File

@ -0,0 +1,323 @@
#include "BookMetadataCache.h"
#include <HardwareSerial.h>
#include <SD.h>
#include <Serialization.h>
#include <ZipFile.h>
#include <vector>
#include "FsHelpers.h"
namespace {
// Format version written as the first field of book.bin; load() refuses any file whose stored version differs.
constexpr uint8_t BOOK_CACHE_VERSION = 1;
// File names below are appended directly to the cache path, hence the leading slash.
// Final combined cache file (header, metadata, lookup tables, spine entries, TOC entries).
constexpr char bookBinFile[] = "/book.bin";
// Temporary spine entries streamed out during the content.opf pass.
constexpr char tmpSpineBinFile[] = "/spine.bin.tmp";
// Temporary TOC entries streamed out during the TOC pass.
constexpr char tmpTocBinFile[] = "/toc.bin.tmp";
} // namespace
/* ============= WRITING / BUILDING FUNCTIONS ================ */
// Puts the cache into build mode and zeroes both entry counters.
// Always reports success; no files are opened here.
bool BookMetadataCache::beginWrite() {
  tocCount = 0;
  spineCount = 0;
  buildMode = true;

  Serial.printf("[%lu] [BMC] Entering write mode\n", millis());
  return true;
}
// Starts the content.opf pass by opening the temporary spine file for writing.
// Returns whatever the open helper reports.
bool BookMetadataCache::beginContentOpfPass() {
  Serial.printf("[%lu] [BMC] Beginning content opf pass\n", millis());

  const std::string spineTmpPath = cachePath + tmpSpineBinFile;
  const bool opened = FsHelpers::openFileForWrite("BMC", spineTmpPath, spineFile);
  return opened;
}
// Finishes the content.opf pass: the temporary spine file handle is released.
// There is no failure path, so this always returns true.
bool BookMetadataCache::endContentOpfPass() {
  spineFile.close();

  return true;
}
// Starts the TOC pass. The temporary spine file is reopened for reading (createTocEntry scans it to
// resolve spine indexes) and the temporary TOC file is opened for writing.
// Returns false if either file cannot be opened; the spine handle is closed again if the TOC open fails.
bool BookMetadataCache::beginTocPass() {
  Serial.printf("[%lu] [BMC] Beginning toc pass\n", millis());

  const bool spineOpened = FsHelpers::openFileForRead("BMC", cachePath + tmpSpineBinFile, spineFile);
  if (!spineOpened) {
    return false;
  }

  const bool tocOpened = FsHelpers::openFileForWrite("BMC", cachePath + tmpTocBinFile, tocFile);
  if (!tocOpened) {
    spineFile.close();
  }
  return tocOpened;
}
// Finishes the TOC pass by releasing both temporary file handles (TOC first, then spine).
// There is no failure path, so this always returns true.
bool BookMetadataCache::endTocPass() {
  tocFile.close();
  spineFile.close();

  return true;
}
// Leaves build mode and logs how many spine and TOC entries were recorded.
// Returns false (after logging) when the cache was not in build mode.
bool BookMetadataCache::endWrite() {
  if (buildMode) {
    buildMode = false;
    Serial.printf("[%lu] [BMC] Wrote %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
    return true;
  }

  Serial.printf("[%lu] [BMC] endWrite called but not in build mode\n", millis());
  return false;
}
bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMetadata& metadata) {
// Open all three files, writing to meta, reading from spine and toc
if (!FsHelpers::openFileForWrite("BMC", cachePath + bookBinFile, bookFile)) {
return false;
}
if (!FsHelpers::openFileForRead("BMC", cachePath + tmpSpineBinFile, spineFile)) {
bookFile.close();
return false;
}
if (!FsHelpers::openFileForRead("BMC", cachePath + tmpTocBinFile, tocFile)) {
bookFile.close();
spineFile.close();
return false;
}
constexpr size_t headerASize =
sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(size_t) + sizeof(spineCount) + sizeof(tocCount);
const size_t metadataSize =
metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() + sizeof(uint32_t) * 3;
const size_t lutSize = sizeof(size_t) * spineCount + sizeof(size_t) * tocCount;
const size_t lutOffset = headerASize + metadataSize;
// Header A
serialization::writePod(bookFile, BOOK_CACHE_VERSION);
serialization::writePod(bookFile, lutOffset);
serialization::writePod(bookFile, spineCount);
serialization::writePod(bookFile, tocCount);
// Metadata
serialization::writeString(bookFile, metadata.title);
serialization::writeString(bookFile, metadata.author);
serialization::writeString(bookFile, metadata.coverItemHref);
// Loop through spine entries, writing LUT positions
spineFile.seek(0);
for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile);
serialization::writePod(bookFile, spineFile.position() + lutOffset + lutSize);
}
// Loop through toc entries, writing LUT positions
tocFile.seek(0);
for (int i = 0; i < tocCount; i++) {
auto tocEntry = readTocEntry(tocFile);
serialization::writePod(bookFile, tocFile.position() + lutOffset + lutSize + spineFile.position());
}
// LUTs complete
// Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
const ZipFile zip("/sd" + epubPath);
size_t cumSize = 0;
spineFile.seek(0);
for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile);
tocFile.seek(0);
for (int j = 0; j < tocCount; j++) {
auto tocEntry = readTocEntry(tocFile);
if (tocEntry.spineIndex == i) {
spineEntry.tocIndex = j;
break;
}
}
// Not a huge deal if we don't fine a TOC entry for the spine entry, this is expected behaviour for EPUBs
// Logging here is for debugging
if (spineEntry.tocIndex == -1) {
Serial.printf("[%lu] [BMC] Warning: Could not find TOC entry for spine item %d: %s\n", millis(), i,
spineEntry.href.c_str());
}
// Calculate size for cumulative size
size_t itemSize = 0;
const std::string path = FsHelpers::normalisePath(spineEntry.href);
if (zip.getInflatedFileSize(path.c_str(), &itemSize)) {
cumSize += itemSize;
spineEntry.cumulativeSize = cumSize;
} else {
Serial.printf("[%lu] [BMC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str());
}
// Write out spine data to book.bin
writeSpineEntry(bookFile, spineEntry);
}
// Loop through toc entries from toc file writing to book.bin
tocFile.seek(0);
for (int i = 0; i < tocCount; i++) {
auto tocEntry = readTocEntry(tocFile);
writeTocEntry(bookFile, tocEntry);
}
bookFile.close();
spineFile.close();
tocFile.close();
Serial.printf("[%lu] [BMC] Successfully built book.bin\n", millis());
return true;
}
bool BookMetadataCache::cleanupTmpFiles() const {
if (SD.exists((cachePath + tmpSpineBinFile).c_str())) {
SD.remove((cachePath + tmpSpineBinFile).c_str());
}
if (SD.exists((cachePath + tmpTocBinFile).c_str())) {
SD.remove((cachePath + tmpTocBinFile).c_str());
}
return true;
}
// Serialises one spine entry (href, cumulativeSize, tocIndex - in that order) at the file's current position.
// Returns the position the entry starts at, i.e. the file position before anything was written.
size_t BookMetadataCache::writeSpineEntry(File& file, const SpineEntry& entry) const {
  const size_t startOffset = file.position();

  serialization::writeString(file, entry.href);
  serialization::writePod(file, entry.cumulativeSize);
  serialization::writePod(file, entry.tocIndex);

  return startOffset;
}
// Serialises one TOC entry (title, href, anchor, level, spineIndex - in that order) at the file's current
// position. Returns the position the entry starts at, i.e. the file position before anything was written.
size_t BookMetadataCache::writeTocEntry(File& file, const TocEntry& entry) const {
  const size_t startOffset = file.position();

  serialization::writeString(file, entry.title);
  serialization::writeString(file, entry.href);
  serialization::writeString(file, entry.anchor);
  serialization::writePod(file, entry.level);
  serialization::writePod(file, entry.spineIndex);

  return startOffset;
}
// Note: for the LUT to be accurate, this **MUST** be called for all spine items before `addTocEntry` is ever called
// this is because in this function we're marking positions of the items
void BookMetadataCache::createSpineEntry(const std::string& href) {
if (!buildMode || !spineFile) {
Serial.printf("[%lu] [BMC] createSpineEntry called but not in build mode\n", millis());
return;
}
const SpineEntry entry(href, 0, -1);
writeSpineEntry(spineFile, entry);
spineCount++;
}
void BookMetadataCache::createTocEntry(const std::string& title, const std::string& href, const std::string& anchor,
const uint8_t level) {
if (!buildMode || !tocFile || !spineFile) {
Serial.printf("[%lu] [BMC] createTocEntry called but not in build mode\n", millis());
return;
}
int spineIndex = -1;
// find spine index
spineFile.seek(0);
for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile);
if (spineEntry.href == href) {
spineIndex = i;
break;
}
}
if (spineIndex == -1) {
Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str());
}
const TocEntry entry(title, href, anchor, level, spineIndex);
writeTocEntry(tocFile, entry);
tocCount++;
}
/* ============= READING / LOADING FUNCTIONS ================ */
bool BookMetadataCache::load() {
if (!FsHelpers::openFileForRead("BMC", cachePath + bookBinFile, bookFile)) {
return false;
}
uint8_t version;
serialization::readPod(bookFile, version);
if (version != BOOK_CACHE_VERSION) {
Serial.printf("[%lu] [BMC] Cache version mismatch: expected %d, got %d\n", millis(), BOOK_CACHE_VERSION, version);
bookFile.close();
return false;
}
serialization::readPod(bookFile, lutOffset);
serialization::readPod(bookFile, spineCount);
serialization::readPod(bookFile, tocCount);
loaded = true;
Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
return true;
}
// Fetches the spine entry at `index` from book.bin via the lookup table.
// Returns a default-constructed entry (after logging) when the cache is not loaded or the index is
// outside [0, spineCount).
BookMetadataCache::SpineEntry BookMetadataCache::getSpineEntry(const int index) {
  if (!loaded) {
    Serial.printf("[%lu] [BMC] getSpineEntry called but cache not loaded\n", millis());
    return {};
  }

  const bool inRange = index >= 0 && index < static_cast<int>(spineCount);
  if (!inRange) {
    Serial.printf("[%lu] [BMC] getSpineEntry index %d out of range\n", millis(), index);
    return {};
  }

  // The spine LUT starts at lutOffset; each slot is a size_t holding the entry's position in book.bin
  const size_t lutSlot = lutOffset + sizeof(size_t) * index;
  bookFile.seek(lutSlot);
  size_t entryOffset = 0;
  serialization::readPod(bookFile, entryOffset);

  bookFile.seek(entryOffset);
  SpineEntry entry = readSpineEntry(bookFile);
  return entry;
}
// Fetches the TOC entry at `index` from book.bin via the lookup table.
// Returns a default-constructed entry (after logging) when the cache is not loaded or the index is
// outside [0, tocCount).
BookMetadataCache::TocEntry BookMetadataCache::getTocEntry(const int index) {
  if (!loaded) {
    Serial.printf("[%lu] [BMC] getTocEntry called but cache not loaded\n", millis());
    return {};
  }

  if (index < 0 || index >= static_cast<int>(tocCount)) {
    Serial.printf("[%lu] [BMC] getTocEntry index %d out of range\n", millis(), index);
    return {};
  }

  // Seek to TOC LUT item (the TOC slots follow the spineCount spine slots), read from LUT and get out data
  bookFile.seek(lutOffset + sizeof(size_t) * spineCount + sizeof(size_t) * index);
  size_t tocEntryPos;
  serialization::readPod(bookFile, tocEntryPos);
  // Cast so the argument type matches the %d conversion
  Serial.printf("[%lu] [BMC] getTocEntry tocEntryPos: %d\n", millis(), static_cast<int>(tocEntryPos));
  bookFile.seek(tocEntryPos);
  // The entry lives in book.bin. tocFile is only a temporary build-time handle and is not open after
  // load(), so reading from it here would never return the entry that was just seeked to.
  return readTocEntry(bookFile);
}
// Deserialises one spine entry from the file's current position, reading fields in the same order
// writeSpineEntry stores them: href, cumulativeSize, tocIndex.
BookMetadataCache::SpineEntry BookMetadataCache::readSpineEntry(File& file) const {
  SpineEntry result;

  serialization::readString(file, result.href);
  serialization::readPod(file, result.cumulativeSize);
  serialization::readPod(file, result.tocIndex);

  return result;
}
// Deserialises one TOC entry from the file's current position, reading fields in the same order
// writeTocEntry stores them: title, href, anchor, level, spineIndex.
BookMetadataCache::TocEntry BookMetadataCache::readTocEntry(File& file) const {
  TocEntry result;

  serialization::readString(file, result.title);
  serialization::readString(file, result.href);
  serialization::readString(file, result.anchor);
  serialization::readPod(file, result.level);
  serialization::readPod(file, result.spineIndex);

  return result;
}

View File

@ -4,8 +4,14 @@
#include <string>
class SpineTocCache {
class BookMetadataCache {
public:
struct BookMetadata {
std::string title;
std::string author;
std::string coverItemHref;
};
struct SpineEntry {
std::string href;
size_t cumulativeSize;
@ -34,13 +40,14 @@ class SpineTocCache {
private:
std::string cachePath;
size_t lutOffset;
uint16_t spineCount;
uint16_t tocCount;
bool loaded;
bool buildMode;
File bookFile;
// Temp file handles during build
File metaFile;
File spineFile;
File tocFile;
@ -50,24 +57,31 @@ class SpineTocCache {
TocEntry readTocEntry(File& file) const;
public:
explicit SpineTocCache(std::string cachePath)
: cachePath(std::move(cachePath)), spineCount(0), tocCount(0), loaded(false), buildMode(false) {}
~SpineTocCache() = default;
BookMetadata coreMetadata;
explicit BookMetadataCache(std::string cachePath)
: cachePath(std::move(cachePath)), lutOffset(0), spineCount(0), tocCount(0), loaded(false), buildMode(false) {}
~BookMetadataCache() = default;
// Building phase (stream to disk immediately)
bool beginWrite();
void addSpineEntry(const std::string& href);
void addTocEntry(const std::string& title, const std::string& href, const std::string& anchor, uint8_t level);
bool beginContentOpfPass();
void createSpineEntry(const std::string& href);
bool endContentOpfPass();
bool beginTocPass();
void createTocEntry(const std::string& title, const std::string& href, const std::string& anchor, uint8_t level);
bool endTocPass();
bool endWrite();
bool cleanupTmpFiles() const;
// Post-processing to update mappings and sizes
bool updateMapsAndSizes(const std::string& epubPath);
bool buildBookBin(const std::string& epubPath, const BookMetadata& metadata);
// Reading phase (read mode)
bool load();
SpineEntry getSpineEntry(int index);
TocEntry getTocEntry(int index);
int getSpineCount() const;
int getTocCount() const;
bool isLoaded() const;
int getSpineCount() const { return spineCount; }
int getTocCount() const { return tocCount; }
bool isLoaded() const { return loaded; }
};

View File

@ -16,7 +16,7 @@ bool FsHelpers::openFileForRead(const char* moduleName, const std::string& path,
bool FsHelpers::openFileForWrite(const char* moduleName, const std::string& path, File& file) {
file = SD.open(path.c_str(), FILE_WRITE, true);
if (!file) {
Serial.printf("[%lu] [%s] Failed to open spine file for writing: %s\n", millis(), moduleName, path.c_str());
Serial.printf("[%lu] [%s] Failed to open file for writing: %s\n", millis(), moduleName, path.c_str());
return false;
}
return true;

View File

@ -116,8 +116,7 @@ bool Section::clearCache() const {
bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const int marginTop,
const int marginRight, const int marginBottom, const int marginLeft,
const bool extraParagraphSpacing) {
const auto localPath = epub->getSpineHref(spineIndex);
const auto localPath = epub->getSpineItem(spineIndex).href;
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
File tmpHtml;
if (!FsHelpers::openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) {

View File

@ -1,311 +0,0 @@
#include "SpineTocCache.h"
#include <HardwareSerial.h>
#include <SD.h>
#include <Serialization.h>
#include <ZipFile.h>
#include <vector>
#include "FsHelpers.h"
namespace {
constexpr uint8_t SPINE_TOC_CACHE_VERSION = 1;
constexpr size_t SPINE_TOC_META_HEADER_SIZE = sizeof(SPINE_TOC_CACHE_VERSION) + sizeof(uint16_t) * 2;
constexpr char spineTocMetaBinFile[] = "/spine_toc_meta.bin";
constexpr char spineBinFile[] = "/spine.bin";
constexpr char tocBinFile[] = "/toc.bin";
} // namespace
bool SpineTocCache::beginWrite() {
buildMode = true;
spineCount = 0;
tocCount = 0;
Serial.printf("[%lu] [STC] Beginning write to cache path: %s\n", millis(), cachePath.c_str());
// Open spine file for writing
if (!FsHelpers::openFileForWrite("STC", cachePath + spineBinFile, spineFile)) {
return false;
}
// Open TOC file for writing
if (!FsHelpers::openFileForWrite("STC", cachePath + tocBinFile, tocFile)) {
spineFile.close();
return false;
}
// Open meta file for writing
if (!FsHelpers::openFileForWrite("STC", cachePath + spineTocMetaBinFile, metaFile)) {
spineFile.close();
tocFile.close();
return false;
}
// Write 0s into first slots, LUT is written during `addSpineEntry` and `addTocEntry`, and counts are rewritten at
// the end
serialization::writePod(metaFile, SPINE_TOC_CACHE_VERSION);
serialization::writePod(metaFile, spineCount);
serialization::writePod(metaFile, tocCount);
Serial.printf("[%lu] [STC] Began writing cache files\n", millis());
return true;
}
size_t SpineTocCache::writeSpineEntry(File& file, const SpineEntry& entry) const {
const auto pos = file.position();
serialization::writeString(file, entry.href);
serialization::writePod(file, entry.cumulativeSize);
serialization::writePod(file, entry.tocIndex);
return pos;
}
size_t SpineTocCache::writeTocEntry(File& file, const TocEntry& entry) const {
const auto pos = file.position();
serialization::writeString(file, entry.title);
serialization::writeString(file, entry.href);
serialization::writeString(file, entry.anchor);
serialization::writePod(file, entry.level);
serialization::writePod(file, entry.spineIndex);
return pos;
}
// Note: for the LUT to be accurate, this **MUST** be called for all spine items before `addTocEntry` is ever called
// this is because in this function we're marking positions of the items
void SpineTocCache::addSpineEntry(const std::string& href) {
if (!buildMode || !spineFile || !metaFile) {
Serial.printf("[%lu] [STC] addSpineEntry called but not in build mode\n", millis());
return;
}
const SpineEntry entry(href, 0, -1);
const auto position = writeSpineEntry(spineFile, entry);
serialization::writePod(metaFile, position);
spineCount++;
}
void SpineTocCache::addTocEntry(const std::string& title, const std::string& href, const std::string& anchor,
const uint8_t level) {
if (!buildMode || !tocFile || !metaFile) {
Serial.printf("[%lu] [STC] addTocEntry called but not in build mode\n", millis());
return;
}
const TocEntry entry(title, href, anchor, level, -1);
const auto position = writeTocEntry(tocFile, entry);
serialization::writePod(metaFile, position);
tocCount++;
}
bool SpineTocCache::endWrite() {
if (!buildMode) {
Serial.printf("[%lu] [STC] endWrite called but not in build mode\n", millis());
return false;
}
spineFile.close();
tocFile.close();
// Write correct counts into meta file
metaFile.seek(sizeof(SPINE_TOC_CACHE_VERSION));
serialization::writePod(metaFile, spineCount);
serialization::writePod(metaFile, tocCount);
metaFile.close();
buildMode = false;
Serial.printf("[%lu] [STC] Wrote %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
return true;
}
SpineTocCache::SpineEntry SpineTocCache::readSpineEntry(File& file) const {
SpineEntry entry;
serialization::readString(file, entry.href);
serialization::readPod(file, entry.cumulativeSize);
serialization::readPod(file, entry.tocIndex);
return entry;
}
SpineTocCache::TocEntry SpineTocCache::readTocEntry(File& file) const {
TocEntry entry;
serialization::readString(file, entry.title);
serialization::readString(file, entry.href);
serialization::readString(file, entry.anchor);
serialization::readPod(file, entry.level);
serialization::readPod(file, entry.spineIndex);
return entry;
}
bool SpineTocCache::updateMapsAndSizes(const std::string& epubPath) {
Serial.printf("[%lu] [STC] Computing mappings and sizes for %d spine, %d TOC entries\n", millis(), spineCount,
tocCount);
std::vector<SpineEntry> spineEntries;
spineEntries.reserve(spineCount);
// Load only the spine items, update them in memory while loading one TOC at a time and storing it
{
if (!FsHelpers::openFileForRead("STC", cachePath + spineBinFile, spineFile)) {
return false;
}
for (int i = 0; i < spineCount; i++) {
spineEntries.push_back(readSpineEntry(spineFile));
}
spineFile.close();
}
// Iterate over TOC entries and update them with the spine mapping
// We do this by moving the TOC file and then making a new one parsing through both at the same time
{
SD.rename((cachePath + tocBinFile).c_str(), (cachePath + tocBinFile + ".tmp").c_str());
File tempTocFile;
if (!FsHelpers::openFileForRead("STC", cachePath + tocBinFile + ".tmp", tempTocFile)) {
SD.remove((cachePath + tocBinFile + ".tmp").c_str());
return false;
}
if (!FsHelpers::openFileForWrite("STC", cachePath + tocBinFile, tocFile)) {
tempTocFile.close();
SD.remove((cachePath + tocBinFile + ".tmp").c_str());
return false;
}
for (int i = 0; i < tocCount; i++) {
auto tocEntry = readTocEntry(tempTocFile);
// Find the matching spine entry
for (int j = 0; j < spineCount; j++) {
if (spineEntries[j].href == tocEntry.href) {
tocEntry.spineIndex = static_cast<int16_t>(j);
// Point the spine to the first TOC entry we come across (in the case that there are multiple)
if (spineEntries[j].tocIndex == -1) spineEntries[j].tocIndex = static_cast<int16_t>(i);
break;
}
}
writeTocEntry(tocFile, tocEntry);
}
tocFile.close();
tempTocFile.close();
SD.remove((cachePath + tocBinFile + ".tmp").c_str());
}
// By this point all the spine items in memory should have the right `tocIndex` and the TOC file is complete
// Next, compute cumulative sizes
{
const ZipFile zip("/sd" + epubPath);
size_t cumSize = 0;
for (int i = 0; i < spineCount; i++) {
size_t itemSize = 0;
const std::string path = FsHelpers::normalisePath(spineEntries[i].href);
if (zip.getInflatedFileSize(path.c_str(), &itemSize)) {
cumSize += itemSize;
spineEntries[i].cumulativeSize = cumSize;
} else {
Serial.printf("[%lu] [STC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str());
}
}
Serial.printf("[%lu] [STC] Book size: %lu\n", millis(), cumSize);
}
// Rewrite spine file with updated data
{
if (!FsHelpers::openFileForWrite("STC", cachePath + spineBinFile, spineFile)) {
// metaFile.close();
return false;
}
for (const auto& entry : spineEntries) {
writeSpineEntry(spineFile, entry);
}
spineFile.close();
}
// Clear vectors to free memory
spineEntries.clear();
spineEntries.shrink_to_fit();
Serial.printf("[%lu] [STC] Updated cache with mappings and sizes\n", millis());
return true;
}
// Opens the metadata, spine and TOC cache files and leaves all three open for fast access.
// Returns true once the cache version has been checked and the spine/TOC counts have been read
// from the metadata file. On any failure, every file opened so far is closed and false is returned.
bool SpineTocCache::load() {
  // Load metadata
  if (!FsHelpers::openFileForRead("STC", cachePath + spineTocMetaBinFile, metaFile)) {
    return false;
  }

  // Value-initialised so the comparison below never reads an indeterminate byte if the metadata
  // file is empty or truncated (readPod's behaviour on a short read is not visible from here).
  uint8_t version = 0;
  serialization::readPod(metaFile, version);
  if (version != SPINE_TOC_CACHE_VERSION) {
    Serial.printf("[%lu] [STC] Cache version mismatch: expected %d, got %d\n", millis(), SPINE_TOC_CACHE_VERSION,
                  version);
    metaFile.close();
    return false;
  }

  if (!FsHelpers::openFileForRead("STC", cachePath + spineBinFile, spineFile)) {
    metaFile.close();
    return false;
  }

  if (!FsHelpers::openFileForRead("STC", cachePath + tocBinFile, tocFile)) {
    metaFile.close();
    spineFile.close();
    return false;
  }

  // The entry counts follow the version byte in the metadata file.
  serialization::readPod(metaFile, spineCount);
  serialization::readPod(metaFile, tocCount);

  loaded = true;
  Serial.printf("[%lu] [STC] Loaded cache metadata: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
  return true;
}
// Reads spine entry `index` from the open cache files.
// The metadata file holds a lookup table of size_t offsets starting at SPINE_TOC_META_HEADER_SIZE;
// slot `index` gives the position of the entry inside the spine file.
// Returns a default-constructed entry when the cache is not loaded, `index` is out of range,
// or either seek fails.
SpineTocCache::SpineEntry SpineTocCache::getSpineEntry(const int index) {
  if (!loaded) {
    Serial.printf("[%lu] [STC] getSpineEntry called but cache not loaded\n", millis());
    return {};
  }

  if (index < 0 || index >= static_cast<int>(spineCount)) {
    Serial.printf("[%lu] [STC] getSpineEntry index %d out of range\n", millis(), index);
    return {};
  }

  // Seek to spine LUT item, read from LUT and get out data
  const auto lutPos = SPINE_TOC_META_HEADER_SIZE + sizeof(size_t) * static_cast<size_t>(index);
  if (!metaFile.seek(lutPos)) {
    Serial.printf("[%lu] [STC] getSpineEntry could not seek to LUT slot for index %d\n", millis(), index);
    return {};
  }

  // Initialised so a short read cannot leave an indeterminate offset to seek to.
  size_t spineEntryPos = 0;
  serialization::readPod(metaFile, spineEntryPos);

  // Do not deserialise from wherever the cursor happens to be if the seek is rejected.
  if (!spineFile.seek(spineEntryPos)) {
    Serial.printf("[%lu] [STC] getSpineEntry could not seek to entry for index %d\n", millis(), index);
    return {};
  }

  return readSpineEntry(spineFile);
}
// Reads TOC entry `index` from the open cache files.
// The TOC lookup table sits in the metadata file directly after the spine lookup table
// (header, then `spineCount` size_t slots, then the TOC slots); slot `index` gives the position
// of the entry inside the TOC file.
// Returns a default-constructed entry when the cache is not loaded, `index` is out of range,
// or either seek fails.
SpineTocCache::TocEntry SpineTocCache::getTocEntry(const int index) {
  if (!loaded) {
    Serial.printf("[%lu] [STC] getTocEntry called but cache not loaded\n", millis());
    return {};
  }

  if (index < 0 || index >= static_cast<int>(tocCount)) {
    Serial.printf("[%lu] [STC] getTocEntry index %d out of range\n", millis(), index);
    return {};
  }

  // Seek to TOC LUT item, read from LUT and get out data
  const auto lutPos =
      SPINE_TOC_META_HEADER_SIZE + sizeof(size_t) * spineCount + sizeof(size_t) * static_cast<size_t>(index);
  if (!metaFile.seek(lutPos)) {
    Serial.printf("[%lu] [STC] getTocEntry could not seek to LUT slot for index %d\n", millis(), index);
    return {};
  }

  // Initialised so a short read cannot leave an indeterminate offset to seek to.
  size_t tocEntryPos = 0;
  serialization::readPod(metaFile, tocEntryPos);

  // Do not deserialise from wherever the cursor happens to be if the seek is rejected.
  if (!tocFile.seek(tocEntryPos)) {
    Serial.printf("[%lu] [STC] getTocEntry could not seek to entry for index %d\n", millis(), index);
    return {};
  }

  return readTocEntry(tocFile);
}
// Returns the value of the `spineCount` member as an int.
int SpineTocCache::getSpineCount() const {
  return static_cast<int>(spineCount);
}
// Returns the value of the `tocCount` member as an int.
int SpineTocCache::getTocCount() const {
  return static_cast<int>(tocCount);
}
// Reports the `loaded` flag, which load() sets to true once it has succeeded.
bool SpineTocCache::isLoaded() const {
  return loaded;
}

View File

@ -1,12 +1,16 @@
#include "ContentOpfParser.h"
#include <FsHelpers.h>
#include <HardwareSerial.h>
#include <Serialization.h>
#include <ZipFile.h>
#include "../BookMetadataCache.h"
namespace {
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
}
constexpr char itemCacheFile[] = "/.items.bin";
} // namespace
bool ContentOpfParser::setup() {
parser = XML_ParserCreate(nullptr);
@ -29,6 +33,12 @@ ContentOpfParser::~ContentOpfParser() {
XML_ParserFree(parser);
parser = nullptr;
}
if (tempItemStore) {
tempItemStore.close();
}
if (SD.exists((cachePath + itemCacheFile).c_str())) {
SD.remove((cachePath + itemCacheFile).c_str());
}
}
size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); }
@ -95,13 +105,21 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
if (self->state == IN_PACKAGE && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
self->state = IN_MANIFEST;
self->tempItemStore = SD.open("/.crosspoint/.tmp-items.bin", FILE_WRITE, true);
if (!FsHelpers::openFileForWrite("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
Serial.printf(
"[%lu] [COF] Couldn't open temp items file for writing. This is probably going to be a fatal error.\n",
millis());
}
return;
}
if (self->state == IN_PACKAGE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) {
self->state = IN_SPINE;
self->tempItemStore = SD.open("/.crosspoint/.tmp-items.bin", FILE_READ);
if (!FsHelpers::openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
Serial.printf(
"[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n",
millis());
}
return;
}
@ -138,10 +156,13 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
}
}
// Write items down to SD card
serialization::writeString(self->tempItemStore, itemId);
serialization::writeString(self->tempItemStore, href);
// // Write items down to SD card
// self->items[itemId] = href;
if (itemId == self->coverItemId) {
self->coverItemHref = href;
}
if (mediaType == MEDIA_TYPE_NCX) {
if (self->tocNcxPath.empty()) {
@ -154,7 +175,7 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
return;
}
// NOTE: This relies on spine appearing after item manifest
// NOTE: This relies on spine appearing after item manifest (which is pretty safe as it's part of the EPUB spec)
// Only run the spine parsing if there's a cache to add it to
if (self->cache) {
if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) {
@ -169,7 +190,7 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
serialization::readString(self->tempItemStore, itemId);
serialization::readString(self->tempItemStore, href);
if (itemId == idref) {
self->cache->addSpineEntry(href);
self->cache->createSpineEntry(href);
break;
}
}

View File

@ -2,9 +2,10 @@
#include <Print.h>
#include "Epub.h"
#include "Epub/SpineTocCache.h"
#include "expat.h"
class BookMetadataCache;
class ContentOpfParser final : public Print {
enum ParserState {
START,
@ -15,12 +16,14 @@ class ContentOpfParser final : public Print {
IN_SPINE,
};
const std::string& cachePath;
const std::string& baseContentPath;
size_t remainingSize;
XML_Parser parser = nullptr;
ParserState state = START;
SpineTocCache* cache;
BookMetadataCache* cache;
File tempItemStore;
std::string coverItemId;
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void characterData(void* userData, const XML_Char* s, int len);
@ -29,10 +32,11 @@ class ContentOpfParser final : public Print {
public:
std::string title;
std::string tocNcxPath;
std::string coverItemId;
std::string coverItemHref;
explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize, SpineTocCache* cache)
: baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
BookMetadataCache* cache)
: cachePath(cachePath), baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
~ContentOpfParser() override;
bool setup();

View File

@ -1,8 +1,9 @@
#include "TocNcxParser.h"
#include <Esp.h>
#include <HardwareSerial.h>
#include "../BookMetadataCache.h"
bool TocNcxParser::setup() {
parser = XML_ParserCreate(nullptr);
if (!parser) {
@ -168,7 +169,7 @@ void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) {
}
if (self->cache) {
self->cache->addTocEntry(self->currentLabel, href, anchor, self->currentDepth);
self->cache->createTocEntry(self->currentLabel, href, anchor, self->currentDepth);
}
// Clear them so we don't re-add them if there are weird XML structures

View File

@ -1,10 +1,10 @@
#pragma once
#include <Print.h>
#include <expat.h>
#include <string>
#include "Epub/SpineTocCache.h"
#include "expat.h"
class BookMetadataCache;
class TocNcxParser final : public Print {
enum ParserState { START, IN_NCX, IN_NAV_MAP, IN_NAV_POINT, IN_NAV_LABEL, IN_NAV_LABEL_TEXT, IN_CONTENT };
@ -13,7 +13,7 @@ class TocNcxParser final : public Print {
size_t remainingSize;
XML_Parser parser = nullptr;
ParserState state = START;
SpineTocCache* cache;
BookMetadataCache* cache;
std::string currentLabel;
std::string currentSrc;
@ -24,7 +24,7 @@ class TocNcxParser final : public Print {
static void endElement(void* userData, const XML_Char* name);
public:
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize, SpineTocCache* cache)
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache)
: baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
~TocNcxParser() override;

View File

@ -212,7 +212,7 @@ void EpubReaderActivity::renderScreen() {
}
if (!section) {
const auto filepath = epub->getSpineHref(currentSpineIndex);
const auto filepath = epub->getSpineItem(currentSpineIndex).href;
Serial.printf("[%lu] [ERS] Loading file: %s, index: %d\n", millis(), filepath.c_str(), currentSpineIndex);
section = std::unique_ptr<Section>(new Section(epub, currentSpineIndex, renderer));
if (!section->loadCacheMetadata(READER_FONT_ID, lineCompression, marginTop, marginRight, marginBottom, marginLeft,