This commit is contained in:
Dave Allie 2025-12-22 20:41:09 +11:00
parent a325f12656
commit dc3869ac1c
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F
8 changed files with 134 additions and 200 deletions

View File

@ -44,7 +44,15 @@ bool Epub::findContentOpfFile(std::string* contentOpfFile) const {
return true;
}
bool Epub::parseContentOpf(const std::string& contentOpfFilePath) {
bool Epub::parseContentOpf(bool useCache) {
std::string contentOpfFilePath;
if (!findContentOpfFile(&contentOpfFilePath)) {
Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
return false;
}
contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1);
Serial.printf("[%lu] [EBP] Parsing content.opf: %s\n", millis(), contentOpfFilePath.c_str());
size_t contentOpfSize;
@ -53,7 +61,7 @@ bool Epub::parseContentOpf(const std::string& contentOpfFilePath) {
return false;
}
ContentOpfParser opfParser(getBasePath(), contentOpfSize, spineTocCache.get());
ContentOpfParser opfParser(getBasePath(), contentOpfSize, useCache ? spineTocCache.get() : nullptr);
if (!opfParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
@ -140,76 +148,32 @@ bool Epub::load() {
Serial.printf("[%lu] [EBP] Loaded spine/TOC from cache\n", millis());
// Still need to parse content.opf for title and cover
// TODO: Should this data go in the cache?
std::string contentOpfFilePath;
if (!findContentOpfFile(&contentOpfFilePath)) {
Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
if (!parseContentOpf(false)) {
Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis());
return false;
}
contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1);
// Parse content.opf but without cache (we already have it)
size_t contentOpfSize;
if (!getItemSize(contentOpfFilePath, &contentOpfSize)) {
Serial.printf("[%lu] [EBP] Could not get size of content.opf\n", millis());
return false;
}
ContentOpfParser opfParser(getBasePath(), contentOpfSize, nullptr);
if (!opfParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
return false;
}
if (!readItemContentsToStream(contentOpfFilePath, opfParser, 1024)) {
Serial.printf("[%lu] [EBP] Could not read content.opf\n", millis());
return false;
}
title = opfParser.title;
if (!opfParser.coverItemId.empty() && opfParser.items.count(opfParser.coverItemId) > 0) {
coverImageItem = opfParser.items.at(opfParser.coverItemId);
}
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true;
}
// Cache doesn't exist or is invalid, build it
Serial.printf("[%lu] [EBP] Cache not found, building spine/TOC cache\n", millis());
std::string contentOpfFilePath;
if (!findContentOpfFile(&contentOpfFilePath)) {
Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
return false;
}
Serial.printf("[%lu] [EBP] Found content.opf at: %s\n", millis(), contentOpfFilePath.c_str());
contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1);
// Ensure cache directory exists
setupCacheDir();
Serial.printf("[%lu] [EBP] Cache path: %s\n", millis(), cachePath.c_str());
// Begin building cache - stream entries to disk immediately
if (!spineTocCache->beginWrite()) {
Serial.printf("[%lu] [EBP] Could not begin writing cache\n", millis());
return false;
}
if (!parseContentOpf(contentOpfFilePath)) {
if (!parseContentOpf(true)) {
Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis());
return false;
}
if (!parseTocNcxFile()) {
Serial.printf("[%lu] [EBP] Could not parse toc\n", millis());
return false;
}
// Close the cache files
if (!spineTocCache->endWrite()) {
Serial.printf("[%lu] [EBP] Could not end writing cache\n", millis());
@ -230,7 +194,6 @@ bool Epub::load() {
}
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true;
}
@ -309,45 +272,9 @@ bool Epub::generateCoverBmp() const {
return false;
}
/**
 * Normalise a '/'-separated path for lookup inside the EPUB zip archive.
 *
 * The path is split on '/', then rebuilt with single '/' separators:
 *  - empty segments (leading, trailing or repeated '/') are dropped,
 *  - "." segments are dropped,
 *  - ".." removes the previously kept segment; at the root it is ignored.
 *
 * The result never starts or ends with '/'.
 *
 * @param path Path or href to normalise; may be empty.
 * @return The normalised path ("" when nothing remains).
 */
std::string normalisePath(const std::string& path) {
  std::vector<std::string> components;
  std::string component;

  // Applies one finished segment to the stack of kept components. Used for both
  // '/'-terminated segments and the final one, so a trailing ".." is resolved too.
  const auto applySegment = [&components](const std::string& segment) {
    if (segment.empty() || segment == ".") {
      return;  // contributes nothing to the resulting path
    }
    if (segment == "..") {
      if (!components.empty()) {
        components.pop_back();
      }
      return;
    }
    components.push_back(segment);
  };

  for (const auto c : path) {
    if (c == '/') {
      applySegment(component);
      component.clear();
    } else {
      component += c;
    }
  }
  // The last segment has no terminating '/', so it is still pending here.
  applySegment(component);

  std::string result;
  for (const auto& part : components) {
    if (!result.empty()) {
      result += "/";
    }
    result += part;
  }
  return result;
}
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const {
const ZipFile zip("/sd" + filepath);
const std::string path = normalisePath(itemHref);
const std::string path = FsHelpers::normalisePath(itemHref);
const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte);
if (!content) {
@ -360,7 +287,7 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size
// Streams the contents of the archive entry named by `itemHref` into `out`.
// The zip is opened at "/sd" + filepath and the href is normalised before lookup.
// `chunkSize` is forwarded unchanged to ZipFile::readFileToStream, whose bool
// result is returned as-is.
bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
const ZipFile zip("/sd" + filepath);
const std::string path = normalisePath(itemHref);
const std::string path = FsHelpers::normalisePath(itemHref);
return zip.readFileToStream(path.c_str(), out, chunkSize);
}
@ -371,7 +298,7 @@ bool Epub::getItemSize(const std::string& itemHref, size_t* size) const {
}
// Looks up the inflated (uncompressed) size of the entry named by `itemHref`
// in an already-open `zip`. The href is normalised first; the size is written
// through `size` by ZipFile::getInflatedFileSize, whose bool result is returned.
bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size) {
const std::string path = normalisePath(itemHref);
const std::string path = FsHelpers::normalisePath(itemHref);
return zip.getInflatedFileSize(path.c_str(), size);
}
@ -387,10 +314,12 @@ size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const {
Serial.printf("[%lu] [EBP] getCumulativeSpineItemSize called but cache not loaded\n", millis());
return 0;
}
if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) {
Serial.printf("[%lu] [EBP] getCumulativeSpineItemSize index:%d is out of range\n", millis(), spineIndex);
return 0;
}
return spineTocCache->getSpineEntry(spineIndex).cumulativeSize;
}
@ -399,6 +328,7 @@ std::string Epub::getSpineHref(const int spineIndex) const {
Serial.printf("[%lu] [EBP] getSpineItem called but cache not loaded\n", millis());
return "";
}
if (spineIndex < 0 || spineIndex >= spineTocCache->getSpineCount()) {
Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex);
return spineTocCache->getSpineEntry(0).href;
@ -425,6 +355,7 @@ int Epub::getTocItemsCount() const {
if (!spineTocCache || !spineTocCache->isLoaded()) {
return 0;
}
return spineTocCache->getTocCount();
}
@ -434,6 +365,7 @@ int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex called but cache not loaded\n", millis());
return 0;
}
if (tocIndex < 0 || tocIndex >= spineTocCache->getTocCount()) {
Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex: tocIndex %d out of range\n", millis(), tocIndex);
return 0;
@ -444,6 +376,7 @@ int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
Serial.printf("[%lu] [EBP] Section not found for TOC index %d\n", millis(), tocIndex);
return 0;
}
return spineIndex;
}

View File

@ -27,7 +27,7 @@ class Epub {
std::unique_ptr<SpineTocCache> spineTocCache;
bool findContentOpfFile(std::string* contentOpfFile) const;
bool parseContentOpf(const std::string& contentOpfFilePath);
bool parseContentOpf(bool useCache);
bool parseTocNcxFile() const;
static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size);

View File

@ -1,6 +1,7 @@
#include "FsHelpers.h"
#include <SD.h>
#include <vector>
bool FsHelpers::removeDir(const char* path) {
// 1. Open the directory
@ -34,3 +35,39 @@ bool FsHelpers::removeDir(const char* path) {
return SD.rmdir(path);
}
/**
 * Normalise a '/'-separated path.
 *
 * The path is split on '/', then rebuilt with single '/' separators:
 *  - empty segments (leading, trailing or repeated '/') are dropped,
 *  - "." segments are dropped,
 *  - ".." removes the previously kept segment; at the root it is ignored.
 *
 * The result never starts or ends with '/'.
 *
 * @param path Path or href to normalise; may be empty.
 * @return The normalised path ("" when nothing remains).
 */
std::string FsHelpers::normalisePath(const std::string& path) {
  std::vector<std::string> components;
  std::string component;

  // Applies one finished segment to the stack of kept components. Used for both
  // '/'-terminated segments and the final one, so a trailing ".." is resolved too.
  const auto applySegment = [&components](const std::string& segment) {
    if (segment.empty() || segment == ".") {
      return;  // contributes nothing to the resulting path
    }
    if (segment == "..") {
      if (!components.empty()) {
        components.pop_back();
      }
      return;
    }
    components.push_back(segment);
  };

  for (const auto c : path) {
    if (c == '/') {
      applySegment(component);
      component.clear();
    } else {
      component += c;
    }
  }
  // The last segment has no terminating '/', so it is still pending here.
  applySegment(component);

  std::string result;
  for (const auto& part : components) {
    if (!result.empty()) {
      result += "/";
    }
    result += part;
  }
  return result;
}

View File

@ -1,6 +1,8 @@
#pragma once
#include <string>
/// Grouping of static filesystem/path helpers; the class carries no state.
class FsHelpers {
 public:
  /// Removes the directory at `path` (implemented in FsHelpers.cpp on top of the SD library).
  /// The bool reports the outcome - presumably true on success; confirm against the implementation.
  static bool removeDir(const char* path);

  /// Returns `path` rebuilt from its '/'-separated segments: empty segments are
  /// dropped and "dir/../" sequences are collapsed, so the result has no leading,
  /// trailing or doubled '/'.
  static std::string normalisePath(const std::string& path);
};

View File

@ -7,45 +7,13 @@
#include <vector>
#include "FsHelpers.h"
namespace {
// Format version stored as the first byte of the cache metadata file.
constexpr uint8_t SPINE_TOC_CACHE_VERSION = 1;

// TODO: Centralize this?
/**
 * Normalise a '/'-separated path.
 *
 * The path is split on '/', then rebuilt with single '/' separators:
 *  - empty segments (leading, trailing or repeated '/') are dropped,
 *  - "." segments are dropped,
 *  - ".." removes the previously kept segment; at the root it is ignored.
 *
 * The result never starts or ends with '/'.
 */
std::string normalisePath(const std::string& path) {
  std::vector<std::string> components;
  std::string component;

  // Applies one finished segment to the stack of kept components. Used for both
  // '/'-terminated segments and the final one, so a trailing ".." is resolved too.
  const auto applySegment = [&components](const std::string& segment) {
    if (segment.empty() || segment == ".") {
      return;  // contributes nothing to the resulting path
    }
    if (segment == "..") {
      if (!components.empty()) {
        components.pop_back();
      }
      return;
    }
    components.push_back(segment);
  };

  for (const auto c : path) {
    if (c == '/') {
      applySegment(component);
      component.clear();
    } else {
      component += c;
    }
  }
  // The last segment has no terminating '/', so it is still pending here.
  applySegment(component);

  std::string result;
  for (const auto& part : components) {
    if (!result.empty()) {
      result += "/";
    }
    result += part;
  }
  return result;
}

// File names of the cache files, each appended to the cache directory path.
constexpr char spineTocMetaBinFile[] = "/spine_toc_meta.bin";
constexpr char spineBinFile[] = "/spine.bin";
constexpr char tocBinFile[] = "/toc.bin";
}  // namespace
bool SpineTocCache::beginWrite() {
@ -56,7 +24,7 @@ bool SpineTocCache::beginWrite() {
Serial.printf("[%lu] [STC] Beginning write to cache path: %s\n", millis(), cachePath.c_str());
// Open spine file for writing
const std::string spineFilePath = cachePath + "/spine.bin";
const std::string spineFilePath = cachePath + spineBinFile;
Serial.printf("[%lu] [STC] Opening spine file: %s\n", millis(), spineFilePath.c_str());
spineFile = SD.open(spineFilePath.c_str(), FILE_WRITE, true);
if (!spineFile) {
@ -65,7 +33,7 @@ bool SpineTocCache::beginWrite() {
}
// Open TOC file for writing
const std::string tocFilePath = cachePath + "/toc.bin";
const std::string tocFilePath = cachePath + tocBinFile;
Serial.printf("[%lu] [STC] Opening toc file: %s\n", millis(), tocFilePath.c_str());
tocFile = SD.open(tocFilePath.c_str(), FILE_WRITE, true);
if (!tocFile) {
@ -78,24 +46,18 @@ bool SpineTocCache::beginWrite() {
return true;
}
// Writes `s` to `file` as a length-prefixed record: a uint32_t byte count in
// the host's in-memory representation, followed by the raw characters.
void SpineTocCache::writeString(File& file, const std::string& s) const {
  const uint32_t byteCount = static_cast<uint32_t>(s.size());
  const auto* prefix = reinterpret_cast<const uint8_t*>(&byteCount);
  const auto* payload = reinterpret_cast<const uint8_t*>(s.data());

  file.write(prefix, sizeof(byteCount));
  file.write(payload, byteCount);
}
// Serialises one spine entry to `file` as: href (length-prefixed string),
// then cumulativeSize, then tocIndex as raw bytes of their declared types.
// The hand-written file.write calls and the serialization:: helpers below them
// emit the same field sequence; readSpineEntry reads the fields in this order.
void SpineTocCache::writeSpineEntry(File& file, const SpineEntry& entry) const {
writeString(file, entry.href);
file.write(reinterpret_cast<const uint8_t*>(&entry.cumulativeSize), sizeof(entry.cumulativeSize));
file.write(reinterpret_cast<const uint8_t*>(&entry.tocIndex), sizeof(entry.tocIndex));
serialization::writeString(file, entry.href);
serialization::writePod(file, entry.cumulativeSize);
serialization::writePod(file, entry.tocIndex);
}
// Serialises one TOC entry to `file` as: title, href, anchor (each a
// length-prefixed string), then level, then spineIndex as raw bytes.
// The hand-written file.write calls and the serialization:: helpers below them
// emit the same field sequence; readTocEntry reads the fields in this order.
// NOTE(review): the `file.write(&entry.level, 1)` form assumes level is one byte wide - confirm in the header.
void SpineTocCache::writeTocEntry(File& file, const TocEntry& entry) const {
writeString(file, entry.title);
writeString(file, entry.href);
writeString(file, entry.anchor);
file.write(&entry.level, 1);
file.write(reinterpret_cast<const uint8_t*>(&entry.spineIndex), sizeof(entry.spineIndex));
serialization::writeString(file, entry.title);
serialization::writeString(file, entry.href);
serialization::writeString(file, entry.anchor);
serialization::writePod(file, entry.level);
serialization::writePod(file, entry.spineIndex);
}
void SpineTocCache::addSpineEntry(const std::string& href) {
@ -131,15 +93,15 @@ bool SpineTocCache::endWrite() {
tocFile.close();
// Write metadata files with counts
const auto spineMetaPath = cachePath + "/spine_meta.bin";
File metaFile = SD.open(spineMetaPath.c_str(), FILE_WRITE, true);
const auto spineTocMetaPath = cachePath + spineTocMetaBinFile;
File metaFile = SD.open(spineTocMetaPath.c_str(), FILE_WRITE, true);
if (!metaFile) {
Serial.printf("[%lu] [STC] Failed to write spine metadata\n", millis());
return false;
}
metaFile.write(&SPINE_TOC_CACHE_VERSION, 1);
metaFile.write(reinterpret_cast<const uint8_t*>(&spineCount), sizeof(spineCount));
metaFile.write(reinterpret_cast<const uint8_t*>(&tocCount), sizeof(tocCount));
serialization::writePod(metaFile, SPINE_TOC_CACHE_VERSION);
serialization::writePod(metaFile, spineCount);
serialization::writePod(metaFile, tocCount);
metaFile.close();
buildMode = false;
@ -147,28 +109,21 @@ bool SpineTocCache::endWrite() {
return true;
}
// Reads a length-prefixed string (uint32_t byte count followed by the raw
// characters) from `is` into `s`.
//
// On a short or failed read `s` is left empty and the stream keeps its failure
// state, so callers can detect the problem through `is`. The length is not
// bounded here: a corrupt prefix can still request a very large allocation.
void SpineTocCache::readString(std::ifstream& is, std::string& s) const {
  uint32_t len = 0;  // stays 0 if the prefix cannot be read, never indeterminate
  if (!is.read(reinterpret_cast<char*>(&len), sizeof(len))) {
    s.clear();
    return;
  }

  s.resize(len);
  if (len > 0 && !is.read(&s[0], len)) {
    // Truncated payload: do not hand back a partially filled buffer.
    s.clear();
  }
}
// Deserialises one spine entry from `is`, reading href, cumulativeSize and
// tocIndex in that order - the order writeSpineEntry stores them in.
// The direct is.read calls and the serialization:: helpers below them read the
// same field sequence. Stream errors are not checked here.
SpineTocCache::SpineEntry SpineTocCache::readSpineEntry(std::ifstream& is) const {
SpineEntry entry;
readString(is, entry.href);
is.read(reinterpret_cast<char*>(&entry.cumulativeSize), sizeof(entry.cumulativeSize));
is.read(reinterpret_cast<char*>(&entry.tocIndex), sizeof(entry.tocIndex));
serialization::readString(is, entry.href);
serialization::readPod(is, entry.cumulativeSize);
serialization::readPod(is, entry.tocIndex);
return entry;
}
// Deserialises one TOC entry from `is`, reading title, href, anchor, level and
// spineIndex in that order - the order writeTocEntry stores them in.
// The direct is.read calls and the serialization:: helpers below them read the
// same field sequence. Stream errors are not checked here.
SpineTocCache::TocEntry SpineTocCache::readTocEntry(std::ifstream& is) const {
TocEntry entry;
readString(is, entry.title);
readString(is, entry.href);
readString(is, entry.anchor);
is.read(reinterpret_cast<char*>(&entry.level), 1);
is.read(reinterpret_cast<char*>(&entry.spineIndex), sizeof(entry.spineIndex));
serialization::readString(is, entry.title);
serialization::readString(is, entry.href);
serialization::readString(is, entry.anchor);
serialization::readPod(is, entry.level);
serialization::readPod(is, entry.spineIndex);
return entry;
}
@ -186,7 +141,7 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
// Read spine entries
{
const auto spineFilePath = "/sd" + cachePath + "/spine.bin";
const auto spineFilePath = "/sd" + cachePath + spineBinFile;
std::ifstream spineStream(spineFilePath.c_str(), std::ios::binary);
if (!spineStream) {
Serial.printf("[%lu] [STC] Failed to open spine file for reading\n", millis());
@ -201,7 +156,7 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
// Read TOC entries
{
const auto tocFilePath = "/sd" + cachePath + "/toc.bin";
const auto tocFilePath = "/sd" + cachePath + tocBinFile;
std::ifstream tocStream(tocFilePath.c_str(), std::ios::binary);
if (!tocStream) {
Serial.printf("[%lu] [STC] Failed to open toc file for reading\n", millis());
@ -220,7 +175,7 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
for (int i = 0; i < spineCount; i++) {
size_t itemSize = 0;
const std::string path = normalisePath(spineEntries[i].href);
const std::string path = FsHelpers::normalisePath(spineEntries[i].href);
if (zip.getInflatedFileSize(path.c_str(), &itemSize)) {
cumSize += itemSize;
spineEntries[i].cumulativeSize = cumSize;
@ -231,21 +186,12 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
Serial.printf("[%lu] [STC] Book size: %lu\n", millis(), cumSize);
// Compute spine TOC mappings
// Compute spine <-> TOC mappings
for (int i = 0; i < spineCount; i++) {
for (int j = 0; j < tocCount; j++) {
if (tocEntries[j].href == spineEntries[i].href) {
spineEntries[i].tocIndex = static_cast<int16_t>(j);
break;
}
}
}
// Compute TOC → spine mappings
for (int i = 0; i < tocCount; i++) {
for (int j = 0; j < spineCount; j++) {
if (spineEntries[j].href == tocEntries[i].href) {
tocEntries[i].spineIndex = static_cast<int16_t>(j);
tocEntries[j].spineIndex = static_cast<int16_t>(i);
break;
}
}
@ -253,7 +199,7 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
// Rewrite spine file with updated data
{
const auto spineFilePath = cachePath + "/spine.bin";
const auto spineFilePath = cachePath + spineBinFile;
File spineFile = SD.open(spineFilePath.c_str(), FILE_WRITE, true);
if (!spineFile) {
Serial.printf("[%lu] [STC] Failed to reopen spine file for writing\n", millis());
@ -268,7 +214,7 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
// Rewrite TOC file with updated data
{
const auto tocFilePath = cachePath + "/toc.bin";
const auto tocFilePath = cachePath + tocBinFile;
File tocFile = SD.open(tocFilePath.c_str(), FILE_WRITE, true);
if (!tocFile) {
Serial.printf("[%lu] [STC] Failed to reopen toc file for writing\n", millis());
@ -293,20 +239,20 @@ bool SpineTocCache::updateMappingsAndSizes(const std::string& epubPath) const {
bool SpineTocCache::load() {
// Load metadata
const auto metaPath = cachePath + "/spine_meta.bin";
if (!SD.exists(metaPath.c_str())) {
Serial.printf("[%lu] [STC] Cache metadata does not exist: %s\n", millis(), metaPath.c_str());
const auto spineTocMetaPath = cachePath + spineTocMetaBinFile;
if (!SD.exists(spineTocMetaPath.c_str())) {
Serial.printf("[%lu] [STC] Cache metadata does not exist: %s\n", millis(), spineTocMetaPath.c_str());
return false;
}
File metaFile = SD.open(metaPath.c_str(), FILE_READ);
File metaFile = SD.open(spineTocMetaPath.c_str(), FILE_READ);
if (!metaFile) {
Serial.printf("[%lu] [STC] Failed to open cache metadata\n", millis());
return false;
}
uint8_t version;
metaFile.read(&version, 1);
serialization::readPod(metaFile, version);
if (version != SPINE_TOC_CACHE_VERSION) {
Serial.printf("[%lu] [STC] Cache version mismatch: expected %d, got %d\n", millis(), SPINE_TOC_CACHE_VERSION,
version);
@ -314,8 +260,9 @@ bool SpineTocCache::load() {
return false;
}
metaFile.read(reinterpret_cast<uint8_t*>(&spineCount), sizeof(spineCount));
metaFile.read(reinterpret_cast<uint8_t*>(&tocCount), sizeof(tocCount));
serialization::readPod(metaFile, spineCount);
serialization::readPod(metaFile, tocCount);
// TODO: Add LUT to back of meta file
metaFile.close();
loaded = true;
@ -334,7 +281,7 @@ SpineTocCache::SpineEntry SpineTocCache::getSpineEntry(const int index) const {
return SpineEntry();
}
const auto spineFilePath = "/sd" + cachePath + "/spine.bin";
const auto spineFilePath = "/sd" + cachePath + spineBinFile;
std::ifstream spineStream(spineFilePath.c_str(), std::ios::binary);
if (!spineStream) {
Serial.printf("[%lu] [STC] Failed to open spine file for reading entry\n", millis());
@ -363,7 +310,7 @@ SpineTocCache::TocEntry SpineTocCache::getTocEntry(const int index) const {
return TocEntry();
}
const auto tocFilePath = "/sd" + cachePath + "/toc.bin";
const auto tocFilePath = "/sd" + cachePath + tocBinFile;
std::ifstream tocStream(tocFilePath.c_str(), std::ios::binary);
if (!tocStream) {
Serial.printf("[%lu] [STC] Failed to open toc file for reading entry\n", millis());

View File

@ -44,8 +44,6 @@ class SpineTocCache {
File spineFile;
File tocFile;
void writeString(File& file, const std::string& s) const;
void readString(std::ifstream& is, std::string& s) const;
void writeSpineEntry(File& file, const SpineEntry& entry) const;
void writeTocEntry(File& file, const TocEntry& entry) const;
SpineEntry readSpineEntry(std::ifstream& is) const;

View File

@ -149,6 +149,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
}
// NOTE: This relies on spine appearing after item manifest
// Only run the spine parsing if there's a cache to add it to
if (self->cache) {
if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "idref") == 0) {
@ -156,15 +158,14 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
// Resolve the idref to href using items map
if (self->items.count(idref) > 0) {
const std::string& href = self->items.at(idref);
if (self->cache) {
self->cache->addSpineEntry(href);
}
}
break;
}
}
return;
}
}
}
void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s, const int len) {

View File

@ -7,17 +7,33 @@ static void writePod(std::ostream& os, const T& value) {
os.write(reinterpret_cast<const char*>(&value), sizeof(T));
}
// Writes the in-memory bytes of `value` (sizeof(T) of them) to `file`.
// The result of File::write is not inspected.
template <typename T>
static void writePod(File& file, const T& value) {
  const auto* rawBytes = reinterpret_cast<const uint8_t*>(&value);
  file.write(rawBytes, sizeof(value));
}
// Fills `value` with the next sizeof(T) bytes read from `is`.
// The stream state is not checked; callers inspect `is` if they need to detect short reads.
template <typename T>
static void readPod(std::istream& is, T& value) {
  char* const destination = reinterpret_cast<char*>(&value);
  is.read(destination, sizeof(value));
}
// Fills `value` with sizeof(T) bytes requested from `file`.
// The result of File::read is not inspected.
template <typename T>
static void readPod(File& file, T& value) {
  auto* const destination = reinterpret_cast<uint8_t*>(&value);
  file.read(destination, sizeof(value));
}
// Writes `s` to `os` as a length-prefixed record: a uint32_t byte count
// (via writePod) followed by the string's raw characters.
static void writeString(std::ostream& os, const std::string& s) {
  const auto byteCount = static_cast<uint32_t>(s.size());
  writePod(os, byteCount);
  os.write(s.data(), byteCount);
}
// Writes `s` to `file` as a length-prefixed record: a uint32_t byte count
// (via writePod) followed by the string's raw characters.
static void writeString(File& file, const std::string& s) {
  const auto byteCount = static_cast<uint32_t>(s.size());
  writePod(file, byteCount);

  const auto* payload = reinterpret_cast<const uint8_t*>(s.data());
  file.write(payload, byteCount);
}
static void readString(std::istream& is, std::string& s) {
uint32_t len;
readPod(is, len);