Merge branch 'daveallie:master' into feature/show-img-alt-text

This commit is contained in:
Jonas Diemer
2025-12-30 14:18:31 +01:00
committed by GitHub
16 changed files with 391 additions and 97 deletions

View File

@@ -74,6 +74,7 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
bookMetadata.title = opfParser.title;
bookMetadata.author = opfParser.author;
bookMetadata.coverItemHref = opfParser.coverItemHref;
bookMetadata.textReferenceHref = opfParser.textReferenceHref;
if (!opfParser.tocNcxPath.empty()) {
tocNcxItem = opfParser.tocNcxPath;
@@ -108,17 +109,20 @@ bool Epub::parseTocNcxFile() const {
if (!ncxParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
tempNcxFile.close();
return false;
}
const auto ncxBuffer = static_cast<uint8_t*>(malloc(1024));
if (!ncxBuffer) {
Serial.printf("[%lu] [EBP] Could not allocate memory for toc ncx parser\n", millis());
tempNcxFile.close();
return false;
}
while (tempNcxFile.available()) {
const auto readSize = tempNcxFile.read(ncxBuffer, 1024);
if (readSize == 0) break;
const auto processedSize = ncxParser.write(ncxBuffer, readSize);
if (processedSize != readSize) {
@@ -138,7 +142,7 @@ bool Epub::parseTocNcxFile() const {
}
// load in the meta data for the epub file
bool Epub::load() {
bool Epub::load(const bool buildIfMissing) {
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
// Initialize spine/TOC cache
@@ -150,6 +154,11 @@ bool Epub::load() {
return true;
}
// If we didn't load from cache above and we aren't allowed to build, fail now
if (!buildIfMissing) {
return false;
}
// Cache doesn't exist or is invalid, build it
Serial.printf("[%lu] [EBP] Cache not found, building spine/TOC cache\n", millis());
setupCacheDir();
@@ -426,6 +435,35 @@ size_t Epub::getBookSize() const {
return getCumulativeSpineItemSize(getSpineItemsCount() - 1);
}
int Epub::getSpineIndexForTextReference() const {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
Serial.printf("[%lu] [EBP] getSpineIndexForTextReference called but cache not loaded\n", millis());
return 0;
}
Serial.printf("[%lu] [ERS] Core Metadata: cover(%d)=%s, textReference(%d)=%s\n", millis(),
bookMetadataCache->coreMetadata.coverItemHref.size(),
bookMetadataCache->coreMetadata.coverItemHref.c_str(),
bookMetadataCache->coreMetadata.textReferenceHref.size(),
bookMetadataCache->coreMetadata.textReferenceHref.c_str());
if (bookMetadataCache->coreMetadata.textReferenceHref.empty()) {
// there was no textReference in epub, so we return 0 (the first chapter)
return 0;
}
// loop through spine items to get the correct index matching the text href
for (size_t i = 0; i < getSpineItemsCount(); i++) {
if (getSpineItem(i).href == bookMetadataCache->coreMetadata.textReferenceHref) {
Serial.printf("[%lu] [ERS] Text reference %s found at index %d\n", millis(),
bookMetadataCache->coreMetadata.textReferenceHref.c_str(), i);
return i;
}
}
// This should not happen, as we checked for empty textReferenceHref earlier
Serial.printf("[%lu] [EBP] Section not found for text reference\n", millis());
return 0;
}
// Calculate progress in book
uint8_t Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) const {
const size_t bookSize = getBookSize();

View File

@@ -34,7 +34,7 @@ class Epub {
}
~Epub() = default;
std::string& getBasePath() { return contentBasePath; }
bool load();
bool load(bool buildIfMissing = true);
bool clearCache() const;
void setupCacheDir() const;
const std::string& getCachePath() const;
@@ -54,6 +54,7 @@ class Epub {
int getSpineIndexForTocIndex(int tocIndex) const;
int getTocIndexForSpineIndex(int spineIndex) const;
size_t getCumulativeSpineItemSize(int spineIndex) const;
int getSpineIndexForTextReference() const;
size_t getBookSize() const;
uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const;

View File

@@ -9,7 +9,7 @@
#include "FsHelpers.h"
namespace {
constexpr uint8_t BOOK_CACHE_VERSION = 2;
constexpr uint8_t BOOK_CACHE_VERSION = 3;
constexpr char bookBinFile[] = "/book.bin";
constexpr char tmpSpineBinFile[] = "/spine.bin.tmp";
constexpr char tmpTocBinFile[] = "/toc.bin.tmp";
@@ -87,8 +87,8 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
constexpr uint32_t headerASize =
sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount);
const uint32_t metadataSize =
metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() + sizeof(uint32_t) * 3;
const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() +
metadata.textReferenceHref.size() + sizeof(uint32_t) * 4;
const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount;
const uint32_t lutOffset = headerASize + metadataSize;
@@ -101,6 +101,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
serialization::writeString(bookFile, metadata.title);
serialization::writeString(bookFile, metadata.author);
serialization::writeString(bookFile, metadata.coverItemHref);
serialization::writeString(bookFile, metadata.textReferenceHref);
// Loop through spine entries, writing LUT positions
spineFile.seek(0);
@@ -289,6 +290,7 @@ bool BookMetadataCache::load() {
serialization::readString(bookFile, coreMetadata.title);
serialization::readString(bookFile, coreMetadata.author);
serialization::readString(bookFile, coreMetadata.coverItemHref);
serialization::readString(bookFile, coreMetadata.textReferenceHref);
loaded = true;
Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);

View File

@@ -10,6 +10,7 @@ class BookMetadataCache {
std::string title;
std::string author;
std::string coverItemHref;
std::string textReferenceHref;
};
struct SpineEntry {

View File

@@ -57,7 +57,6 @@ void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::BLOCK_STYLE style
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
(void)atts;
// Middle of skip
if (self->skipUntilDepth < self->depth) {
@@ -111,7 +110,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->startNewTextBlock(TextBlock::CENTER_ALIGN);
self->boldUntilDepth = min(self->boldUntilDepth, self->depth);
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (strcmp(name, "br") == 0) {
self->startNewTextBlock(self->currentTextBlock->getStyle());
@@ -119,9 +118,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
self->startNewTextBlock(TextBlock::JUSTIFIED);
}
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
self->boldUntilDepth = min(self->boldUntilDepth, self->depth);
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
self->italicUntilDepth = min(self->italicUntilDepth, self->depth);
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
}
self->depth += 1;
@@ -180,7 +179,6 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
(void)name;
if (self->partWordBufferIndex > 0) {
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file.
@@ -263,9 +261,9 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
return false;
}
const size_t len = file.read(static_cast<uint8_t*>(buf), 1024);
const size_t len = file.read(buf, 1024);
if (len == 0) {
if (len == 0 && file.available() > 0) {
Serial.printf("[%lu] [EHP] File read error\n", millis());
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks

View File

@@ -127,6 +127,18 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
return;
}
if (self->state == IN_PACKAGE && (strcmp(name, "guide") == 0 || strcmp(name, "opf:guide") == 0)) {
self->state = IN_GUIDE;
// TODO Remove print
Serial.printf("[%lu] [COF] Entering guide state.\n", millis());
if (!SdMan.openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
Serial.printf(
"[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n",
millis());
}
return;
}
if (self->state == IN_METADATA && (strcmp(name, "meta") == 0 || strcmp(name, "opf:meta") == 0)) {
bool isCover = false;
std::string coverItemId;
@@ -205,6 +217,29 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
return;
}
}
// parse the guide
if (self->state == IN_GUIDE && (strcmp(name, "reference") == 0 || strcmp(name, "opf:reference") == 0)) {
std::string type;
std::string textHref;
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "type") == 0) {
type = atts[i + 1];
if (type == "text" || type == "start") {
continue;
} else {
Serial.printf("[%lu] [COF] Skipping non-text reference in guide: %s\n", millis(), type.c_str());
break;
}
} else if (strcmp(atts[i], "href") == 0) {
textHref = self->baseContentPath + atts[i + 1];
}
}
if ((type == "text" || (type == "start" && !self->textReferenceHref.empty())) && (textHref.length() > 0)) {
Serial.printf("[%lu] [COF] Found %s reference in guide: %s.\n", millis(), type.c_str(), textHref.c_str());
self->textReferenceHref = textHref;
}
return;
}
}
void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s, const int len) {
@@ -231,6 +266,12 @@ void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name)
return;
}
if (self->state == IN_GUIDE && (strcmp(name, "guide") == 0 || strcmp(name, "opf:guide") == 0)) {
self->state = IN_PACKAGE;
self->tempItemStore.close();
return;
}
if (self->state == IN_MANIFEST && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
self->state = IN_PACKAGE;
self->tempItemStore.close();

View File

@@ -15,6 +15,7 @@ class ContentOpfParser final : public Print {
IN_BOOK_AUTHOR,
IN_MANIFEST,
IN_SPINE,
IN_GUIDE,
};
const std::string& cachePath;
@@ -35,6 +36,7 @@ class ContentOpfParser final : public Print {
std::string author;
std::string tocNcxPath;
std::string coverItemHref;
std::string textReferenceHref;
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
BookMetadataCache* cache)