diff --git a/README.md b/README.md
index f015f71..d59df83 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ This project is **not affiliated with Xteink**; it's built as a community projec
 
 ## Features & Usage
 
-- [x] EPUB parsing and rendering
+- [x] EPUB parsing and rendering (EPUB 2 and EPUB 3)
 - [ ] Image support within EPUB
 - [x] Saved reading position
 - [x] File explorer with file picker
diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp
index fde2e16..234344d 100644
--- a/lib/Epub/Epub.cpp
+++ b/lib/Epub/Epub.cpp
@@ -8,6 +8,7 @@
 
 #include "Epub/parsers/ContainerParser.h"
 #include "Epub/parsers/ContentOpfParser.h"
+#include "Epub/parsers/TocNavParser.h"
 #include "Epub/parsers/TocNcxParser.h"
 
 bool Epub::findContentOpfFile(std::string* contentOpfFile) const {
@@ -80,6 +81,10 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
     tocNcxItem = opfParser.tocNcxPath;
   }
 
+  if (!opfParser.tocNavPath.empty()) {
+    tocNavItem = opfParser.tocNavPath;
+  }
+
   Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
   return true;
 }
@@ -141,6 +146,72 @@ bool Epub::parseTocNcxFile() const {
   return true;
 }
 
+// Parses the EPUB 3 navigation document named by tocNavItem. The item is extracted to a temp file
+// under getCachePath(), streamed through TocNavParser in 1024-byte chunks, and the temp file is
+// removed again whether parsing succeeds or fails. Returns false when no nav item is set, a file
+// cannot be opened, the parser cannot be set up, or the data cannot be read and parsed completely.
+bool Epub::parseTocNavFile() const {
+  // the nav file should have been specified in the content.opf file (EPUB 3)
+  if (tocNavItem.empty()) {
+    Serial.printf("[%lu] [EBP] No nav file specified\n", millis());
+    return false;
+  }
+
+  Serial.printf("[%lu] [EBP] Parsing toc nav file: %s\n", millis(), tocNavItem.c_str());
+
+  const auto tmpNavPath = getCachePath() + "/toc.nav";
+  FsFile tempNavFile;
+  if (!SdMan.openFileForWrite("EBP", tmpNavPath, tempNavFile)) {
+    return false;
+  }
+  readItemContentsToStream(tocNavItem, tempNavFile, 1024);
+  tempNavFile.close();
+  if (!SdMan.openFileForRead("EBP", tmpNavPath, tempNavFile)) {
+    // nothing was opened, but the extracted copy must not be left behind
+    SdMan.remove(tmpNavPath.c_str());
+    return false;
+  }
+  const auto navSize = tempNavFile.size();
+
+  // from here on every exit has to close the handle and delete the temp file
+  const auto discardTempFile = [&tempNavFile, &tmpNavPath]() {
+    tempNavFile.close();
+    SdMan.remove(tmpNavPath.c_str());
+  };
+
+  TocNavParser navParser(contentBasePath, navSize, bookMetadataCache.get());
+
+  if (!navParser.setup()) {
+    Serial.printf("[%lu] [EBP] Could not setup toc nav parser\n", millis());
+    discardTempFile();
+    return false;
+  }
+
+  const auto navBuffer = static_cast<uint8_t*>(malloc(1024));
+  if (!navBuffer) {
+    Serial.printf("[%lu] [EBP] Could not allocate memory for toc nav parser\n", millis());
+    discardTempFile();
+    return false;
+  }
+
+  while (tempNavFile.available()) {
+    const auto readSize = tempNavFile.read(navBuffer, 1024);
+    // a failed or empty read must not reach the parser: a negative count would wrap to a huge size_t
+    if (readSize <= 0 || navParser.write(navBuffer, readSize) != static_cast<size_t>(readSize)) {
+      Serial.printf("[%lu] [EBP] Could not process all toc nav data\n", millis());
+      free(navBuffer);
+      discardTempFile();
+      return false;
+    }
+  }
+
+  free(navBuffer);
+  discardTempFile();
+
+  Serial.printf("[%lu] [EBP] Parsed TOC nav items\n", millis());
+  return true;
+}
+
 // load in the meta data for the epub file
 bool Epub::load(const bool buildIfMissing) {
   Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
@@ -184,15 +255,31 @@ bool Epub::load(const bool buildIfMissing) {
     return false;
   }
 
-  // TOC Pass
+  // TOC Pass - try EPUB 3 nav first, fall back to NCX
   if (!bookMetadataCache->beginTocPass()) {
     Serial.printf("[%lu] [EBP] Could not begin writing toc pass\n", millis());
     return false;
   }
-  if (!parseTocNcxFile()) {
-    Serial.printf("[%lu] [EBP] Could not parse toc\n", millis());
-    return false;
+
+  bool tocParsed = false;
+
+  // Try EPUB 3 nav document first (preferred)
+  if (!tocNavItem.empty()) {
+    Serial.printf("[%lu] [EBP] Attempting to parse EPUB 3 nav document\n", millis());
+    tocParsed = parseTocNavFile();
   }
+
+  // Fall back to NCX if nav parsing failed or wasn't available
+  if (!tocParsed && !tocNcxItem.empty()) {
+    Serial.printf("[%lu] [EBP] Falling back to NCX TOC\n", millis());
+    tocParsed = parseTocNcxFile();
+  }
+
+  if (!tocParsed) {
+    Serial.printf("[%lu] [EBP] Warning: Could not parse any TOC format\n", millis());
+    // Continue anyway - book will work without TOC
+  }
+
   if (!bookMetadataCache->endTocPass()) {
     Serial.printf("[%lu] [EBP] Could not end writing toc pass\n", millis());
     return false;
diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h
index 1b82462..a6555e7 100644
--- a/lib/Epub/Epub.h
+++ b/lib/Epub/Epub.h
@@ -12,8 +12,10 @@
 class ZipFile;
 
 class Epub {
-  // the ncx file
+  // the ncx file (EPUB 2)
   std::string tocNcxItem;
+  // the nav file (EPUB 3)
+  std::string tocNavItem;
   // where is the EPUBfile?
   std::string filepath;
   // the base path for items in the EPUB file
@@ -26,6 +28,7 @@ class Epub {
   bool findContentOpfFile(std::string* contentOpfFile) const;
   bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
   bool parseTocNcxFile() const;
+  bool parseTocNavFile() const;
 
  public:
   explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp
index c939877..2c90d01 100644
--- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp
+++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp
@@ -161,6 +161,7 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
     std::string itemId;
     std::string href;
     std::string mediaType;
+    std::string properties;
 
     for (int i = 0; atts[i]; i += 2) {
       if (strcmp(atts[i], "id") == 0) {
@@ -169,6 +170,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
         href = self->baseContentPath + atts[i + 1];
       } else if (strcmp(atts[i], "media-type") == 0) {
         mediaType = atts[i + 1];
+      } else if (strcmp(atts[i], "properties") == 0) {
+        properties = atts[i + 1];
       }
     }
 
@@ -188,6 +191,15 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
                       href.c_str());
       }
     }
+
+    // EPUB 3: Check for nav document (properties contains "nav")
+    if (!properties.empty() && self->tocNavPath.empty()) {
+      // Properties is space-separated; pad both ends so only the exact token "nav" matches
+      if ((" " + properties + " ").find(" nav ") != std::string::npos) {
+        self->tocNavPath = href;
+        Serial.printf("[%lu] [COF] Found EPUB 3 nav document: %s\n", millis(), href.c_str());
+      }
+    }
     return;
   }
 
diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h
index 245fca3..1940aaa 100644
--- a/lib/Epub/Epub/parsers/ContentOpfParser.h
+++ b/lib/Epub/Epub/parsers/ContentOpfParser.h
@@ -35,6 +35,7 @@ class ContentOpfParser final : public Print {
   std::string title;
   std::string author;
   std::string tocNcxPath;
+  std::string tocNavPath;  // EPUB 3 nav document path
   std::string coverItemHref;
   std::string textReferenceHref;
 
diff --git a/lib/Epub/Epub/parsers/TocNavParser.cpp b/lib/Epub/Epub/parsers/TocNavParser.cpp
new file mode 100644
index 0000000..b8a4e7f
--- /dev/null
+++ b/lib/Epub/Epub/parsers/TocNavParser.cpp
@@ -0,0 +1,191 @@
+#include "TocNavParser.h"
+
+#include <HardwareSerial.h>
+
+#include "../BookMetadataCache.h"
+
+// Creates the expat parser and registers this object's element and character-data callbacks.
+// Returns false when the parser cannot be allocated.
+bool TocNavParser::setup() {
+  parser = XML_ParserCreate(nullptr);
+  if (!parser) {
+    Serial.printf("[%lu] [NAV] Couldn't allocate memory for parser\n", millis());
+    return false;
+  }
+
+  XML_SetUserData(parser, this);
+  XML_SetElementHandler(parser, startElement, endElement);
+  XML_SetCharacterDataHandler(parser, characterData);
+  return true;
+}
+
+// Detaches the callbacks and frees the expat parser if it is still alive.
+TocNavParser::~TocNavParser() {
+  if (parser) {
+    XML_StopParser(parser, XML_FALSE);
+    XML_SetElementHandler(parser, nullptr, nullptr);
+    XML_SetCharacterDataHandler(parser, nullptr);
+    XML_ParserFree(parser);
+    parser = nullptr;
+  }
+}
+
+// Single-byte overload; forwards to the buffer overload.
+size_t TocNavParser::write(const uint8_t data) { return write(&data, 1); }
+
+// Feeds `size` bytes to expat in chunks of at most 1024 bytes. The chunk that brings remainingSize
+// to zero is flagged as final. Returns `size` on success; on any failure the parser is freed and 0
+// is returned, so later calls also return 0.
+size_t TocNavParser::write(const uint8_t* buffer, const size_t size) {
+  if (!parser) return 0;
+
+  const uint8_t* currentBufferPos = buffer;
+  auto remainingInBuffer = size;
+
+  while (remainingInBuffer > 0) {
+    void* const buf = XML_GetBuffer(parser, 1024);
+    if (!buf) {
+      Serial.printf("[%lu] [NAV] Couldn't allocate memory for buffer\n", millis());
+      XML_StopParser(parser, XML_FALSE);
+      XML_SetElementHandler(parser, nullptr, nullptr);
+      XML_SetCharacterDataHandler(parser, nullptr);
+      XML_ParserFree(parser);
+      parser = nullptr;
+      return 0;
+    }
+
+    const auto toRead = remainingInBuffer < 1024 ? remainingInBuffer : 1024;
+    memcpy(buf, currentBufferPos, toRead);
+
+    if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
+      Serial.printf("[%lu] [NAV] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
+                    XML_ErrorString(XML_GetErrorCode(parser)));
+      XML_StopParser(parser, XML_FALSE);
+      XML_SetElementHandler(parser, nullptr, nullptr);
+      XML_SetCharacterDataHandler(parser, nullptr);
+      XML_ParserFree(parser);
+      parser = nullptr;
+      return 0;
+    }
+
+    currentBufferPos += toRead;
+    remainingInBuffer -= toRead;
+    remainingSize -= toRead;
+  }
+  return size;
+}
+
+void XMLCALL TocNavParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
+  auto* self = static_cast<TocNavParser*>(userData);
+
+  // Track HTML structure loosely - we mainly care about finding