New book.bin spine and table of contents cache (#104)
## Summary * Use single unified cache file for book spine, table of contents, and core metadata (title, author, cover image) * Use new temp item store file in OPF parsing to store items to be rescaned when parsing spine * This avoids us holding these items in memory * Use new toc.bin.tmp and spine.bin.tmp to build out partial toc / spine data as part of parsing content.opf and the NCX file * These files are re-read multiple times to ultimately build book.bin ## Additional Context * Spec for file format included below as an image * This should help with: * #10 * #60 * #99
This commit is contained in:
@@ -1,11 +1,16 @@
|
||||
#include "ContentOpfParser.h"
|
||||
|
||||
#include <FsHelpers.h>
|
||||
#include <HardwareSerial.h>
|
||||
#include <Serialization.h>
|
||||
#include <ZipFile.h>
|
||||
|
||||
#include "../BookMetadataCache.h"
|
||||
|
||||
namespace {
|
||||
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
|
||||
}
|
||||
constexpr char itemCacheFile[] = "/.items.bin";
|
||||
} // namespace
|
||||
|
||||
bool ContentOpfParser::setup() {
|
||||
parser = XML_ParserCreate(nullptr);
|
||||
@@ -28,6 +33,12 @@ ContentOpfParser::~ContentOpfParser() {
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
}
|
||||
if (tempItemStore) {
|
||||
tempItemStore.close();
|
||||
}
|
||||
if (SD.exists((cachePath + itemCacheFile).c_str())) {
|
||||
SD.remove((cachePath + itemCacheFile).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); }
|
||||
@@ -94,11 +105,21 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
|
||||
if (self->state == IN_PACKAGE && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
|
||||
self->state = IN_MANIFEST;
|
||||
if (!FsHelpers::openFileForWrite("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
|
||||
Serial.printf(
|
||||
"[%lu] [COF] Couldn't open temp items file for writing. This is probably going to be a fatal error.\n",
|
||||
millis());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->state == IN_PACKAGE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) {
|
||||
self->state = IN_SPINE;
|
||||
if (!FsHelpers::openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
|
||||
Serial.printf(
|
||||
"[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n",
|
||||
millis());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -135,7 +156,13 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
}
|
||||
}
|
||||
|
||||
self->items[itemId] = href;
|
||||
// Write items down to SD card
|
||||
serialization::writeString(self->tempItemStore, itemId);
|
||||
serialization::writeString(self->tempItemStore, href);
|
||||
|
||||
if (itemId == self->coverItemId) {
|
||||
self->coverItemHref = href;
|
||||
}
|
||||
|
||||
if (mediaType == MEDIA_TYPE_NCX) {
|
||||
if (self->tocNcxPath.empty()) {
|
||||
@@ -148,14 +175,29 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) {
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], "idref") == 0) {
|
||||
self->spineRefs.emplace_back(atts[i + 1]);
|
||||
break;
|
||||
// NOTE: This relies on spine appearing after item manifest (which is pretty safe as it's part of the EPUB spec)
|
||||
// Only run the spine parsing if there's a cache to add it to
|
||||
if (self->cache) {
|
||||
if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) {
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], "idref") == 0) {
|
||||
const std::string idref = atts[i + 1];
|
||||
// Resolve the idref to href using items map
|
||||
self->tempItemStore.seek(0);
|
||||
std::string itemId;
|
||||
std::string href;
|
||||
while (self->tempItemStore.available()) {
|
||||
serialization::readString(self->tempItemStore, itemId);
|
||||
serialization::readString(self->tempItemStore, href);
|
||||
if (itemId == idref) {
|
||||
self->cache->createSpineEntry(href);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,11 +216,13 @@ void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name)
|
||||
|
||||
if (self->state == IN_SPINE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) {
|
||||
self->state = IN_PACKAGE;
|
||||
self->tempItemStore.close();
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->state == IN_MANIFEST && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
|
||||
self->state = IN_PACKAGE;
|
||||
self->tempItemStore.close();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
#include <Print.h>
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Epub.h"
|
||||
#include "expat.h"
|
||||
|
||||
class BookMetadataCache;
|
||||
|
||||
class ContentOpfParser final : public Print {
|
||||
enum ParserState {
|
||||
START,
|
||||
@@ -16,10 +16,14 @@ class ContentOpfParser final : public Print {
|
||||
IN_SPINE,
|
||||
};
|
||||
|
||||
const std::string& cachePath;
|
||||
const std::string& baseContentPath;
|
||||
size_t remainingSize;
|
||||
XML_Parser parser = nullptr;
|
||||
ParserState state = START;
|
||||
BookMetadataCache* cache;
|
||||
File tempItemStore;
|
||||
std::string coverItemId;
|
||||
|
||||
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
||||
static void characterData(void* userData, const XML_Char* s, int len);
|
||||
@@ -28,12 +32,11 @@ class ContentOpfParser final : public Print {
|
||||
public:
|
||||
std::string title;
|
||||
std::string tocNcxPath;
|
||||
std::string coverItemId;
|
||||
std::map<std::string, std::string> items;
|
||||
std::vector<std::string> spineRefs;
|
||||
std::string coverItemHref;
|
||||
|
||||
explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize)
|
||||
: baseContentPath(baseContentPath), remainingSize(xmlSize) {}
|
||||
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
|
||||
BookMetadataCache* cache)
|
||||
: cachePath(cachePath), baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
|
||||
~ContentOpfParser() override;
|
||||
|
||||
bool setup();
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#include "TocNcxParser.h"
|
||||
|
||||
#include <Esp.h>
|
||||
#include <HardwareSerial.h>
|
||||
|
||||
#include "../BookMetadataCache.h"
|
||||
|
||||
bool TocNcxParser::setup() {
|
||||
parser = XML_ParserCreate(nullptr);
|
||||
if (!parser) {
|
||||
@@ -167,8 +168,9 @@ void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) {
|
||||
href = href.substr(0, pos);
|
||||
}
|
||||
|
||||
// Push to vector
|
||||
self->toc.push_back({std::move(self->currentLabel), std::move(href), std::move(anchor), self->currentDepth});
|
||||
if (self->cache) {
|
||||
self->cache->createTocEntry(self->currentLabel, href, anchor, self->currentDepth);
|
||||
}
|
||||
|
||||
// Clear them so we don't re-add them if there are weird XML structures
|
||||
self->currentLabel.clear();
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
#pragma once
|
||||
#include <Print.h>
|
||||
#include <expat.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Epub/EpubTocEntry.h"
|
||||
#include "expat.h"
|
||||
class BookMetadataCache;
|
||||
|
||||
class TocNcxParser final : public Print {
|
||||
enum ParserState { START, IN_NCX, IN_NAV_MAP, IN_NAV_POINT, IN_NAV_LABEL, IN_NAV_LABEL_TEXT, IN_CONTENT };
|
||||
@@ -14,6 +13,7 @@ class TocNcxParser final : public Print {
|
||||
size_t remainingSize;
|
||||
XML_Parser parser = nullptr;
|
||||
ParserState state = START;
|
||||
BookMetadataCache* cache;
|
||||
|
||||
std::string currentLabel;
|
||||
std::string currentSrc;
|
||||
@@ -24,10 +24,8 @@ class TocNcxParser final : public Print {
|
||||
static void endElement(void* userData, const XML_Char* name);
|
||||
|
||||
public:
|
||||
std::vector<EpubTocEntry> toc;
|
||||
|
||||
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize)
|
||||
: baseContentPath(baseContentPath), remainingSize(xmlSize) {}
|
||||
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache)
|
||||
: baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
|
||||
~TocNcxParser() override;
|
||||
|
||||
bool setup();
|
||||
|
||||
Reference in New Issue
Block a user