Merge branch 'master' into hyphenation-v2
This commit is contained in:
@@ -1,12 +1,11 @@
|
||||
#include "Epub.h"
|
||||
|
||||
#include <FsHelpers.h>
|
||||
#include <HardwareSerial.h>
|
||||
#include <JpegToBmpConverter.h>
|
||||
#include <SD.h>
|
||||
#include <ZipFile.h>
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Epub/FsHelpers.h"
|
||||
#include "Epub/parsers/ContainerParser.h"
|
||||
#include "Epub/parsers/ContentOpfParser.h"
|
||||
#include "Epub/parsers/TocNcxParser.h"
|
||||
@@ -30,31 +29,39 @@ bool Epub::findContentOpfFile(std::string* contentOpfFile) const {
|
||||
// Stream read (reusing your existing stream logic)
|
||||
if (!readItemContentsToStream(containerPath, containerParser, 512)) {
|
||||
Serial.printf("[%lu] [EBP] Could not read META-INF/container.xml\n", millis());
|
||||
containerParser.teardown();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extract the result
|
||||
if (containerParser.fullPath.empty()) {
|
||||
Serial.printf("[%lu] [EBP] Could not find valid rootfile in container.xml\n", millis());
|
||||
containerParser.teardown();
|
||||
return false;
|
||||
}
|
||||
|
||||
*contentOpfFile = std::move(containerParser.fullPath);
|
||||
|
||||
containerParser.teardown();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Epub::parseContentOpf(const std::string& contentOpfFilePath) {
|
||||
bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
|
||||
std::string contentOpfFilePath;
|
||||
if (!findContentOpfFile(&contentOpfFilePath)) {
|
||||
Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1);
|
||||
|
||||
Serial.printf("[%lu] [EBP] Parsing content.opf: %s\n", millis(), contentOpfFilePath.c_str());
|
||||
|
||||
size_t contentOpfSize;
|
||||
if (!getItemSize(contentOpfFilePath, &contentOpfSize)) {
|
||||
Serial.printf("[%lu] [EBP] Could not get size of content.opf\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
ContentOpfParser opfParser(getBasePath(), contentOpfSize);
|
||||
ContentOpfParser opfParser(getCachePath(), getBasePath(), contentOpfSize, bookMetadataCache.get());
|
||||
Serial.printf("[%lu] [MEM] Free: %d bytes, Total: %d bytes, Min Free: %d bytes\n", millis(), ESP.getFreeHeap(),
|
||||
ESP.getHeapSize(), ESP.getMinFreeHeap());
|
||||
|
||||
if (!opfParser.setup()) {
|
||||
Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
|
||||
@@ -63,137 +70,154 @@ bool Epub::parseContentOpf(const std::string& contentOpfFilePath) {
|
||||
|
||||
if (!readItemContentsToStream(contentOpfFilePath, opfParser, 1024)) {
|
||||
Serial.printf("[%lu] [EBP] Could not read content.opf\n", millis());
|
||||
opfParser.teardown();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Grab data from opfParser into epub
|
||||
title = opfParser.title;
|
||||
if (!opfParser.coverItemId.empty() && opfParser.items.count(opfParser.coverItemId) > 0) {
|
||||
coverImageItem = opfParser.items.at(opfParser.coverItemId);
|
||||
}
|
||||
bookMetadata.title = opfParser.title;
|
||||
// TODO: Parse author
|
||||
bookMetadata.author = "";
|
||||
bookMetadata.coverItemHref = opfParser.coverItemHref;
|
||||
|
||||
if (!opfParser.tocNcxPath.empty()) {
|
||||
tocNcxItem = opfParser.tocNcxPath;
|
||||
}
|
||||
|
||||
for (auto& spineRef : opfParser.spineRefs) {
|
||||
if (opfParser.items.count(spineRef)) {
|
||||
spine.emplace_back(spineRef, opfParser.items.at(spineRef));
|
||||
}
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
|
||||
|
||||
opfParser.teardown();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Epub::parseTocNcxFile() {
|
||||
bool Epub::parseTocNcxFile() const {
|
||||
// the ncx file should have been specified in the content.opf file
|
||||
if (tocNcxItem.empty()) {
|
||||
Serial.printf("[%lu] [EBP] No ncx file specified\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t tocSize;
|
||||
if (!getItemSize(tocNcxItem, &tocSize)) {
|
||||
Serial.printf("[%lu] [EBP] Could not get size of toc ncx\n", millis());
|
||||
Serial.printf("[%lu] [EBP] Parsing toc ncx file: %s\n", millis(), tocNcxItem.c_str());
|
||||
|
||||
const auto tmpNcxPath = getCachePath() + "/toc.ncx";
|
||||
File tempNcxFile;
|
||||
if (!FsHelpers::openFileForWrite("EBP", tmpNcxPath, tempNcxFile)) {
|
||||
return false;
|
||||
}
|
||||
readItemContentsToStream(tocNcxItem, tempNcxFile, 1024);
|
||||
tempNcxFile.close();
|
||||
if (!FsHelpers::openFileForRead("EBP", tmpNcxPath, tempNcxFile)) {
|
||||
return false;
|
||||
}
|
||||
const auto ncxSize = tempNcxFile.size();
|
||||
|
||||
TocNcxParser ncxParser(contentBasePath, tocSize);
|
||||
TocNcxParser ncxParser(contentBasePath, ncxSize, bookMetadataCache.get());
|
||||
|
||||
if (!ncxParser.setup()) {
|
||||
Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!readItemContentsToStream(tocNcxItem, ncxParser, 1024)) {
|
||||
Serial.printf("[%lu] [EBP] Could not read toc ncx stream\n", millis());
|
||||
ncxParser.teardown();
|
||||
const auto ncxBuffer = static_cast<uint8_t*>(malloc(1024));
|
||||
if (!ncxBuffer) {
|
||||
Serial.printf("[%lu] [EBP] Could not allocate memory for toc ncx parser\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
this->toc = std::move(ncxParser.toc);
|
||||
while (tempNcxFile.available()) {
|
||||
const auto readSize = tempNcxFile.read(ncxBuffer, 1024);
|
||||
const auto processedSize = ncxParser.write(ncxBuffer, readSize);
|
||||
|
||||
Serial.printf("[%lu] [EBP] Parsed %d TOC items\n", millis(), this->toc.size());
|
||||
if (processedSize != readSize) {
|
||||
Serial.printf("[%lu] [EBP] Could not process all toc ncx data\n", millis());
|
||||
free(ncxBuffer);
|
||||
tempNcxFile.close();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ncxParser.teardown();
|
||||
free(ncxBuffer);
|
||||
tempNcxFile.close();
|
||||
SD.remove(tmpNcxPath.c_str());
|
||||
|
||||
Serial.printf("[%lu] [EBP] Parsed TOC items\n", millis());
|
||||
return true;
|
||||
}
|
||||
|
||||
// load in the meta data for the epub file
|
||||
bool Epub::load() {
|
||||
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
|
||||
ZipFile zip("/sd" + filepath);
|
||||
|
||||
std::string contentOpfFilePath;
|
||||
if (!findContentOpfFile(&contentOpfFilePath)) {
|
||||
Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
|
||||
// Initialize spine/TOC cache
|
||||
bookMetadataCache.reset(new BookMetadataCache(cachePath));
|
||||
|
||||
// Try to load existing cache first
|
||||
if (bookMetadataCache->load()) {
|
||||
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Cache doesn't exist or is invalid, build it
|
||||
Serial.printf("[%lu] [EBP] Cache not found, building spine/TOC cache\n", millis());
|
||||
setupCacheDir();
|
||||
|
||||
// Begin building cache - stream entries to disk immediately
|
||||
if (!bookMetadataCache->beginWrite()) {
|
||||
Serial.printf("[%lu] [EBP] Could not begin writing cache\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [EBP] Found content.opf at: %s\n", millis(), contentOpfFilePath.c_str());
|
||||
|
||||
contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1);
|
||||
|
||||
if (!parseContentOpf(contentOpfFilePath)) {
|
||||
// OPF Pass
|
||||
BookMetadataCache::BookMetadata bookMetadata;
|
||||
if (!bookMetadataCache->beginContentOpfPass()) {
|
||||
Serial.printf("[%lu] [EBP] Could not begin writing content.opf pass\n", millis());
|
||||
return false;
|
||||
}
|
||||
if (!parseContentOpf(bookMetadata)) {
|
||||
Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis());
|
||||
return false;
|
||||
}
|
||||
if (!bookMetadataCache->endContentOpfPass()) {
|
||||
Serial.printf("[%lu] [EBP] Could not end writing content.opf pass\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
// TOC Pass
|
||||
if (!bookMetadataCache->beginTocPass()) {
|
||||
Serial.printf("[%lu] [EBP] Could not begin writing toc pass\n", millis());
|
||||
return false;
|
||||
}
|
||||
if (!parseTocNcxFile()) {
|
||||
Serial.printf("[%lu] [EBP] Could not parse toc\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
initializeSpineItemSizes();
|
||||
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Epub::initializeSpineItemSizes() {
|
||||
setupCacheDir();
|
||||
|
||||
size_t spineItemsCount = getSpineItemsCount();
|
||||
size_t cumSpineItemSize = 0;
|
||||
if (SD.exists((getCachePath() + "/spine_size.bin").c_str())) {
|
||||
File f = SD.open((getCachePath() + "/spine_size.bin").c_str());
|
||||
uint8_t data[4];
|
||||
for (size_t i = 0; i < spineItemsCount; i++) {
|
||||
f.read(data, 4);
|
||||
cumSpineItemSize = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
|
||||
cumulativeSpineItemSize.emplace_back(cumSpineItemSize);
|
||||
// Serial.printf("[%lu] [EBP] Loading item %d size %u to %u %u\n", millis(),
|
||||
// i, cumSpineItemSize, data[1], data[0]);
|
||||
}
|
||||
f.close();
|
||||
} else {
|
||||
File f = SD.open((getCachePath() + "/spine_size.bin").c_str(), FILE_WRITE);
|
||||
uint8_t data[4];
|
||||
// determine size of spine items
|
||||
for (size_t i = 0; i < spineItemsCount; i++) {
|
||||
std::string spineItem = getSpineItem(i);
|
||||
size_t s = 0;
|
||||
getItemSize(spineItem, &s);
|
||||
cumSpineItemSize += s;
|
||||
cumulativeSpineItemSize.emplace_back(cumSpineItemSize);
|
||||
|
||||
// and persist to cache
|
||||
data[0] = cumSpineItemSize & 0xFF;
|
||||
data[1] = (cumSpineItemSize >> 8) & 0xFF;
|
||||
data[2] = (cumSpineItemSize >> 16) & 0xFF;
|
||||
data[3] = (cumSpineItemSize >> 24) & 0xFF;
|
||||
// Serial.printf("[%lu] [EBP] Persisting item %d size %u to %u %u\n", millis(),
|
||||
// i, cumSpineItemSize, data[1], data[0]);
|
||||
f.write(data, 4);
|
||||
}
|
||||
|
||||
f.close();
|
||||
if (!bookMetadataCache->endTocPass()) {
|
||||
Serial.printf("[%lu] [EBP] Could not end writing toc pass\n", millis());
|
||||
return false;
|
||||
}
|
||||
Serial.printf("[%lu] [EBP] Book size: %lu\n", millis(), cumSpineItemSize);
|
||||
|
||||
// Close the cache files
|
||||
if (!bookMetadataCache->endWrite()) {
|
||||
Serial.printf("[%lu] [EBP] Could not end writing cache\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Build final book.bin
|
||||
if (!bookMetadataCache->buildBookBin(filepath, bookMetadata)) {
|
||||
Serial.printf("[%lu] [EBP] Could not update mappings and sizes\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!bookMetadataCache->cleanupTmpFiles()) {
|
||||
Serial.printf("[%lu] [EBP] Could not cleanup tmp files - ignoring\n", millis());
|
||||
}
|
||||
|
||||
// Reload the cache from disk so it's in the correct state
|
||||
bookMetadataCache.reset(new BookMetadataCache(cachePath));
|
||||
if (!bookMetadataCache->load()) {
|
||||
Serial.printf("[%lu] [EBP] Failed to reload cache after writing\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Epub::clearCache() const {
|
||||
@@ -229,49 +253,76 @@ const std::string& Epub::getCachePath() const { return cachePath; }
|
||||
|
||||
const std::string& Epub::getPath() const { return filepath; }
|
||||
|
||||
const std::string& Epub::getTitle() const { return title; }
|
||||
|
||||
const std::string& Epub::getCoverImageItem() const { return coverImageItem; }
|
||||
|
||||
std::string normalisePath(const std::string& path) {
|
||||
std::vector<std::string> components;
|
||||
std::string component;
|
||||
|
||||
for (const auto c : path) {
|
||||
if (c == '/') {
|
||||
if (!component.empty()) {
|
||||
if (component == "..") {
|
||||
if (!components.empty()) {
|
||||
components.pop_back();
|
||||
}
|
||||
} else {
|
||||
components.push_back(component);
|
||||
}
|
||||
component.clear();
|
||||
}
|
||||
} else {
|
||||
component += c;
|
||||
}
|
||||
const std::string& Epub::getTitle() const {
|
||||
static std::string blank;
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
return blank;
|
||||
}
|
||||
|
||||
if (!component.empty()) {
|
||||
components.push_back(component);
|
||||
}
|
||||
|
||||
std::string result;
|
||||
for (const auto& c : components) {
|
||||
if (!result.empty()) {
|
||||
result += "/";
|
||||
}
|
||||
result += c;
|
||||
}
|
||||
|
||||
return result;
|
||||
return bookMetadataCache->coreMetadata.title;
|
||||
}
|
||||
|
||||
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, bool trailingNullByte) const {
|
||||
// Returns the location of the generated cover bitmap: "<cachePath>/cover.bmp".
std::string Epub::getCoverBmpPath() const {
  std::string bmpPath = cachePath;
  bmpPath += "/cover.bmp";
  return bmpPath;
}
|
||||
|
||||
bool Epub::generateCoverBmp() const {
|
||||
// Already generated, return true
|
||||
if (SD.exists(getCoverBmpPath().c_str())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
Serial.printf("[%lu] [EBP] Cannot generate cover BMP, cache not loaded\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto coverImageHref = bookMetadataCache->coreMetadata.coverItemHref;
|
||||
if (coverImageHref.empty()) {
|
||||
Serial.printf("[%lu] [EBP] No known cover image\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (coverImageHref.substr(coverImageHref.length() - 4) == ".jpg" ||
|
||||
coverImageHref.substr(coverImageHref.length() - 5) == ".jpeg") {
|
||||
Serial.printf("[%lu] [EBP] Generating BMP from JPG cover image\n", millis());
|
||||
const auto coverJpgTempPath = getCachePath() + "/.cover.jpg";
|
||||
|
||||
File coverJpg;
|
||||
if (!FsHelpers::openFileForWrite("EBP", coverJpgTempPath, coverJpg)) {
|
||||
return false;
|
||||
}
|
||||
readItemContentsToStream(coverImageHref, coverJpg, 1024);
|
||||
coverJpg.close();
|
||||
|
||||
if (!FsHelpers::openFileForRead("EBP", coverJpgTempPath, coverJpg)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
File coverBmp;
|
||||
if (!FsHelpers::openFileForWrite("EBP", getCoverBmpPath(), coverBmp)) {
|
||||
coverJpg.close();
|
||||
return false;
|
||||
}
|
||||
const bool success = JpegToBmpConverter::jpegFileToBmpStream(coverJpg, coverBmp);
|
||||
coverJpg.close();
|
||||
coverBmp.close();
|
||||
SD.remove(coverJpgTempPath.c_str());
|
||||
|
||||
if (!success) {
|
||||
Serial.printf("[%lu] [EBP] Failed to generate BMP from JPG cover image\n", millis());
|
||||
SD.remove(getCoverBmpPath().c_str());
|
||||
}
|
||||
Serial.printf("[%lu] [EBP] Generated BMP from JPG cover image, success: %s\n", millis(), success ? "yes" : "no");
|
||||
return success;
|
||||
} else {
|
||||
Serial.printf("[%lu] [EBP] Cover image is not a JPG, skipping\n", millis());
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const {
|
||||
const ZipFile zip("/sd" + filepath);
|
||||
const std::string path = normalisePath(itemHref);
|
||||
const std::string path = FsHelpers::normalisePath(itemHref);
|
||||
|
||||
const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte);
|
||||
if (!content) {
|
||||
@@ -284,95 +335,104 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size
|
||||
|
||||
bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
|
||||
const ZipFile zip("/sd" + filepath);
|
||||
const std::string path = normalisePath(itemHref);
|
||||
const std::string path = FsHelpers::normalisePath(itemHref);
|
||||
|
||||
return zip.readFileToStream(path.c_str(), out, chunkSize);
|
||||
}
|
||||
|
||||
bool Epub::getItemSize(const std::string& itemHref, size_t* size) const {
|
||||
const ZipFile zip("/sd" + filepath);
|
||||
const std::string path = normalisePath(itemHref);
|
||||
return getItemSize(zip, itemHref, size);
|
||||
}
|
||||
|
||||
bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size) {
|
||||
const std::string path = FsHelpers::normalisePath(itemHref);
|
||||
return zip.getInflatedFileSize(path.c_str(), size);
|
||||
}
|
||||
|
||||
int Epub::getSpineItemsCount() const { return spine.size(); }
|
||||
int Epub::getSpineItemsCount() const {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
return 0;
|
||||
}
|
||||
return bookMetadataCache->getSpineCount();
|
||||
}
|
||||
|
||||
size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { return cumulativeSpineItemSize.at(spineIndex); }
|
||||
size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { return getSpineItem(spineIndex).cumulativeSize; }
|
||||
|
||||
std::string& Epub::getSpineItem(const int spineIndex) {
|
||||
if (spineIndex < 0 || spineIndex >= spine.size()) {
|
||||
BookMetadataCache::SpineEntry Epub::getSpineItem(const int spineIndex) const {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
Serial.printf("[%lu] [EBP] getSpineItem called but cache not loaded\n", millis());
|
||||
return {};
|
||||
}
|
||||
|
||||
if (spineIndex < 0 || spineIndex >= bookMetadataCache->getSpineCount()) {
|
||||
Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex);
|
||||
return spine.at(0).second;
|
||||
return bookMetadataCache->getSpineEntry(0);
|
||||
}
|
||||
|
||||
return spine.at(spineIndex).second;
|
||||
return bookMetadataCache->getSpineEntry(spineIndex);
|
||||
}
|
||||
|
||||
EpubTocEntry& Epub::getTocItem(const int tocTndex) {
|
||||
if (tocTndex < 0 || tocTndex >= toc.size()) {
|
||||
Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocTndex);
|
||||
return toc.at(0);
|
||||
BookMetadataCache::TocEntry Epub::getTocItem(const int tocIndex) const {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
Serial.printf("[%lu] [EBP] getTocItem called but cache not loaded\n", millis());
|
||||
return {};
|
||||
}
|
||||
|
||||
return toc.at(tocTndex);
|
||||
if (tocIndex < 0 || tocIndex >= bookMetadataCache->getTocCount()) {
|
||||
Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocIndex);
|
||||
return {};
|
||||
}
|
||||
|
||||
return bookMetadataCache->getTocEntry(tocIndex);
|
||||
}
|
||||
|
||||
int Epub::getTocItemsCount() const { return toc.size(); }
|
||||
int Epub::getTocItemsCount() const {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return bookMetadataCache->getTocCount();
|
||||
}
|
||||
|
||||
// work out the section index for a toc index
|
||||
int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
|
||||
if (tocIndex < 0 || tocIndex >= toc.size()) {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex called but cache not loaded\n", millis());
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (tocIndex < 0 || tocIndex >= bookMetadataCache->getTocCount()) {
|
||||
Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex: tocIndex %d out of range\n", millis(), tocIndex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// the toc entry should have an href that matches the spine item
|
||||
// so we can find the spine index by looking for the href
|
||||
for (int i = 0; i < spine.size(); i++) {
|
||||
if (spine[i].second == toc[tocIndex].href) {
|
||||
return i;
|
||||
}
|
||||
const int spineIndex = bookMetadataCache->getTocEntry(tocIndex).spineIndex;
|
||||
if (spineIndex < 0) {
|
||||
Serial.printf("[%lu] [EBP] Section not found for TOC index %d\n", millis(), tocIndex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [EBP] Section not found\n", millis());
|
||||
// not found - default to the start of the book
|
||||
return 0;
|
||||
return spineIndex;
|
||||
}
|
||||
|
||||
int Epub::getTocIndexForSpineIndex(const int spineIndex) const {
|
||||
if (spineIndex < 0 || spineIndex >= spine.size()) {
|
||||
Serial.printf("[%lu] [EBP] getTocIndexForSpineIndex: spineIndex %d out of range\n", millis(), spineIndex);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// the toc entry should have an href that matches the spine item
|
||||
// so we can find the toc index by looking for the href
|
||||
for (int i = 0; i < toc.size(); i++) {
|
||||
if (toc[i].href == spine[spineIndex].second) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [EBP] TOC item not found\n", millis());
|
||||
return -1;
|
||||
}
|
||||
int Epub::getTocIndexForSpineIndex(const int spineIndex) const { return getSpineItem(spineIndex).tocIndex; }
|
||||
|
||||
size_t Epub::getBookSize() const {
|
||||
if (spine.empty()) {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded() || bookMetadataCache->getSpineCount() == 0) {
|
||||
return 0;
|
||||
}
|
||||
return getCumulativeSpineItemSize(getSpineItemsCount() - 1);
|
||||
}
|
||||
|
||||
// Calculate progress in book
|
||||
uint8_t Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) {
|
||||
size_t bookSize = getBookSize();
|
||||
uint8_t Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) const {
|
||||
const size_t bookSize = getBookSize();
|
||||
if (bookSize == 0) {
|
||||
return 0;
|
||||
}
|
||||
size_t prevChapterSize = (currentSpineIndex >= 1) ? getCumulativeSpineItemSize(currentSpineIndex - 1) : 0;
|
||||
size_t curChapterSize = getCumulativeSpineItemSize(currentSpineIndex) - prevChapterSize;
|
||||
size_t sectionProgSize = currentSpineRead * curChapterSize;
|
||||
const size_t prevChapterSize = (currentSpineIndex >= 1) ? getCumulativeSpineItemSize(currentSpineIndex - 1) : 0;
|
||||
const size_t curChapterSize = getCumulativeSpineItemSize(currentSpineIndex) - prevChapterSize;
|
||||
const size_t sectionProgSize = currentSpineRead * curChapterSize;
|
||||
return round(static_cast<float>(prevChapterSize + sectionProgSize) / bookSize * 100.0);
|
||||
}
|
||||
|
||||
@@ -1,38 +1,30 @@
|
||||
#pragma once
|
||||
#include <Print.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "Epub/EpubTocEntry.h"
|
||||
#include "Epub/BookMetadataCache.h"
|
||||
|
||||
class ZipFile;
|
||||
|
||||
class Epub {
|
||||
// the title read from the EPUB meta data
|
||||
std::string title;
|
||||
// the cover image
|
||||
std::string coverImageItem;
|
||||
// the ncx file
|
||||
std::string tocNcxItem;
|
||||
// where is the EPUB file?
|
||||
std::string filepath;
|
||||
// the spine of the EPUB file
|
||||
std::vector<std::pair<std::string, std::string>> spine;
|
||||
// the file size of the spine items (proxy to book progress)
|
||||
std::vector<size_t> cumulativeSpineItemSize;
|
||||
// the toc of the EPUB file
|
||||
std::vector<EpubTocEntry> toc;
|
||||
// the base path for items in the EPUB file
|
||||
std::string contentBasePath;
|
||||
// Unique cache key based on filepath
|
||||
std::string cachePath;
|
||||
// Spine and TOC cache
|
||||
std::unique_ptr<BookMetadataCache> bookMetadataCache;
|
||||
|
||||
bool findContentOpfFile(std::string* contentOpfFile) const;
|
||||
bool parseContentOpf(const std::string& contentOpfFilePath);
|
||||
bool parseTocNcxFile();
|
||||
void initializeSpineItemSizes();
|
||||
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
|
||||
bool parseTocNcxFile() const;
|
||||
static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size);
|
||||
|
||||
public:
|
||||
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
|
||||
@@ -47,19 +39,20 @@ class Epub {
|
||||
const std::string& getCachePath() const;
|
||||
const std::string& getPath() const;
|
||||
const std::string& getTitle() const;
|
||||
const std::string& getCoverImageItem() const;
|
||||
std::string getCoverBmpPath() const;
|
||||
bool generateCoverBmp() const;
|
||||
uint8_t* readItemContentsToBytes(const std::string& itemHref, size_t* size = nullptr,
|
||||
bool trailingNullByte = false) const;
|
||||
bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const;
|
||||
bool getItemSize(const std::string& itemHref, size_t* size) const;
|
||||
std::string& getSpineItem(int spineIndex);
|
||||
BookMetadataCache::SpineEntry getSpineItem(int spineIndex) const;
|
||||
BookMetadataCache::TocEntry getTocItem(int tocIndex) const;
|
||||
int getSpineItemsCount() const;
|
||||
size_t getCumulativeSpineItemSize(const int spineIndex) const;
|
||||
EpubTocEntry& getTocItem(int tocIndex);
|
||||
int getTocItemsCount() const;
|
||||
int getSpineIndexForTocIndex(int tocIndex) const;
|
||||
int getTocIndexForSpineIndex(int spineIndex) const;
|
||||
size_t getCumulativeSpineItemSize(int spineIndex) const;
|
||||
|
||||
size_t getBookSize() const;
|
||||
uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead);
|
||||
uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead) const;
|
||||
};
|
||||
|
||||
326
lib/Epub/Epub/BookMetadataCache.cpp
Normal file
326
lib/Epub/Epub/BookMetadataCache.cpp
Normal file
@@ -0,0 +1,326 @@
|
||||
#include "BookMetadataCache.h"
|
||||
|
||||
#include <HardwareSerial.h>
|
||||
#include <SD.h>
|
||||
#include <Serialization.h>
|
||||
#include <ZipFile.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "FsHelpers.h"
|
||||
|
||||
namespace {
// Format version value stored as the first field of book.bin.
constexpr uint8_t BOOK_CACHE_VERSION{1};

// File names (with leading slash) that get appended to the cache directory path.
constexpr char bookBinFile[]{"/book.bin"};            // final combined cache file
constexpr char tmpSpineBinFile[]{"/spine.bin.tmp"};   // temporary spine entries
constexpr char tmpTocBinFile[]{"/toc.bin.tmp"};       // temporary TOC entries
}  // namespace
|
||||
|
||||
/* ============= WRITING / BUILDING FUNCTIONS ================ */
|
||||
|
||||
// Puts the cache into build mode and zeroes the spine/TOC entry counters.
// Always returns true.
bool BookMetadataCache::beginWrite() {
  tocCount = 0;
  spineCount = 0;
  buildMode = true;

  Serial.printf("[%lu] [BMC] Entering write mode\n", millis());
  return true;
}
|
||||
|
||||
bool BookMetadataCache::beginContentOpfPass() {
|
||||
Serial.printf("[%lu] [BMC] Beginning content opf pass\n", millis());
|
||||
|
||||
// Open spine file for writing
|
||||
return FsHelpers::openFileForWrite("BMC", cachePath + tmpSpineBinFile, spineFile);
|
||||
}
|
||||
|
||||
// Finishes the content.opf pass: closes the temporary spine file.
// Always returns true.
bool BookMetadataCache::endContentOpfPass() {
  spineFile.close();

  return true;
}
|
||||
|
||||
bool BookMetadataCache::beginTocPass() {
|
||||
Serial.printf("[%lu] [BMC] Beginning toc pass\n", millis());
|
||||
|
||||
// Open spine file for reading
|
||||
if (!FsHelpers::openFileForRead("BMC", cachePath + tmpSpineBinFile, spineFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!FsHelpers::openFileForWrite("BMC", cachePath + tmpTocBinFile, tocFile)) {
|
||||
spineFile.close();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Finishes the TOC pass: closes the temporary TOC file, then the temporary
// spine file. Always returns true.
bool BookMetadataCache::endTocPass() {
  tocFile.close();
  spineFile.close();

  return true;
}
|
||||
|
||||
// Leaves build mode and logs how many spine/TOC entries were counted.
// Returns false (and changes nothing) when the cache is not in build mode.
bool BookMetadataCache::endWrite() {
  if (buildMode) {
    buildMode = false;
    Serial.printf("[%lu] [BMC] Wrote %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
    return true;
  }

  Serial.printf("[%lu] [BMC] endWrite called but not in build mode\n", millis());
  return false;
}
|
||||
|
||||
bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMetadata& metadata) {
|
||||
// Open all three files, writing to meta, reading from spine and toc
|
||||
if (!FsHelpers::openFileForWrite("BMC", cachePath + bookBinFile, bookFile)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FsHelpers::openFileForRead("BMC", cachePath + tmpSpineBinFile, spineFile)) {
|
||||
bookFile.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FsHelpers::openFileForRead("BMC", cachePath + tmpTocBinFile, tocFile)) {
|
||||
bookFile.close();
|
||||
spineFile.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
constexpr size_t headerASize =
|
||||
sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(size_t) + sizeof(spineCount) + sizeof(tocCount);
|
||||
const size_t metadataSize =
|
||||
metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() + sizeof(uint32_t) * 3;
|
||||
const size_t lutSize = sizeof(size_t) * spineCount + sizeof(size_t) * tocCount;
|
||||
const size_t lutOffset = headerASize + metadataSize;
|
||||
|
||||
// Header A
|
||||
serialization::writePod(bookFile, BOOK_CACHE_VERSION);
|
||||
serialization::writePod(bookFile, lutOffset);
|
||||
serialization::writePod(bookFile, spineCount);
|
||||
serialization::writePod(bookFile, tocCount);
|
||||
// Metadata
|
||||
serialization::writeString(bookFile, metadata.title);
|
||||
serialization::writeString(bookFile, metadata.author);
|
||||
serialization::writeString(bookFile, metadata.coverItemHref);
|
||||
|
||||
// Loop through spine entries, writing LUT positions
|
||||
spineFile.seek(0);
|
||||
for (int i = 0; i < spineCount; i++) {
|
||||
auto pos = spineFile.position();
|
||||
auto spineEntry = readSpineEntry(spineFile);
|
||||
serialization::writePod(bookFile, pos + lutOffset + lutSize);
|
||||
}
|
||||
|
||||
// Loop through toc entries, writing LUT positions
|
||||
tocFile.seek(0);
|
||||
for (int i = 0; i < tocCount; i++) {
|
||||
auto pos = tocFile.position();
|
||||
auto tocEntry = readTocEntry(tocFile);
|
||||
serialization::writePod(bookFile, pos + lutOffset + lutSize + spineFile.position());
|
||||
}
|
||||
|
||||
// LUTs complete
|
||||
// Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
|
||||
|
||||
const ZipFile zip("/sd" + epubPath);
|
||||
size_t cumSize = 0;
|
||||
spineFile.seek(0);
|
||||
for (int i = 0; i < spineCount; i++) {
|
||||
auto spineEntry = readSpineEntry(spineFile);
|
||||
|
||||
tocFile.seek(0);
|
||||
for (int j = 0; j < tocCount; j++) {
|
||||
auto tocEntry = readTocEntry(tocFile);
|
||||
if (tocEntry.spineIndex == i) {
|
||||
spineEntry.tocIndex = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Not a huge deal if we don't fine a TOC entry for the spine entry, this is expected behaviour for EPUBs
|
||||
// Logging here is for debugging
|
||||
if (spineEntry.tocIndex == -1) {
|
||||
Serial.printf("[%lu] [BMC] Warning: Could not find TOC entry for spine item %d: %s\n", millis(), i,
|
||||
spineEntry.href.c_str());
|
||||
}
|
||||
|
||||
// Calculate size for cumulative size
|
||||
size_t itemSize = 0;
|
||||
const std::string path = FsHelpers::normalisePath(spineEntry.href);
|
||||
if (zip.getInflatedFileSize(path.c_str(), &itemSize)) {
|
||||
cumSize += itemSize;
|
||||
spineEntry.cumulativeSize = cumSize;
|
||||
} else {
|
||||
Serial.printf("[%lu] [BMC] Warning: Could not get size for spine item: %s\n", millis(), path.c_str());
|
||||
}
|
||||
|
||||
// Write out spine data to book.bin
|
||||
writeSpineEntry(bookFile, spineEntry);
|
||||
}
|
||||
|
||||
// Loop through toc entries from toc file writing to book.bin
|
||||
tocFile.seek(0);
|
||||
for (int i = 0; i < tocCount; i++) {
|
||||
auto tocEntry = readTocEntry(tocFile);
|
||||
writeTocEntry(bookFile, tocEntry);
|
||||
}
|
||||
|
||||
bookFile.close();
|
||||
spineFile.close();
|
||||
tocFile.close();
|
||||
|
||||
Serial.printf("[%lu] [BMC] Successfully built book.bin\n", millis());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BookMetadataCache::cleanupTmpFiles() const {
|
||||
if (SD.exists((cachePath + tmpSpineBinFile).c_str())) {
|
||||
SD.remove((cachePath + tmpSpineBinFile).c_str());
|
||||
}
|
||||
if (SD.exists((cachePath + tmpTocBinFile).c_str())) {
|
||||
SD.remove((cachePath + tmpTocBinFile).c_str());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Serialises one spine entry at the file's current position, in the order
// href, cumulativeSize, tocIndex (readSpineEntry reads them back in the same order).
// Returns the file position at which the entry starts.
size_t BookMetadataCache::writeSpineEntry(File& file, const SpineEntry& entry) const {
  const size_t entryStart = file.position();

  serialization::writeString(file, entry.href);
  serialization::writePod(file, entry.cumulativeSize);
  serialization::writePod(file, entry.tocIndex);

  return entryStart;
}
|
||||
|
||||
// Serialises one TOC entry at the file's current position, in the order
// title, href, anchor, level, spineIndex (readTocEntry reads them back in the same order).
// Returns the file position at which the entry starts.
size_t BookMetadataCache::writeTocEntry(File& file, const TocEntry& entry) const {
  const size_t entryStart = file.position();

  // Three strings first...
  serialization::writeString(file, entry.title);
  serialization::writeString(file, entry.href);
  serialization::writeString(file, entry.anchor);
  // ...then the two fixed-size fields.
  serialization::writePod(file, entry.level);
  serialization::writePod(file, entry.spineIndex);

  return entryStart;
}
|
||||
|
||||
// Note: for the LUT to be accurate, this **MUST** be called for all spine items before `addTocEntry` is ever called
|
||||
// this is because in this function we're marking positions of the items
|
||||
void BookMetadataCache::createSpineEntry(const std::string& href) {
|
||||
if (!buildMode || !spineFile) {
|
||||
Serial.printf("[%lu] [BMC] createSpineEntry called but not in build mode\n", millis());
|
||||
return;
|
||||
}
|
||||
|
||||
const SpineEntry entry(href, 0, -1);
|
||||
writeSpineEntry(spineFile, entry);
|
||||
spineCount++;
|
||||
}
|
||||
|
||||
void BookMetadataCache::createTocEntry(const std::string& title, const std::string& href, const std::string& anchor,
|
||||
const uint8_t level) {
|
||||
if (!buildMode || !tocFile || !spineFile) {
|
||||
Serial.printf("[%lu] [BMC] createTocEntry called but not in build mode\n", millis());
|
||||
return;
|
||||
}
|
||||
|
||||
int spineIndex = -1;
|
||||
// find spine index
|
||||
spineFile.seek(0);
|
||||
for (int i = 0; i < spineCount; i++) {
|
||||
auto spineEntry = readSpineEntry(spineFile);
|
||||
if (spineEntry.href == href) {
|
||||
spineIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (spineIndex == -1) {
|
||||
Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str());
|
||||
}
|
||||
|
||||
const TocEntry entry(title, href, anchor, level, spineIndex);
|
||||
writeTocEntry(tocFile, entry);
|
||||
tocCount++;
|
||||
}
|
||||
|
||||
/* ============= READING / LOADING FUNCTIONS ================ */
|
||||
|
||||
bool BookMetadataCache::load() {
|
||||
if (!FsHelpers::openFileForRead("BMC", cachePath + bookBinFile, bookFile)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t version;
|
||||
serialization::readPod(bookFile, version);
|
||||
if (version != BOOK_CACHE_VERSION) {
|
||||
Serial.printf("[%lu] [BMC] Cache version mismatch: expected %d, got %d\n", millis(), BOOK_CACHE_VERSION, version);
|
||||
bookFile.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
serialization::readPod(bookFile, lutOffset);
|
||||
serialization::readPod(bookFile, spineCount);
|
||||
serialization::readPod(bookFile, tocCount);
|
||||
|
||||
serialization::readString(bookFile, coreMetadata.title);
|
||||
serialization::readString(bookFile, coreMetadata.author);
|
||||
serialization::readString(bookFile, coreMetadata.coverItemHref);
|
||||
|
||||
loaded = true;
|
||||
Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the spine entry at `index`, read from the open book file.
// The spine LUT starts at lutOffset and holds one size_t file offset per spine entry; the
// entry itself is read from the offset found there.
// Returns a default-constructed SpineEntry (after logging) when the cache is not loaded or
// `index` is outside [0, spineCount).
BookMetadataCache::SpineEntry BookMetadataCache::getSpineEntry(const int index) {
  if (!loaded) {
    Serial.printf("[%lu] [BMC] getSpineEntry called but cache not loaded\n", millis());
    return SpineEntry();
  }

  const bool inRange = index >= 0 && index < static_cast<int>(spineCount);
  if (!inRange) {
    Serial.printf("[%lu] [BMC] getSpineEntry index %d out of range\n", millis(), index);
    return SpineEntry();
  }

  // Look up where the entry lives, then jump there and decode it.
  const size_t lutSlot = lutOffset + sizeof(size_t) * static_cast<size_t>(index);
  bookFile.seek(lutSlot);

  size_t entryOffset;
  serialization::readPod(bookFile, entryOffset);

  bookFile.seek(entryOffset);
  return readSpineEntry(bookFile);
}
|
||||
|
||||
// Returns the TOC entry at `index`, read from the open book file.
// The TOC LUT sits directly after the spine LUT (spineCount size_t slots past lutOffset) and
// holds one size_t file offset per TOC entry; the entry is read from the offset found there.
// Returns a default-constructed TocEntry (after logging) when the cache is not loaded or
// `index` is outside [0, tocCount).
BookMetadataCache::TocEntry BookMetadataCache::getTocEntry(const int index) {
  if (!loaded) {
    Serial.printf("[%lu] [BMC] getTocEntry called but cache not loaded\n", millis());
    return TocEntry();
  }

  const bool inRange = index >= 0 && index < static_cast<int>(tocCount);
  if (!inRange) {
    Serial.printf("[%lu] [BMC] getTocEntry index %d out of range\n", millis(), index);
    return TocEntry();
  }

  // Skip the spine LUT, then index into the TOC LUT.
  const size_t slotNumber = static_cast<size_t>(spineCount) + static_cast<size_t>(index);
  bookFile.seek(lutOffset + sizeof(size_t) * slotNumber);

  size_t entryOffset;
  serialization::readPod(bookFile, entryOffset);

  bookFile.seek(entryOffset);
  return readTocEntry(bookFile);
}
|
||||
|
||||
// Decodes one spine entry from the file's current position, reading href, cumulativeSize
// and tocIndex in the same order writeSpineEntry emits them.
BookMetadataCache::SpineEntry BookMetadataCache::readSpineEntry(File& file) const {
  SpineEntry decoded;

  serialization::readString(file, decoded.href);
  serialization::readPod(file, decoded.cumulativeSize);
  serialization::readPod(file, decoded.tocIndex);

  return decoded;
}
|
||||
|
||||
// Decodes one TOC entry from the file's current position, reading title, href, anchor,
// level and spineIndex in the same order writeTocEntry emits them.
BookMetadataCache::TocEntry BookMetadataCache::readTocEntry(File& file) const {
  TocEntry decoded;

  // Variable-length strings.
  serialization::readString(file, decoded.title);
  serialization::readString(file, decoded.href);
  serialization::readString(file, decoded.anchor);
  // Fixed-size fields.
  serialization::readPod(file, decoded.level);
  serialization::readPod(file, decoded.spineIndex);

  return decoded;
}
|
||||
87
lib/Epub/Epub/BookMetadataCache.h
Normal file
87
lib/Epub/Epub/BookMetadataCache.h
Normal file
@@ -0,0 +1,87 @@
|
||||
#pragma once
|
||||
|
||||
#include <SD.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
class BookMetadataCache {
|
||||
public:
|
||||
struct BookMetadata {
|
||||
std::string title;
|
||||
std::string author;
|
||||
std::string coverItemHref;
|
||||
};
|
||||
|
||||
struct SpineEntry {
|
||||
std::string href;
|
||||
size_t cumulativeSize;
|
||||
int16_t tocIndex;
|
||||
|
||||
SpineEntry() : cumulativeSize(0), tocIndex(-1) {}
|
||||
SpineEntry(std::string href, const size_t cumulativeSize, const int16_t tocIndex)
|
||||
: href(std::move(href)), cumulativeSize(cumulativeSize), tocIndex(tocIndex) {}
|
||||
};
|
||||
|
||||
struct TocEntry {
|
||||
std::string title;
|
||||
std::string href;
|
||||
std::string anchor;
|
||||
uint8_t level;
|
||||
int16_t spineIndex;
|
||||
|
||||
TocEntry() : level(0), spineIndex(-1) {}
|
||||
TocEntry(std::string title, std::string href, std::string anchor, const uint8_t level, const int16_t spineIndex)
|
||||
: title(std::move(title)),
|
||||
href(std::move(href)),
|
||||
anchor(std::move(anchor)),
|
||||
level(level),
|
||||
spineIndex(spineIndex) {}
|
||||
};
|
||||
|
||||
private:
|
||||
std::string cachePath;
|
||||
size_t lutOffset;
|
||||
uint16_t spineCount;
|
||||
uint16_t tocCount;
|
||||
bool loaded;
|
||||
bool buildMode;
|
||||
|
||||
File bookFile;
|
||||
// Temp file handles during build
|
||||
File spineFile;
|
||||
File tocFile;
|
||||
|
||||
size_t writeSpineEntry(File& file, const SpineEntry& entry) const;
|
||||
size_t writeTocEntry(File& file, const TocEntry& entry) const;
|
||||
SpineEntry readSpineEntry(File& file) const;
|
||||
TocEntry readTocEntry(File& file) const;
|
||||
|
||||
public:
|
||||
BookMetadata coreMetadata;
|
||||
|
||||
explicit BookMetadataCache(std::string cachePath)
|
||||
: cachePath(std::move(cachePath)), lutOffset(0), spineCount(0), tocCount(0), loaded(false), buildMode(false) {}
|
||||
~BookMetadataCache() = default;
|
||||
|
||||
// Building phase (stream to disk immediately)
|
||||
bool beginWrite();
|
||||
bool beginContentOpfPass();
|
||||
void createSpineEntry(const std::string& href);
|
||||
bool endContentOpfPass();
|
||||
bool beginTocPass();
|
||||
void createTocEntry(const std::string& title, const std::string& href, const std::string& anchor, uint8_t level);
|
||||
bool endTocPass();
|
||||
bool endWrite();
|
||||
bool cleanupTmpFiles() const;
|
||||
|
||||
// Post-processing to update mappings and sizes
|
||||
bool buildBookBin(const std::string& epubPath, const BookMetadata& metadata);
|
||||
|
||||
// Reading phase (read mode)
|
||||
bool load();
|
||||
SpineEntry getSpineEntry(int index);
|
||||
TocEntry getTocEntry(int index);
|
||||
int getSpineCount() const { return spineCount; }
|
||||
int getTocCount() const { return tocCount; }
|
||||
bool isLoaded() const { return loaded; }
|
||||
};
|
||||
@@ -1,13 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
class EpubTocEntry {
|
||||
public:
|
||||
std::string title;
|
||||
std::string href;
|
||||
std::string anchor;
|
||||
int level;
|
||||
EpubTocEntry(std::string title, std::string href, std::string anchor, const int level)
|
||||
: title(std::move(title)), href(std::move(href)), anchor(std::move(anchor)), level(level) {}
|
||||
};
|
||||
@@ -2,6 +2,26 @@
|
||||
|
||||
#include <SD.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
bool FsHelpers::openFileForRead(const char* moduleName, const std::string& path, File& file) {
|
||||
file = SD.open(path.c_str(), FILE_READ);
|
||||
if (!file) {
|
||||
Serial.printf("[%lu] [%s] Failed to open file for reading: %s\n", millis(), moduleName, path.c_str());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FsHelpers::openFileForWrite(const char* moduleName, const std::string& path, File& file) {
|
||||
file = SD.open(path.c_str(), FILE_WRITE, true);
|
||||
if (!file) {
|
||||
Serial.printf("[%lu] [%s] Failed to open file for writing: %s\n", millis(), moduleName, path.c_str());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FsHelpers::removeDir(const char* path) {
|
||||
// 1. Open the directory
|
||||
File dir = SD.open(path);
|
||||
@@ -34,3 +54,39 @@ bool FsHelpers::removeDir(const char* path) {
|
||||
|
||||
return SD.rmdir(path);
|
||||
}
|
||||
|
||||
// Collapses a '/'-separated path into its canonical relative form:
//   - empty segments (leading, trailing or repeated '/') are dropped,
//   - "." segments are dropped,
//   - ".." removes the previous segment (and is ignored when there is none).
// The result never starts or ends with '/'. Every segment, including the final one that has
// no trailing '/', goes through the same resolution step, so "a/b/.." yields "a" rather than
// keeping a literal ".." component.
std::string FsHelpers::normalisePath(const std::string& path) {
  std::vector<std::string> components;
  std::string component;

  // Resolves the segment accumulated so far against `components`, then resets it.
  const auto resolveSegment = [&components, &component]() {
    if (component == "..") {
      if (!components.empty()) {
        components.pop_back();
      }
    } else if (!component.empty() && component != ".") {
      components.push_back(component);
    }
    component.clear();
  };

  for (const char c : path) {
    if (c == '/') {
      resolveSegment();
    } else {
      component += c;
    }
  }
  // The last segment is not followed by '/', so resolve it explicitly.
  resolveSegment();

  std::string result;
  for (const auto& part : components) {
    if (!result.empty()) {
      result += '/';
    }
    result += part;
  }

  return result;
}
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
#pragma once
|
||||
#include <FS.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
class FsHelpers {
|
||||
public:
|
||||
static bool openFileForRead(const char* moduleName, const std::string& path, File& file);
|
||||
static bool openFileForWrite(const char* moduleName, const std::string& path, File& file);
|
||||
static bool removeDir(const char* path);
|
||||
static std::string normalisePath(const std::string& path);
|
||||
};
|
||||
|
||||
@@ -9,21 +9,21 @@ constexpr uint8_t PAGE_FILE_VERSION = 3;
|
||||
|
||||
void PageLine::render(GfxRenderer& renderer, const int fontId) { block->render(renderer, fontId, xPos, yPos); }
|
||||
|
||||
void PageLine::serialize(std::ostream& os) {
|
||||
serialization::writePod(os, xPos);
|
||||
serialization::writePod(os, yPos);
|
||||
void PageLine::serialize(File& file) {
|
||||
serialization::writePod(file, xPos);
|
||||
serialization::writePod(file, yPos);
|
||||
|
||||
// serialize TextBlock pointed to by PageLine
|
||||
block->serialize(os);
|
||||
block->serialize(file);
|
||||
}
|
||||
|
||||
std::unique_ptr<PageLine> PageLine::deserialize(std::istream& is) {
|
||||
std::unique_ptr<PageLine> PageLine::deserialize(File& file) {
|
||||
int16_t xPos;
|
||||
int16_t yPos;
|
||||
serialization::readPod(is, xPos);
|
||||
serialization::readPod(is, yPos);
|
||||
serialization::readPod(file, xPos);
|
||||
serialization::readPod(file, yPos);
|
||||
|
||||
auto tb = TextBlock::deserialize(is);
|
||||
auto tb = TextBlock::deserialize(file);
|
||||
return std::unique_ptr<PageLine>(new PageLine(std::move(tb), xPos, yPos));
|
||||
}
|
||||
|
||||
@@ -33,22 +33,22 @@ void Page::render(GfxRenderer& renderer, const int fontId) const {
|
||||
}
|
||||
}
|
||||
|
||||
void Page::serialize(std::ostream& os) const {
|
||||
serialization::writePod(os, PAGE_FILE_VERSION);
|
||||
void Page::serialize(File& file) const {
|
||||
serialization::writePod(file, PAGE_FILE_VERSION);
|
||||
|
||||
const uint32_t count = elements.size();
|
||||
serialization::writePod(os, count);
|
||||
serialization::writePod(file, count);
|
||||
|
||||
for (const auto& el : elements) {
|
||||
// Only PageLine exists currently
|
||||
serialization::writePod(os, static_cast<uint8_t>(TAG_PageLine));
|
||||
el->serialize(os);
|
||||
serialization::writePod(file, static_cast<uint8_t>(TAG_PageLine));
|
||||
el->serialize(file);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Page> Page::deserialize(std::istream& is) {
|
||||
std::unique_ptr<Page> Page::deserialize(File& file) {
|
||||
uint8_t version;
|
||||
serialization::readPod(is, version);
|
||||
serialization::readPod(file, version);
|
||||
if (version != PAGE_FILE_VERSION) {
|
||||
Serial.printf("[%lu] [PGE] Deserialization failed: Unknown version %u\n", millis(), version);
|
||||
return nullptr;
|
||||
@@ -57,14 +57,14 @@ std::unique_ptr<Page> Page::deserialize(std::istream& is) {
|
||||
auto page = std::unique_ptr<Page>(new Page());
|
||||
|
||||
uint32_t count;
|
||||
serialization::readPod(is, count);
|
||||
serialization::readPod(file, count);
|
||||
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
uint8_t tag;
|
||||
serialization::readPod(is, tag);
|
||||
serialization::readPod(file, tag);
|
||||
|
||||
if (tag == TAG_PageLine) {
|
||||
auto pl = PageLine::deserialize(is);
|
||||
auto pl = PageLine::deserialize(file);
|
||||
page->elements.push_back(std::move(pl));
|
||||
} else {
|
||||
Serial.printf("[%lu] [PGE] Deserialization failed: Unknown tag %u\n", millis(), tag);
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
#pragma once
|
||||
#include <FS.h>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -16,7 +18,7 @@ class PageElement {
|
||||
explicit PageElement(const int16_t xPos, const int16_t yPos) : xPos(xPos), yPos(yPos) {}
|
||||
virtual ~PageElement() = default;
|
||||
virtual void render(GfxRenderer& renderer, int fontId) = 0;
|
||||
virtual void serialize(std::ostream& os) = 0;
|
||||
virtual void serialize(File& file) = 0;
|
||||
};
|
||||
|
||||
// a line from a block element
|
||||
@@ -27,8 +29,8 @@ class PageLine final : public PageElement {
|
||||
PageLine(std::shared_ptr<TextBlock> block, const int16_t xPos, const int16_t yPos)
|
||||
: PageElement(xPos, yPos), block(std::move(block)) {}
|
||||
void render(GfxRenderer& renderer, int fontId) override;
|
||||
void serialize(std::ostream& os) override;
|
||||
static std::unique_ptr<PageLine> deserialize(std::istream& is);
|
||||
void serialize(File& file) override;
|
||||
static std::unique_ptr<PageLine> deserialize(File& file);
|
||||
};
|
||||
|
||||
class Page {
|
||||
@@ -36,6 +38,6 @@ class Page {
|
||||
// the list of block index and line numbers on this page
|
||||
std::vector<std::shared_ptr<PageElement>> elements;
|
||||
void render(GfxRenderer& renderer, int fontId) const;
|
||||
void serialize(std::ostream& os) const;
|
||||
static std::unique_ptr<Page> deserialize(std::istream& is);
|
||||
void serialize(File& file) const;
|
||||
static std::unique_ptr<Page> deserialize(File& file);
|
||||
};
|
||||
|
||||
@@ -3,12 +3,11 @@
|
||||
#include <GfxRenderer.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "hyphenation/Hyphenator.h"
|
||||
|
||||
constexpr int MAX_COST = std::numeric_limits<int>::max();
|
||||
|
||||
void ParsedText::addWord(std::string word, const EpdFontStyle fontStyle) {
|
||||
@@ -20,183 +19,164 @@ void ParsedText::addWord(std::string word, const EpdFontStyle fontStyle) {
|
||||
|
||||
// Consumes data to minimize memory usage
|
||||
void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const int horizontalMargin,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine) {
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
|
||||
const bool includeLastLine) {
|
||||
if (words.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// horizontalMargin accounts for both left and right gutters, leaving the drawable width.
|
||||
const int pageWidth = renderer.getScreenWidth() - horizontalMargin;
|
||||
if (pageWidth <= 0) {
|
||||
words.clear();
|
||||
wordStyles.clear();
|
||||
return;
|
||||
const int spaceWidth = renderer.getSpaceWidth(fontId);
|
||||
const auto wordWidths = calculateWordWidths(renderer, fontId);
|
||||
const auto lineBreakIndices = computeLineBreaks(pageWidth, spaceWidth, wordWidths);
|
||||
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
|
||||
|
||||
for (size_t i = 0; i < lineCount; ++i) {
|
||||
extractLine(i, pageWidth, spaceWidth, wordWidths, lineBreakIndices, processLine);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
|
||||
const size_t totalWordCount = words.size();
|
||||
|
||||
std::vector<uint16_t> wordWidths;
|
||||
wordWidths.reserve(totalWordCount);
|
||||
|
||||
// add em-space at the beginning of first word in paragraph to indent
|
||||
if (!extraParagraphSpacing) {
|
||||
std::string& first_word = words.front();
|
||||
first_word.insert(0, "\xe2\x80\x83");
|
||||
}
|
||||
|
||||
const int spaceWidth = renderer.getSpaceWidth(fontId);
|
||||
// Maintain classic prose indenting when extra paragraph spacing is disabled.
|
||||
const bool allowIndent = !extraParagraphSpacing && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN);
|
||||
const bool allowHyphenation = hyphenationEnabled;
|
||||
const int indentWidth = allowIndent ? renderer.getTextWidth(fontId, "m", REGULAR) : 0;
|
||||
const int firstLinePageWidth = allowIndent ? std::max(pageWidth - indentWidth, 0) : pageWidth;
|
||||
auto pageWidthForLine = [&](const bool isFirstLine) -> int { return isFirstLine ? firstLinePageWidth : pageWidth; };
|
||||
auto wordsIt = words.begin();
|
||||
auto wordStylesIt = wordStyles.begin();
|
||||
|
||||
auto wordIt = words.begin();
|
||||
auto styleIt = wordStyles.begin();
|
||||
auto lineStartWordIt = wordIt;
|
||||
auto lineStartStyleIt = styleIt;
|
||||
while (wordsIt != words.end()) {
|
||||
wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt));
|
||||
|
||||
int lineWidthWithSpaces = 0;
|
||||
int lineWordWidthSum = 0;
|
||||
size_t lineWordCount = 0;
|
||||
std::vector<uint16_t> lineWordWidths;
|
||||
lineWordWidths.reserve(16);
|
||||
std::advance(wordsIt, 1);
|
||||
std::advance(wordStylesIt, 1);
|
||||
}
|
||||
|
||||
// Guard against malicious/invalid content generating unbounded line counts.
|
||||
size_t producedLines = 0;
|
||||
return wordWidths;
|
||||
}
|
||||
|
||||
std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int spaceWidth,
|
||||
const std::vector<uint16_t>& wordWidths) const {
|
||||
const size_t totalWordCount = words.size();
|
||||
|
||||
// DP table to store the minimum badness (cost) of lines starting at index i
|
||||
std::vector<int> dp(totalWordCount);
|
||||
// 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
|
||||
std::vector<size_t> ans(totalWordCount);
|
||||
|
||||
// Base Case
|
||||
dp[totalWordCount - 1] = 0;
|
||||
ans[totalWordCount - 1] = totalWordCount - 1;
|
||||
|
||||
for (int i = totalWordCount - 2; i >= 0; --i) {
|
||||
int currlen = -spaceWidth;
|
||||
dp[i] = MAX_COST;
|
||||
|
||||
for (size_t j = i; j < totalWordCount; ++j) {
|
||||
// Current line length: previous width + space + current word width
|
||||
currlen += wordWidths[j] + spaceWidth;
|
||||
|
||||
if (currlen > pageWidth) {
|
||||
break;
|
||||
}
|
||||
|
||||
int cost;
|
||||
if (j == totalWordCount - 1) {
|
||||
cost = 0; // Last line
|
||||
} else {
|
||||
const int remainingSpace = pageWidth - currlen;
|
||||
// Use long long for the square to prevent overflow
|
||||
const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
|
||||
|
||||
if (cost_ll > MAX_COST) {
|
||||
cost = MAX_COST;
|
||||
} else {
|
||||
cost = static_cast<int>(cost_ll);
|
||||
}
|
||||
}
|
||||
|
||||
if (cost < dp[i]) {
|
||||
dp[i] = cost;
|
||||
ans[i] = j; // j is the index of the last word in this optimal line
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the index of the word that starts the next line (last_word_index + 1)
|
||||
std::vector<size_t> lineBreakIndices;
|
||||
size_t currentWordIndex = 0;
|
||||
constexpr size_t MAX_LINES = 1000;
|
||||
|
||||
// commitLine moves buffered words/styles into a TextBlock and delivers it upstream.
|
||||
auto commitLine = [&](const bool isLastLine) {
|
||||
if (lineWordCount == 0) {
|
||||
return;
|
||||
while (currentWordIndex < totalWordCount) {
|
||||
if (lineBreakIndices.size() >= MAX_LINES) {
|
||||
break;
|
||||
}
|
||||
|
||||
const bool isFirstLine = producedLines == 0;
|
||||
const int linePageWidth = pageWidthForLine(isFirstLine);
|
||||
size_t nextBreakIndex = ans[currentWordIndex] + 1;
|
||||
lineBreakIndices.push_back(nextBreakIndex);
|
||||
|
||||
std::list<std::string> lineWords;
|
||||
std::list<EpdFontStyle> lineStyles;
|
||||
auto wordEndIt = wordIt;
|
||||
auto styleEndIt = styleIt;
|
||||
|
||||
lineWords.splice(lineWords.begin(), words, lineStartWordIt, wordEndIt);
|
||||
lineStyles.splice(lineStyles.begin(), wordStyles, lineStartStyleIt, styleEndIt);
|
||||
|
||||
const int gaps = lineWordCount > 0 ? static_cast<int>(lineWordCount - 1) : 0;
|
||||
const int baseSpaceTotal = spaceWidth * gaps;
|
||||
const int spaceBudget = linePageWidth - lineWordWidthSum;
|
||||
|
||||
int spacing = spaceWidth;
|
||||
int spacingRemainder = 0;
|
||||
if (style == TextBlock::JUSTIFIED && !isLastLine && gaps > 0) {
|
||||
// Spread the remaining width evenly across the gaps for justification.
|
||||
const int additional = std::max(0, spaceBudget - baseSpaceTotal);
|
||||
spacing = spaceWidth + (gaps > 0 ? additional / gaps : 0);
|
||||
spacingRemainder = (gaps > 0) ? additional % gaps : 0;
|
||||
}
|
||||
|
||||
int renderedWidth = lineWordWidthSum;
|
||||
if (gaps > 0) {
|
||||
renderedWidth += spacing * gaps;
|
||||
}
|
||||
|
||||
uint16_t xpos = 0;
|
||||
if (style == TextBlock::RIGHT_ALIGN) {
|
||||
xpos = renderedWidth < linePageWidth ? linePageWidth - renderedWidth : 0;
|
||||
} else if (style == TextBlock::CENTER_ALIGN) {
|
||||
xpos = renderedWidth < linePageWidth ? (linePageWidth - renderedWidth) / 2 : 0;
|
||||
} else if (allowIndent && isFirstLine) {
|
||||
xpos = indentWidth;
|
||||
}
|
||||
|
||||
// Cache the x positions for each word so TextBlock can render without recomputing layout.
|
||||
std::list<uint16_t> lineXPos;
|
||||
for (size_t idx = 0; idx < lineWordWidths.size(); ++idx) {
|
||||
lineXPos.push_back(xpos);
|
||||
xpos += lineWordWidths[idx];
|
||||
if (idx + 1 < lineWordWidths.size()) {
|
||||
int gap = spacing;
|
||||
if (spacingRemainder > 0) {
|
||||
gap += 1;
|
||||
spacingRemainder--;
|
||||
}
|
||||
xpos += gap;
|
||||
}
|
||||
}
|
||||
|
||||
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineStyles), style));
|
||||
|
||||
producedLines++;
|
||||
lineWordWidths.clear();
|
||||
lineWordWidthSum = 0;
|
||||
lineWidthWithSpaces = 0;
|
||||
lineWordCount = 0;
|
||||
lineStartWordIt = wordIt;
|
||||
lineStartStyleIt = styleIt;
|
||||
};
|
||||
|
||||
while (wordIt != words.end() && producedLines < MAX_LINES) {
|
||||
const int currentLinePageWidth = pageWidthForLine(producedLines == 0);
|
||||
|
||||
if (lineWordCount == 0) {
|
||||
lineStartWordIt = wordIt;
|
||||
lineStartStyleIt = styleIt;
|
||||
}
|
||||
|
||||
const int wordWidth = renderer.getTextWidth(fontId, wordIt->c_str(), *styleIt);
|
||||
const int gapWidth = (lineWordCount > 0) ? spaceWidth : 0;
|
||||
const int candidateWidth = lineWidthWithSpaces + gapWidth + wordWidth;
|
||||
|
||||
if (candidateWidth <= currentLinePageWidth) {
|
||||
lineWordWidths.push_back(static_cast<uint16_t>(wordWidth));
|
||||
lineWordWidthSum += wordWidth;
|
||||
lineWidthWithSpaces = candidateWidth;
|
||||
lineWordCount++;
|
||||
++wordIt;
|
||||
++styleIt;
|
||||
continue;
|
||||
}
|
||||
|
||||
const int availableWidth = currentLinePageWidth - lineWidthWithSpaces - gapWidth;
|
||||
if (lineWordCount > 0 && availableWidth <= 0) {
|
||||
commitLine(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (allowHyphenation && lineWordCount > 0 && availableWidth > 0) {
|
||||
// Try hyphenating the next word so the current line stays compact.
|
||||
HyphenationResult split;
|
||||
if (Hyphenator::splitWord(renderer, fontId, *wordIt, *styleIt, availableWidth, &split, false)) {
|
||||
*wordIt = std::move(split.head);
|
||||
auto nextWordIt = std::next(wordIt);
|
||||
auto nextStyleIt = std::next(styleIt);
|
||||
words.insert(nextWordIt, std::move(split.tail));
|
||||
wordStyles.insert(nextStyleIt, *styleIt);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (lineWordCount == 0) {
|
||||
HyphenationResult split;
|
||||
// Single overlong words get force-split so they can be displayed within the margins.
|
||||
if (Hyphenator::splitWord(renderer, fontId, *wordIt, *styleIt, currentLinePageWidth, &split, true)) {
|
||||
*wordIt = std::move(split.head);
|
||||
auto nextWordIt = std::next(wordIt);
|
||||
auto nextStyleIt = std::next(styleIt);
|
||||
words.insert(nextWordIt, std::move(split.tail));
|
||||
wordStyles.insert(nextStyleIt, *styleIt);
|
||||
continue;
|
||||
}
|
||||
|
||||
lineWordWidths.push_back(static_cast<uint16_t>(wordWidth));
|
||||
lineWordWidthSum += wordWidth;
|
||||
lineWidthWithSpaces = candidateWidth;
|
||||
lineWordCount = 1;
|
||||
++wordIt;
|
||||
++styleIt;
|
||||
commitLine(wordIt == words.end());
|
||||
continue;
|
||||
}
|
||||
|
||||
// No more tricks available; flush the collected words and move on.
|
||||
commitLine(false);
|
||||
currentWordIndex = nextBreakIndex;
|
||||
}
|
||||
|
||||
if (lineWordCount > 0 && producedLines < MAX_LINES) {
|
||||
commitLine(true);
|
||||
return lineBreakIndices;
|
||||
}
|
||||
|
||||
void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
|
||||
const std::vector<uint16_t>& wordWidths, const std::vector<size_t>& lineBreakIndices,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine) {
|
||||
const size_t lineBreak = lineBreakIndices[breakIndex];
|
||||
const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
|
||||
const size_t lineWordCount = lineBreak - lastBreakAt;
|
||||
|
||||
// Calculate total word width for this line
|
||||
int lineWordWidthSum = 0;
|
||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
||||
lineWordWidthSum += wordWidths[i];
|
||||
}
|
||||
|
||||
words.clear();
|
||||
wordStyles.clear();
|
||||
// Calculate spacing
|
||||
const int spareSpace = pageWidth - lineWordWidthSum;
|
||||
|
||||
int spacing = spaceWidth;
|
||||
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
|
||||
|
||||
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
|
||||
spacing = spareSpace / (lineWordCount - 1);
|
||||
}
|
||||
|
||||
// Calculate initial x position
|
||||
uint16_t xpos = 0;
|
||||
if (style == TextBlock::RIGHT_ALIGN) {
|
||||
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
|
||||
} else if (style == TextBlock::CENTER_ALIGN) {
|
||||
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
|
||||
}
|
||||
|
||||
// Pre-calculate X positions for words
|
||||
std::list<uint16_t> lineXPos;
|
||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
||||
const uint16_t currentWordWidth = wordWidths[i];
|
||||
lineXPos.push_back(xpos);
|
||||
xpos += currentWordWidth + spacing;
|
||||
}
|
||||
|
||||
// Iterators always start at the beginning as we are moving content with splice below
|
||||
auto wordEndIt = words.begin();
|
||||
auto wordStyleEndIt = wordStyles.begin();
|
||||
std::advance(wordEndIt, lineWordCount);
|
||||
std::advance(wordStyleEndIt, lineWordCount);
|
||||
|
||||
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
|
||||
std::list<std::string> lineWords;
|
||||
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
|
||||
std::list<EpdFontStyle> lineWordStyles;
|
||||
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
|
||||
|
||||
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
|
||||
}
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
#include <EpdFontFamily.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "blocks/TextBlock.h"
|
||||
|
||||
@@ -19,6 +19,12 @@ class ParsedText {
|
||||
bool extraParagraphSpacing;
|
||||
bool hyphenationEnabled;
|
||||
|
||||
std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths) const;
|
||||
void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
|
||||
const std::vector<size_t>& lineBreakIndices,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
|
||||
std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId);
|
||||
|
||||
public:
|
||||
explicit ParsedText(const TextBlock::BLOCK_STYLE style, const bool extraParagraphSpacing,
|
||||
const bool hyphenationEnabled)
|
||||
@@ -28,7 +34,9 @@ class ParsedText {
|
||||
void addWord(std::string word, EpdFontStyle fontStyle);
|
||||
void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; }
|
||||
TextBlock::BLOCK_STYLE getStyle() const { return style; }
|
||||
size_t size() const { return words.size(); }
|
||||
bool isEmpty() const { return words.empty(); }
|
||||
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
|
||||
bool includeLastLine = true);
|
||||
};
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
#include "Section.h"
|
||||
|
||||
#include <FsHelpers.h>
|
||||
#include <SD.h>
|
||||
#include <Serialization.h>
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "FsHelpers.h"
|
||||
#include "Page.h"
|
||||
#include "parsers/ChapterHtmlSlimParser.h"
|
||||
|
||||
@@ -16,7 +14,10 @@ constexpr uint8_t SECTION_FILE_VERSION = 6;
|
||||
void Section::onPageComplete(std::unique_ptr<Page> page) {
|
||||
const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin";
|
||||
|
||||
std::ofstream outputFile("/sd" + filePath);
|
||||
File outputFile;
|
||||
if (!FsHelpers::openFileForWrite("SCT", filePath, outputFile)) {
|
||||
return;
|
||||
}
|
||||
page->serialize(outputFile);
|
||||
outputFile.close();
|
||||
|
||||
@@ -28,7 +29,10 @@ void Section::onPageComplete(std::unique_ptr<Page> page) {
|
||||
void Section::writeCacheMetadata(const int fontId, const float lineCompression, const int marginTop,
|
||||
const int marginRight, const int marginBottom, const int marginLeft,
|
||||
const bool extraParagraphSpacing, const bool hyphenationEnabled) const {
|
||||
std::ofstream outputFile(("/sd" + cachePath + "/section.bin").c_str());
|
||||
File outputFile;
|
||||
if (!FsHelpers::openFileForWrite("SCT", cachePath + "/section.bin", outputFile)) {
|
||||
return;
|
||||
}
|
||||
serialization::writePod(outputFile, SECTION_FILE_VERSION);
|
||||
serialization::writePod(outputFile, fontId);
|
||||
serialization::writePod(outputFile, lineCompression);
|
||||
@@ -45,17 +49,12 @@ void Section::writeCacheMetadata(const int fontId, const float lineCompression,
|
||||
bool Section::loadCacheMetadata(const int fontId, const float lineCompression, const int marginTop,
|
||||
const int marginRight, const int marginBottom, const int marginLeft,
|
||||
const bool extraParagraphSpacing, const bool hyphenationEnabled) {
|
||||
if (!SD.exists(cachePath.c_str())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto sectionFilePath = cachePath + "/section.bin";
|
||||
if (!SD.exists(sectionFilePath.c_str())) {
|
||||
File inputFile;
|
||||
if (!FsHelpers::openFileForRead("SCT", sectionFilePath, inputFile)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ifstream inputFile(("/sd" + sectionFilePath).c_str());
|
||||
|
||||
// Match parameters
|
||||
{
|
||||
uint8_t version;
|
||||
@@ -120,15 +119,14 @@ bool Section::clearCache() const {
|
||||
bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const int marginTop,
|
||||
const int marginRight, const int marginBottom, const int marginLeft,
|
||||
const bool extraParagraphSpacing, const bool hyphenationEnabled) {
|
||||
const auto localPath = epub->getSpineItem(spineIndex);
|
||||
|
||||
// TODO: Should we get rid of this file all together?
|
||||
// It currently saves us a bit of memory by allowing for all the inflation bits to be released
|
||||
// before loading the XML parser
|
||||
const auto localPath = epub->getSpineItem(spineIndex).href;
|
||||
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
|
||||
File f = SD.open(tmpHtmlPath.c_str(), FILE_WRITE, true);
|
||||
bool success = epub->readItemContentsToStream(localPath, f, 1024);
|
||||
f.close();
|
||||
File tmpHtml;
|
||||
if (!FsHelpers::openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) {
|
||||
return false;
|
||||
}
|
||||
bool success = epub->readItemContentsToStream(localPath, tmpHtml, 1024);
|
||||
tmpHtml.close();
|
||||
|
||||
if (!success) {
|
||||
Serial.printf("[%lu] [SCT] Failed to stream item contents to temp file\n", millis());
|
||||
@@ -137,10 +135,8 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
|
||||
|
||||
Serial.printf("[%lu] [SCT] Streamed temp HTML to %s\n", millis(), tmpHtmlPath.c_str());
|
||||
|
||||
const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath;
|
||||
|
||||
ChapterHtmlSlimParser visitor(sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight,
|
||||
marginBottom, marginLeft, extraParagraphSpacing, hyphenationEnabled,
|
||||
ChapterHtmlSlimParser visitor(tmpHtmlPath, renderer, fontId, lineCompression, marginTop, marginRight, marginBottom,
|
||||
marginLeft, extraParagraphSpacing, hyphenationEnabled,
|
||||
[this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); });
|
||||
success = visitor.parseAndBuildPages();
|
||||
|
||||
@@ -157,13 +153,12 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
|
||||
}
|
||||
|
||||
std::unique_ptr<Page> Section::loadPageFromSD() const {
|
||||
const auto filePath = "/sd" + cachePath + "/page_" + std::to_string(currentPage) + ".bin";
|
||||
if (!SD.exists(filePath.c_str() + 3)) {
|
||||
Serial.printf("[%lu] [SCT] Page file does not exist: %s\n", millis(), filePath.c_str());
|
||||
const auto filePath = cachePath + "/page_" + std::to_string(currentPage) + ".bin";
|
||||
|
||||
File inputFile;
|
||||
if (!FsHelpers::openFileForRead("SCT", filePath, inputFile)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::ifstream inputFile(filePath);
|
||||
auto page = Page::deserialize(inputFile);
|
||||
inputFile.close();
|
||||
return page;
|
||||
|
||||
@@ -21,9 +21,10 @@ class Section {
|
||||
int currentPage = 0;
|
||||
|
||||
explicit Section(const std::shared_ptr<Epub>& epub, const int spineIndex, GfxRenderer& renderer)
|
||||
: epub(epub), spineIndex(spineIndex), renderer(renderer) {
|
||||
cachePath = epub->getCachePath() + "/" + std::to_string(spineIndex);
|
||||
}
|
||||
: epub(epub),
|
||||
spineIndex(spineIndex),
|
||||
renderer(renderer),
|
||||
cachePath(epub->getCachePath() + "/" + std::to_string(spineIndex)) {}
|
||||
~Section() = default;
|
||||
bool loadCacheMetadata(int fontId, float lineCompression, int marginTop, int marginRight, int marginBottom,
|
||||
int marginLeft, bool extraParagraphSpacing, bool hyphenationEnabled);
|
||||
|
||||
@@ -17,27 +17,27 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
|
||||
}
|
||||
}
|
||||
|
||||
void TextBlock::serialize(std::ostream& os) const {
|
||||
void TextBlock::serialize(File& file) const {
|
||||
// words
|
||||
const uint32_t wc = words.size();
|
||||
serialization::writePod(os, wc);
|
||||
for (const auto& w : words) serialization::writeString(os, w);
|
||||
serialization::writePod(file, wc);
|
||||
for (const auto& w : words) serialization::writeString(file, w);
|
||||
|
||||
// wordXpos
|
||||
const uint32_t xc = wordXpos.size();
|
||||
serialization::writePod(os, xc);
|
||||
for (auto x : wordXpos) serialization::writePod(os, x);
|
||||
serialization::writePod(file, xc);
|
||||
for (auto x : wordXpos) serialization::writePod(file, x);
|
||||
|
||||
// wordStyles
|
||||
const uint32_t sc = wordStyles.size();
|
||||
serialization::writePod(os, sc);
|
||||
for (auto s : wordStyles) serialization::writePod(os, s);
|
||||
serialization::writePod(file, sc);
|
||||
for (auto s : wordStyles) serialization::writePod(file, s);
|
||||
|
||||
// style
|
||||
serialization::writePod(os, style);
|
||||
serialization::writePod(file, style);
|
||||
}
|
||||
|
||||
std::unique_ptr<TextBlock> TextBlock::deserialize(std::istream& is) {
|
||||
std::unique_ptr<TextBlock> TextBlock::deserialize(File& file) {
|
||||
uint32_t wc, xc, sc;
|
||||
std::list<std::string> words;
|
||||
std::list<uint16_t> wordXpos;
|
||||
@@ -45,22 +45,22 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(std::istream& is) {
|
||||
BLOCK_STYLE style;
|
||||
|
||||
// words
|
||||
serialization::readPod(is, wc);
|
||||
serialization::readPod(file, wc);
|
||||
words.resize(wc);
|
||||
for (auto& w : words) serialization::readString(is, w);
|
||||
for (auto& w : words) serialization::readString(file, w);
|
||||
|
||||
// wordXpos
|
||||
serialization::readPod(is, xc);
|
||||
serialization::readPod(file, xc);
|
||||
wordXpos.resize(xc);
|
||||
for (auto& x : wordXpos) serialization::readPod(is, x);
|
||||
for (auto& x : wordXpos) serialization::readPod(file, x);
|
||||
|
||||
// wordStyles
|
||||
serialization::readPod(is, sc);
|
||||
serialization::readPod(file, sc);
|
||||
wordStyles.resize(sc);
|
||||
for (auto& s : wordStyles) serialization::readPod(is, s);
|
||||
for (auto& s : wordStyles) serialization::readPod(file, s);
|
||||
|
||||
// style
|
||||
serialization::readPod(is, style);
|
||||
serialization::readPod(file, style);
|
||||
|
||||
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
#include <EpdFontFamily.h>
|
||||
#include <FS.h>
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
@@ -35,6 +36,6 @@ class TextBlock final : public Block {
|
||||
// given a renderer works out where to break the words into lines
|
||||
void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
|
||||
BlockType getType() override { return TEXT_BLOCK; }
|
||||
void serialize(std::ostream& os) const;
|
||||
static std::unique_ptr<TextBlock> deserialize(std::istream& is);
|
||||
void serialize(File& file) const;
|
||||
static std::unique_ptr<TextBlock> deserialize(File& file);
|
||||
};
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "ChapterHtmlSlimParser.h"
|
||||
|
||||
#include <FsHelpers.h>
|
||||
#include <GfxRenderer.h>
|
||||
#include <HardwareSerial.h>
|
||||
#include <expat.h>
|
||||
@@ -10,13 +11,13 @@
|
||||
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
|
||||
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
|
||||
|
||||
const char* BLOCK_TAGS[] = {"p", "li", "div", "br"};
|
||||
const char* BLOCK_TAGS[] = {"p", "li", "div", "br", "blockquote"};
|
||||
constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]);
|
||||
|
||||
const char* BOLD_TAGS[] = {"b"};
|
||||
const char* BOLD_TAGS[] = {"b", "strong"};
|
||||
constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
|
||||
|
||||
const char* ITALIC_TAGS[] = {"i"};
|
||||
const char* ITALIC_TAGS[] = {"i", "em"};
|
||||
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
|
||||
|
||||
const char* IMAGE_TAGS[] = {"img"};
|
||||
@@ -143,6 +144,17 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
|
||||
self->partWordBuffer[self->partWordBufferIndex++] = s[i];
|
||||
}
|
||||
|
||||
// If we have > 750 words buffered up, perform the layout and consume out all but the last line
|
||||
// There should be enough here to build out 1-2 full pages and doing this will free up a lot of
|
||||
// memory.
|
||||
// Spotted when reading Intermezzo, there are some really long text blocks in there.
|
||||
if (self->currentTextBlock->size() > 750) {
|
||||
Serial.printf("[%lu] [EHP] Text block too long, splitting into multiple pages\n", millis());
|
||||
self->currentTextBlock->layoutAndExtractLines(
|
||||
self->renderer, self->fontId, self->marginLeft + self->marginRight,
|
||||
[self](const std::shared_ptr<TextBlock>& textBlock) { self->addLineToPage(textBlock); }, false);
|
||||
}
|
||||
}
|
||||
|
||||
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
|
||||
@@ -203,48 +215,59 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
|
||||
return false;
|
||||
}
|
||||
|
||||
XML_SetUserData(parser, this);
|
||||
XML_SetElementHandler(parser, startElement, endElement);
|
||||
XML_SetCharacterDataHandler(parser, characterData);
|
||||
|
||||
FILE* file = fopen(filepath, "r");
|
||||
if (!file) {
|
||||
Serial.printf("[%lu] [EHP] Couldn't open file %s\n", millis(), filepath);
|
||||
File file;
|
||||
if (!FsHelpers::openFileForRead("EHP", filepath, file)) {
|
||||
XML_ParserFree(parser);
|
||||
return false;
|
||||
}
|
||||
|
||||
XML_SetUserData(parser, this);
|
||||
XML_SetElementHandler(parser, startElement, endElement);
|
||||
XML_SetCharacterDataHandler(parser, characterData);
|
||||
|
||||
do {
|
||||
void* const buf = XML_GetBuffer(parser, 1024);
|
||||
if (!buf) {
|
||||
Serial.printf("[%lu] [EHP] Couldn't allocate memory for buffer\n", millis());
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
fclose(file);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t len = fread(buf, 1, 1024, file);
|
||||
const size_t len = file.read(static_cast<uint8_t*>(buf), 1024);
|
||||
|
||||
if (ferror(file)) {
|
||||
if (len == 0) {
|
||||
Serial.printf("[%lu] [EHP] File read error\n", millis());
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
fclose(file);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
done = feof(file);
|
||||
done = file.available() == 0;
|
||||
|
||||
if (XML_ParseBuffer(parser, static_cast<int>(len), done) == XML_STATUS_ERROR) {
|
||||
Serial.printf("[%lu] [EHP] Parse error at line %lu:\n%s\n", millis(), XML_GetCurrentLineNumber(parser),
|
||||
XML_ErrorString(XML_GetErrorCode(parser)));
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
fclose(file);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
fclose(file);
|
||||
file.close();
|
||||
|
||||
// Process last page if there is still text
|
||||
if (currentTextBlock) {
|
||||
|
||||
@@ -15,7 +15,7 @@ class GfxRenderer;
|
||||
#define MAX_WORD_SIZE 200
|
||||
|
||||
class ChapterHtmlSlimParser {
|
||||
const char* filepath;
|
||||
const std::string& filepath;
|
||||
GfxRenderer& renderer;
|
||||
std::function<void(std::unique_ptr<Page>)> completePageFn;
|
||||
int depth = 0;
|
||||
@@ -46,7 +46,7 @@ class ChapterHtmlSlimParser {
|
||||
static void XMLCALL endElement(void* userData, const XML_Char* name);
|
||||
|
||||
public:
|
||||
explicit ChapterHtmlSlimParser(const char* filepath, GfxRenderer& renderer, const int fontId,
|
||||
explicit ChapterHtmlSlimParser(const std::string& filepath, GfxRenderer& renderer, const int fontId,
|
||||
const float lineCompression, const int marginTop, const int marginRight,
|
||||
const int marginBottom, const int marginLeft, const bool extraParagraphSpacing,
|
||||
const bool hyphenationEnabled,
|
||||
|
||||
@@ -14,12 +14,13 @@ bool ContainerParser::setup() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ContainerParser::teardown() {
|
||||
ContainerParser::~ContainerParser() {
|
||||
if (parser) {
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t ContainerParser::write(const uint8_t data) { return write(&data, 1); }
|
||||
|
||||
@@ -23,9 +23,9 @@ class ContainerParser final : public Print {
|
||||
std::string fullPath;
|
||||
|
||||
explicit ContainerParser(const size_t xmlSize) : remainingSize(xmlSize) {}
|
||||
~ContainerParser() override;
|
||||
|
||||
bool setup();
|
||||
bool teardown();
|
||||
|
||||
size_t write(uint8_t) override;
|
||||
size_t write(const uint8_t* buffer, size_t size) override;
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
#include "ContentOpfParser.h"
|
||||
|
||||
#include <FsHelpers.h>
|
||||
#include <HardwareSerial.h>
|
||||
#include <Serialization.h>
|
||||
#include <ZipFile.h>
|
||||
|
||||
#include "../BookMetadataCache.h"
|
||||
|
||||
namespace {
|
||||
constexpr const char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
|
||||
}
|
||||
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
|
||||
constexpr char itemCacheFile[] = "/.items.bin";
|
||||
} // namespace
|
||||
|
||||
bool ContentOpfParser::setup() {
|
||||
parser = XML_ParserCreate(nullptr);
|
||||
@@ -20,12 +25,20 @@ bool ContentOpfParser::setup() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ContentOpfParser::teardown() {
|
||||
ContentOpfParser::~ContentOpfParser() {
|
||||
if (parser) {
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
}
|
||||
return true;
|
||||
if (tempItemStore) {
|
||||
tempItemStore.close();
|
||||
}
|
||||
if (SD.exists((cachePath + itemCacheFile).c_str())) {
|
||||
SD.remove((cachePath + itemCacheFile).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); }
|
||||
@@ -41,6 +54,9 @@ size_t ContentOpfParser::write(const uint8_t* buffer, const size_t size) {
|
||||
|
||||
if (!buf) {
|
||||
Serial.printf("[%lu] [COF] Couldn't allocate memory for buffer\n", millis());
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
return 0;
|
||||
@@ -52,6 +68,9 @@ size_t ContentOpfParser::write(const uint8_t* buffer, const size_t size) {
|
||||
if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
|
||||
Serial.printf("[%lu] [COF] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
|
||||
XML_ErrorString(XML_GetErrorCode(parser)));
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
return 0;
|
||||
@@ -86,11 +105,21 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
|
||||
if (self->state == IN_PACKAGE && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
|
||||
self->state = IN_MANIFEST;
|
||||
if (!FsHelpers::openFileForWrite("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
|
||||
Serial.printf(
|
||||
"[%lu] [COF] Couldn't open temp items file for writing. This is probably going to be a fatal error.\n",
|
||||
millis());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->state == IN_PACKAGE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) {
|
||||
self->state = IN_SPINE;
|
||||
if (!FsHelpers::openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
|
||||
Serial.printf(
|
||||
"[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n",
|
||||
millis());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -127,7 +156,13 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
}
|
||||
}
|
||||
|
||||
self->items[itemId] = href;
|
||||
// Write items down to SD card
|
||||
serialization::writeString(self->tempItemStore, itemId);
|
||||
serialization::writeString(self->tempItemStore, href);
|
||||
|
||||
if (itemId == self->coverItemId) {
|
||||
self->coverItemHref = href;
|
||||
}
|
||||
|
||||
if (mediaType == MEDIA_TYPE_NCX) {
|
||||
if (self->tocNcxPath.empty()) {
|
||||
@@ -140,14 +175,29 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) {
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], "idref") == 0) {
|
||||
self->spineRefs.emplace_back(atts[i + 1]);
|
||||
break;
|
||||
// NOTE: This relies on spine appearing after item manifest (which is pretty safe as it's part of the EPUB spec)
|
||||
// Only run the spine parsing if there's a cache to add it to
|
||||
if (self->cache) {
|
||||
if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) {
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], "idref") == 0) {
|
||||
const std::string idref = atts[i + 1];
|
||||
// Resolve the idref to href using items map
|
||||
self->tempItemStore.seek(0);
|
||||
std::string itemId;
|
||||
std::string href;
|
||||
while (self->tempItemStore.available()) {
|
||||
serialization::readString(self->tempItemStore, itemId);
|
||||
serialization::readString(self->tempItemStore, href);
|
||||
if (itemId == idref) {
|
||||
self->cache->createSpineEntry(href);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,11 +216,13 @@ void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name)
|
||||
|
||||
if (self->state == IN_SPINE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) {
|
||||
self->state = IN_PACKAGE;
|
||||
self->tempItemStore.close();
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->state == IN_MANIFEST && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
|
||||
self->state = IN_PACKAGE;
|
||||
self->tempItemStore.close();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
#include <Print.h>
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Epub.h"
|
||||
#include "expat.h"
|
||||
|
||||
class BookMetadataCache;
|
||||
|
||||
class ContentOpfParser final : public Print {
|
||||
enum ParserState {
|
||||
START,
|
||||
@@ -16,10 +16,14 @@ class ContentOpfParser final : public Print {
|
||||
IN_SPINE,
|
||||
};
|
||||
|
||||
const std::string& cachePath;
|
||||
const std::string& baseContentPath;
|
||||
size_t remainingSize;
|
||||
XML_Parser parser = nullptr;
|
||||
ParserState state = START;
|
||||
BookMetadataCache* cache;
|
||||
File tempItemStore;
|
||||
std::string coverItemId;
|
||||
|
||||
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
||||
static void characterData(void* userData, const XML_Char* s, int len);
|
||||
@@ -28,15 +32,14 @@ class ContentOpfParser final : public Print {
|
||||
public:
|
||||
std::string title;
|
||||
std::string tocNcxPath;
|
||||
std::string coverItemId;
|
||||
std::map<std::string, std::string> items;
|
||||
std::vector<std::string> spineRefs;
|
||||
std::string coverItemHref;
|
||||
|
||||
explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize)
|
||||
: baseContentPath(baseContentPath), remainingSize(xmlSize) {}
|
||||
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
|
||||
BookMetadataCache* cache)
|
||||
: cachePath(cachePath), baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
|
||||
~ContentOpfParser() override;
|
||||
|
||||
bool setup();
|
||||
bool teardown();
|
||||
|
||||
size_t write(uint8_t) override;
|
||||
size_t write(const uint8_t* buffer, size_t size) override;
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
#include <HardwareSerial.h>
|
||||
|
||||
#include "../BookMetadataCache.h"
|
||||
|
||||
bool TocNcxParser::setup() {
|
||||
parser = XML_ParserCreate(nullptr);
|
||||
if (!parser) {
|
||||
@@ -15,12 +17,14 @@ bool TocNcxParser::setup() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TocNcxParser::teardown() {
|
||||
TocNcxParser::~TocNcxParser() {
|
||||
if (parser) {
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t TocNcxParser::write(const uint8_t data) { return write(&data, 1); }
|
||||
@@ -35,6 +39,11 @@ size_t TocNcxParser::write(const uint8_t* buffer, const size_t size) {
|
||||
void* const buf = XML_GetBuffer(parser, 1024);
|
||||
if (!buf) {
|
||||
Serial.printf("[%lu] [TOC] Couldn't allocate memory for buffer\n", millis());
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -44,6 +53,11 @@ size_t TocNcxParser::write(const uint8_t* buffer, const size_t size) {
|
||||
if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
|
||||
Serial.printf("[%lu] [TOC] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
|
||||
XML_ErrorString(XML_GetErrorCode(parser)));
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -154,8 +168,9 @@ void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) {
|
||||
href = href.substr(0, pos);
|
||||
}
|
||||
|
||||
// Push to vector
|
||||
self->toc.emplace_back(self->currentLabel, href, anchor, self->currentDepth);
|
||||
if (self->cache) {
|
||||
self->cache->createTocEntry(self->currentLabel, href, anchor, self->currentDepth);
|
||||
}
|
||||
|
||||
// Clear them so we don't re-add them if there are weird XML structures
|
||||
self->currentLabel.clear();
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
#pragma once
|
||||
#include <Print.h>
|
||||
#include <expat.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Epub/EpubTocEntry.h"
|
||||
#include "expat.h"
|
||||
class BookMetadataCache;
|
||||
|
||||
class TocNcxParser final : public Print {
|
||||
enum ParserState { START, IN_NCX, IN_NAV_MAP, IN_NAV_POINT, IN_NAV_LABEL, IN_NAV_LABEL_TEXT, IN_CONTENT };
|
||||
@@ -14,23 +13,22 @@ class TocNcxParser final : public Print {
|
||||
size_t remainingSize;
|
||||
XML_Parser parser = nullptr;
|
||||
ParserState state = START;
|
||||
BookMetadataCache* cache;
|
||||
|
||||
std::string currentLabel;
|
||||
std::string currentSrc;
|
||||
size_t currentDepth = 0;
|
||||
uint8_t currentDepth = 0;
|
||||
|
||||
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
||||
static void characterData(void* userData, const XML_Char* s, int len);
|
||||
static void endElement(void* userData, const XML_Char* name);
|
||||
|
||||
public:
|
||||
std::vector<EpubTocEntry> toc;
|
||||
|
||||
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize)
|
||||
: baseContentPath(baseContentPath), remainingSize(xmlSize) {}
|
||||
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache)
|
||||
: baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
|
||||
~TocNcxParser() override;
|
||||
|
||||
bool setup();
|
||||
bool teardown();
|
||||
|
||||
size_t write(uint8_t) override;
|
||||
size_t write(const uint8_t* buffer, size_t size) override;
|
||||
|
||||
Reference in New Issue
Block a user