Add expat and swap out EPUB HTML parser (#2)

* Add expat and swap out EPUB HTML parser

* Increase EpubHtmlParserSlim file buffer to 1024 bytes

* Clean up TextBlock functions

* Do not break words when leaving spans
This commit is contained in:
Dave Allie
2025-12-06 20:57:24 +11:00
committed by GitHub
parent ad8cee12ab
commit dd6e649d74
32 changed files with 15969 additions and 269 deletions

View File

@@ -5,11 +5,11 @@
#include <fstream>
#include "EpubHtmlParser.h"
#include "EpubHtmlParserSlim.h"
#include "Page.h"
void Section::onPageComplete(const Page* page) {
Serial.printf("Page %d complete\n", pageCount);
Serial.printf("Page %d complete - free mem: %lu\n", pageCount, ESP.getFreeHeap());
const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin";
@@ -75,11 +75,11 @@ bool Section::persistPageDataToSD() {
}
const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath;
auto visitor =
EpubHtmlParser(sdTmpHtmlPath.c_str(), renderer, [this](const Page* page) { this->onPageComplete(page); });
// TODO: Come back and see if mem used here can be lowered?
auto visitor =
EpubHtmlParserSlim(sdTmpHtmlPath.c_str(), renderer, [this](const Page* page) { this->onPageComplete(page); });
const bool success = visitor.parseAndBuildPages();
SD.remove(tmpHtmlPath.c_str());
if (!success) {
Serial.println("Failed to parse and build pages");