crosspoint-reader/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
cottongin 8fa01bc83a
Some checks failed
CI / build (push) Failing after 2m16s
fix: prevent Serial.printf from blocking when USB disconnected
On ESP32-C3 with USB CDC, Serial.printf() blocks indefinitely when USB
is not connected. This caused device freezes when booted without USB.

Solution: Call Serial.setTxTimeoutMs(0) after Serial.begin() to make
all Serial output non-blocking.

Also added if (Serial) guards to high-traffic logging paths in
EpubReaderActivity as belt-and-suspenders protection.

Includes documentation of the debugging process and Serial call inventory.

Also applies clang-format to fix pre-existing formatting issues.
2026-01-28 16:02:13 -05:00

788 lines
30 KiB
C++

#include "ChapterHtmlSlimParser.h"
#include <GfxRenderer.h>
#include <HardwareSerial.h>
#include <SDCardManager.h>
#include <expat.h>
#include "../../Epub.h"
#include "../Page.h"
#include "../converters/ImageDecoderFactory.h"
#include "../converters/ImageToFramebufferDecoder.h"
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
// Minimum file size (in bytes) to show progress bar - smaller chapters don't benefit from it
constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
const char* BLOCK_TAGS[] = {"p", "li", "div", "br", "blockquote"};
constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]);
const char* BOLD_TAGS[] = {"b", "strong"};
constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
const char* ITALIC_TAGS[] = {"i", "em"};
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
const char* UNDERLINE_TAGS[] = {"u", "ins"};
constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]);
// Include "image" for SVG <image> elements (common in Calibre-generated covers)
const char* IMAGE_TAGS[] = {"img", "image"};
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
const char* SKIP_TAGS[] = {"head"};
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; }
// given the start and end of a tag, check to see if it matches a known tag
bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
for (int i = 0; i < possible_tag_count; i++) {
if (strcmp(tag_name, possible_tags[i]) == 0) {
return true;
}
}
return false;
}
// Create a BlockStyle from CSS style properties
BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) {
BlockStyle blockStyle;
blockStyle.marginTop = static_cast<int8_t>(cssStyle.marginTop + cssStyle.paddingTop);
blockStyle.marginBottom = static_cast<int8_t>(cssStyle.marginBottom + cssStyle.paddingBottom);
blockStyle.paddingTop = cssStyle.paddingTop;
blockStyle.paddingBottom = cssStyle.paddingBottom;
blockStyle.textIndent = static_cast<int16_t>(cssStyle.indentPixels);
return blockStyle;
}
// Update effective bold/italic/underline based on block style and inline style stack
void ChapterHtmlSlimParser::updateEffectiveInlineStyle() {
// Start with block-level styles
effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold;
effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic;
effectiveUnderline =
currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline;
// Apply inline style stack in order
for (const auto& entry : inlineStyleStack) {
if (entry.hasBold) {
effectiveBold = entry.bold;
}
if (entry.hasItalic) {
effectiveItalic = entry.italic;
}
if (entry.hasUnderline) {
effectiveUnderline = entry.underline;
}
}
}
// Flush the contents of partWordBuffer to currentTextBlock
void ChapterHtmlSlimParser::flushPartWordBuffer() {
if (partWordBufferIndex == 0) return;
// Determine font style using effective styles
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (effectiveBold && effectiveItalic) {
fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (effectiveBold) {
fontStyle = EpdFontFamily::BOLD;
} else if (effectiveItalic) {
fontStyle = EpdFontFamily::ITALIC;
}
// Flush the buffer
partWordBuffer[partWordBufferIndex] = '\0';
currentTextBlock->addWord(partWordBuffer, fontStyle, effectiveUnderline);
partWordBufferIndex = 0;
}
// start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) {
currentTextBlock->setStyle(style);
currentTextBlock->setBlockStyle(blockStyle);
return;
}
makePages();
}
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled, blockStyle));
}
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); }
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Middle of skip
if (self->skipUntilDepth < self->depth) {
self->depth += 1;
return;
}
// Extract class and style attributes for CSS processing
std::string classAttr;
std::string styleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "class") == 0) {
classAttr = atts[i + 1];
} else if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
}
}
}
// Special handling for tables - show placeholder text instead of dropping silently
if (strcmp(name, "table") == 0) {
// Add placeholder text
self->startNewTextBlock(TextBlock::CENTER_ALIGN);
if (self->currentTextBlock) {
self->currentTextBlock->addWord("[Table omitted]", EpdFontFamily::ITALIC);
}
// Skip table contents
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) {
std::string src;
std::string alt;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
// Standard HTML img uses "src", SVG image uses "xlink:href" or "href"
if (strcmp(atts[i], "src") == 0 || strcmp(atts[i], "xlink:href") == 0 || strcmp(atts[i], "href") == 0) {
src = atts[i + 1];
} else if (strcmp(atts[i], "alt") == 0) {
alt = atts[i + 1];
}
}
if (!src.empty()) {
Serial.printf("[%lu] [EHP] Found image: src=%s\n", millis(), src.c_str());
// Get the spine item's href to resolve the relative path
size_t lastUnderscore = self->filepath.rfind('_');
if (lastUnderscore != std::string::npos && lastUnderscore > 0) {
std::string indexStr = self->filepath.substr(lastUnderscore + 1);
indexStr.resize(indexStr.find('.'));
int spineIndex = atoi(indexStr.c_str());
const auto& spineItem = self->epub->getSpineItem(spineIndex);
std::string htmlHref = spineItem.href;
size_t lastSlash = htmlHref.find_last_of('/');
std::string htmlDir = (lastSlash != std::string::npos) ? htmlHref.substr(0, lastSlash + 1) : "";
// Resolve the image path relative to the HTML file
std::string imageHref = src;
while (imageHref.find("../") == 0) {
imageHref = imageHref.substr(3);
if (!htmlDir.empty()) {
size_t dirSlash = htmlDir.find_last_of('/', htmlDir.length() - 2);
htmlDir = (dirSlash != std::string::npos) ? htmlDir.substr(0, dirSlash + 1) : "";
}
}
std::string resolvedPath = htmlDir + imageHref;
// Create a unique filename for the cached image
std::string ext;
size_t extPos = resolvedPath.rfind('.');
if (extPos != std::string::npos) {
ext = resolvedPath.substr(extPos);
}
std::string cachedImagePath = self->epub->getCachePath() + "/img_" + std::to_string(spineIndex) + "_" +
std::to_string(self->imageCounter++) + ext;
// Extract image to cache file
FsFile cachedImageFile;
bool extractSuccess = false;
if (SdMan.openFileForWrite("EHP", cachedImagePath, cachedImageFile)) {
extractSuccess = self->epub->readItemContentsToStream(resolvedPath, cachedImageFile, 4096);
cachedImageFile.flush();
cachedImageFile.close();
delay(50); // Give SD card time to sync
}
if (extractSuccess) {
// Get image dimensions
ImageDimensions dims = {0, 0};
ImageToFramebufferDecoder* decoder = ImageDecoderFactory::getDecoder(cachedImagePath);
if (decoder && decoder->getDimensions(cachedImagePath, dims)) {
Serial.printf("[%lu] [EHP] Image dimensions: %dx%d\n", millis(), dims.width, dims.height);
// Scale to fit viewport while maintaining aspect ratio
int maxWidth = self->viewportWidth;
int maxHeight = self->viewportHeight;
float scaleX = (dims.width > maxWidth) ? (float)maxWidth / dims.width : 1.0f;
float scaleY = (dims.height > maxHeight) ? (float)maxHeight / dims.height : 1.0f;
float scale = (scaleX < scaleY) ? scaleX : scaleY;
if (scale > 1.0f) scale = 1.0f;
int displayWidth = (int)(dims.width * scale);
int displayHeight = (int)(dims.height * scale);
Serial.printf("[%lu] [EHP] Display size: %dx%d (scale %.2f)\n", millis(), displayWidth, displayHeight,
scale);
// Create page for image
if (self->currentPage && !self->currentPage->elements.empty()) {
self->completePageFn(std::move(self->currentPage));
self->currentPage.reset(new Page());
if (!self->currentPage) {
Serial.printf("[%lu] [EHP] Failed to create new page\n", millis());
return;
}
self->currentPageNextY = 0;
} else if (!self->currentPage) {
self->currentPage.reset(new Page());
if (!self->currentPage) {
Serial.printf("[%lu] [EHP] Failed to create initial page\n", millis());
return;
}
self->currentPageNextY = 0;
}
// Create ImageBlock and add to page
auto imageBlock = std::make_shared<ImageBlock>(cachedImagePath, displayWidth, displayHeight);
if (!imageBlock) {
Serial.printf("[%lu] [EHP] Failed to create ImageBlock\n", millis());
return;
}
int xPos = (self->viewportWidth - displayWidth) / 2;
auto pageImage = std::make_shared<PageImage>(imageBlock, xPos, self->currentPageNextY);
if (!pageImage) {
Serial.printf("[%lu] [EHP] Failed to create PageImage\n", millis());
return;
}
self->currentPage->elements.push_back(pageImage);
self->currentPageNextY += displayHeight;
self->depth += 1;
return;
} else {
Serial.printf("[%lu] [EHP] Failed to get image dimensions\n", millis());
SdMan.remove(cachedImagePath.c_str());
}
} else {
Serial.printf("[%lu] [EHP] Failed to extract image\n", millis());
}
}
}
// Fallback: show placeholder text when image processing fails
// This handles progressive JPEGs, unsupported formats, memory issues, etc.
std::string placeholder;
if (!alt.empty()) {
placeholder = "[Image: " + alt + "]";
} else if (!src.empty()) {
// Extract filename from path for a more informative placeholder
size_t lastSlash = src.find_last_of('/');
std::string filename = (lastSlash != std::string::npos) ? src.substr(lastSlash + 1) : src;
placeholder = "[Image: " + filename + "]";
} else {
placeholder = "[Image unavailable]";
}
self->startNewTextBlock(TextBlock::CENTER_ALIGN);
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
self->depth += 1;
self->characterData(userData, placeholder.c_str(), placeholder.length());
return;
}
}
if (matches(name, SKIP_TAGS, NUM_SKIP_TAGS)) {
// start skip
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Skip blocks with role="doc-pagebreak" and epub:type="pagebreak"
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "role") == 0 && strcmp(atts[i + 1], "doc-pagebreak") == 0 ||
strcmp(atts[i], "epub:type") == 0 && strcmp(atts[i + 1], "pagebreak") == 0) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
}
}
// Determine if this is a block element
bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
// Compute CSS style for this element
CssStyle cssStyle;
if (self->cssParser) {
// Get combined tag + class styles
cssStyle = self->cssParser->resolveStyle(name, classAttr);
// Merge inline style (highest priority)
if (!styleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
cssStyle.merge(inlineStyle);
}
}
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
// Headers: center aligned, bold, apply CSS overrides
TextBlock::Style alignment = TextBlock::CENTER_ALIGN;
if (cssStyle.hasTextAlign()) {
switch (cssStyle.alignment) {
case TextAlign::Left:
alignment = TextBlock::LEFT_ALIGN;
break;
case TextAlign::Right:
alignment = TextBlock::RIGHT_ALIGN;
break;
case TextAlign::Center:
alignment = TextBlock::CENTER_ALIGN;
break;
case TextAlign::Justify:
alignment = TextBlock::JUSTIFIED;
break;
default:
break;
}
}
self->currentBlockStyle = cssStyle;
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle));
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->updateEffectiveInlineStyle();
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (strcmp(name, "br") == 0) {
// Flush word preceding <br/> to currentTextBlock before calling startNewTextBlock
// This fixes issue where <br/> incorrectly wrapped the preceding word to a new line
self->flushPartWordBuffer();
self->startNewTextBlock(self->currentTextBlock->getStyle());
} else {
// Determine alignment from CSS or default
auto alignment = static_cast<TextBlock::Style>(self->paragraphAlignment);
if (cssStyle.hasTextAlign()) {
switch (cssStyle.alignment) {
case TextAlign::Left:
alignment = TextBlock::LEFT_ALIGN;
break;
case TextAlign::Right:
alignment = TextBlock::RIGHT_ALIGN;
break;
case TextAlign::Center:
alignment = TextBlock::CENTER_ALIGN;
break;
case TextAlign::Justify:
alignment = TextBlock::JUSTIFIED;
break;
default:
break;
}
}
self->currentBlockStyle = cssStyle;
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle));
self->updateEffectiveInlineStyle();
if (strcmp(name, "li") == 0) {
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR);
}
}
} else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) {
self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
// Push inline style entry for underline tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasUnderline = true;
entry.underline = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
// Push inline style entry for bold tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasBold = true;
entry.bold = true;
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
// Push inline style entry for italic tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasItalic = true;
entry.italic = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (strcmp(name, "span") == 0 || !isBlockElement) {
// Handle span and other inline elements for CSS styling
if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) {
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
}
}
self->depth += 1;
}
void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Middle of skip
if (self->skipUntilDepth < self->depth) {
return;
}
// Capture byte offset of this character data for page position tracking
if (self->xmlParser) {
self->lastCharDataOffset = XML_GetCurrentByteIndex(self->xmlParser);
}
// Determine font style from depth-based tracking and CSS effective style
const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold;
const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic;
const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline;
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (isBold && isItalic) {
fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (isBold) {
fontStyle = EpdFontFamily::BOLD;
} else if (isItalic) {
fontStyle = EpdFontFamily::ITALIC;
}
for (int i = 0; i < len; i++) {
if (isWhitespace(s[i])) {
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline);
self->partWordBufferIndex = 0;
}
// Skip the whitespace char
continue;
}
// Skip Zero Width No-Break Space / BOM (U+FEFF) = 0xEF 0xBB 0xBF
const XML_Char FEFF_BYTE_1 = static_cast<XML_Char>(0xEF);
const XML_Char FEFF_BYTE_2 = static_cast<XML_Char>(0xBB);
const XML_Char FEFF_BYTE_3 = static_cast<XML_Char>(0xBF);
if (s[i] == FEFF_BYTE_1) {
// Check if the next two bytes complete the 3-byte sequence
if ((i + 2 < len) && (s[i + 1] == FEFF_BYTE_2) && (s[i + 2] == FEFF_BYTE_3)) {
// Sequence 0xEF 0xBB 0xBF found!
i += 2; // Skip the next two bytes
continue; // Move to the next iteration
}
}
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline);
self->partWordBufferIndex = 0;
}
self->partWordBuffer[self->partWordBufferIndex++] = s[i];
}
// If we have > 750 words buffered up, perform the layout and consume out all but the last line
// There should be enough here to build out 1-2 full pages and doing this will free up a lot of
// memory.
// Spotted when reading Intermezzo, there are some really long text blocks in there.
if (self->currentTextBlock->size() > 750) {
Serial.printf("[%lu] [EHP] Text block too long, splitting into multiple pages\n", millis());
self->currentTextBlock->layoutAndExtractLines(
self->renderer, self->fontId, self->viewportWidth,
[self](const std::shared_ptr<TextBlock>& textBlock) { self->addLineToPage(textBlock); }, false);
}
}
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Check if any style state will change after we decrement depth
// If so, we MUST flush the partWordBuffer with the CURRENT style first
// Note: depth hasn't been decremented yet, so we check against (depth - 1)
const bool willPopStyleStack =
!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1;
const bool willClearBold = self->boldUntilDepth == self->depth - 1;
const bool willClearItalic = self->italicUntilDepth == self->depth - 1;
const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1;
const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
// Flush buffer with current style BEFORE any style changes
if (self->partWordBufferIndex > 0) {
// Flush if style will change OR if we're closing a block/structural element
const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) ||
matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) ||
matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1;
if (shouldFlush) {
// Use combined depth-based and CSS-based style
const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold;
const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic;
const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline;
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (isBold && isItalic) {
fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (isBold) {
fontStyle = EpdFontFamily::BOLD;
} else if (isItalic) {
fontStyle = EpdFontFamily::ITALIC;
}
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline);
self->partWordBufferIndex = 0;
}
}
self->depth -= 1;
// Leaving skip
if (self->skipUntilDepth == self->depth) {
self->skipUntilDepth = INT_MAX;
}
// Leaving bold tag
if (self->boldUntilDepth == self->depth) {
self->boldUntilDepth = INT_MAX;
}
// Leaving italic tag
if (self->italicUntilDepth == self->depth) {
self->italicUntilDepth = INT_MAX;
}
// Leaving underline tag
if (self->underlineUntilDepth == self->depth) {
self->underlineUntilDepth = INT_MAX;
}
// Pop from inline style stack if we pushed an entry at this depth
// This handles all inline elements: b, i, u, span, etc.
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
}
// Clear block style when leaving block elements
if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->currentBlockStyle.reset();
self->updateEffectiveInlineStyle();
}
}
bool ChapterHtmlSlimParser::parseAndBuildPages() {
startNewTextBlock((TextBlock::Style)this->paragraphAlignment);
xmlParser = XML_ParserCreate(nullptr);
int done;
if (!xmlParser) {
Serial.printf("[%lu] [EHP] Couldn't allocate memory for parser\n", millis());
return false;
}
FsFile file;
if (!SdMan.openFileForRead("EHP", filepath, file)) {
XML_ParserFree(xmlParser);
xmlParser = nullptr;
return false;
}
// Get file size for progress calculation
const size_t totalSize = file.size();
size_t bytesRead = 0;
int lastProgress = -1;
// Initialize offset tracking - first page starts at offset 0
currentPageStartOffset = 0;
lastCharDataOffset = 0;
XML_SetUserData(xmlParser, this);
XML_SetElementHandler(xmlParser, startElement, endElement);
XML_SetCharacterDataHandler(xmlParser, characterData);
do {
void* const buf = XML_GetBuffer(xmlParser, 1024);
if (!buf) {
Serial.printf("[%lu] [EHP] Couldn't allocate memory for buffer\n", millis());
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(xmlParser, nullptr);
XML_ParserFree(xmlParser);
xmlParser = nullptr;
file.close();
return false;
}
const size_t len = file.read(buf, 1024);
if (len == 0 && file.available() > 0) {
Serial.printf("[%lu] [EHP] File read error\n", millis());
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(xmlParser, nullptr);
XML_ParserFree(xmlParser);
xmlParser = nullptr;
file.close();
return false;
}
// Update progress (call every 10% change to avoid too frequent updates)
// Only show progress for larger chapters where rendering overhead is worth it
bytesRead += len;
if (progressFn && totalSize >= MIN_SIZE_FOR_PROGRESS) {
const int progress = static_cast<int>((bytesRead * 100) / totalSize);
if (lastProgress / 10 != progress / 10) {
lastProgress = progress;
progressFn(progress);
}
}
done = file.available() == 0;
if (XML_ParseBuffer(xmlParser, static_cast<int>(len), done) == XML_STATUS_ERROR) {
Serial.printf("[%lu] [EHP] Parse error at line %lu:\n%s\n", millis(), XML_GetCurrentLineNumber(xmlParser),
XML_ErrorString(XML_GetErrorCode(xmlParser)));
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(xmlParser, nullptr);
XML_ParserFree(xmlParser);
xmlParser = nullptr;
file.close();
return false;
}
} while (!done);
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(xmlParser, nullptr);
XML_ParserFree(xmlParser);
xmlParser = nullptr;
file.close();
// Process last page if there is still text
if (currentTextBlock) {
makePages();
// Set the content offset for the final page
if (currentPage) {
currentPage->firstContentOffset = static_cast<uint32_t>(currentPageStartOffset);
}
completePageFn(std::move(currentPage));
currentPage.reset();
currentTextBlock.reset();
}
return true;
}
void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
if (currentPageNextY + lineHeight > viewportHeight) {
// Set the content offset for the page being completed
if (currentPage) {
currentPage->firstContentOffset = static_cast<uint32_t>(currentPageStartOffset);
}
completePageFn(std::move(currentPage));
// Start new page - offset will be set when first content is added
currentPage.reset(new Page());
currentPageStartOffset = lastCharDataOffset; // Use offset from when content was parsed
currentPageNextY = 0;
}
currentPage->elements.push_back(std::make_shared<PageLine>(line, 0, currentPageNextY));
currentPageNextY += lineHeight;
}
void ChapterHtmlSlimParser::makePages() {
if (!currentTextBlock) {
Serial.printf("[%lu] [EHP] !! No text block to make pages for !!\n", millis());
return;
}
if (!currentPage) {
currentPage.reset(new Page());
// Use offset captured during character data parsing
currentPageStartOffset = lastCharDataOffset;
currentPageNextY = 0;
}
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
// Apply marginTop before the paragraph
const BlockStyle& blockStyle = currentTextBlock->getBlockStyle();
if (blockStyle.marginTop > 0) {
currentPageNextY += lineHeight * blockStyle.marginTop;
}
currentTextBlock->layoutAndExtractLines(
renderer, fontId, viewportWidth,
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
// Apply marginBottom after the paragraph
if (blockStyle.marginBottom > 0) {
currentPageNextY += lineHeight * blockStyle.marginBottom;
}
// Extra paragraph spacing if enabled (default behavior)
if (extraParagraphSpacing) {
currentPageNextY += lineHeight / 2;
}
}