Files
crosspoint-reader-mod/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
cottongin c8ba4fe973 fix: Port upstream CSS-aware image sizing (PR #1002)
Parse CSS height/width into CssStyle for images and use aspect-ratio-
preserving logic when CSS dimensions are set. Falls back to viewport-fit
scaling when no CSS dimensions are present. Includes divide-by-zero
guards and viewport clamping with aspect ratio rescaling.

- Add imageHeight field to CssStyle/CssPropertyFlags
- Parse CSS height declarations into imageHeight
- Add imageHeight + width to cache serialization (bump cache v2->v3)
- Replace viewport-fit-only image scaling with CSS-aware sizing

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-19 14:21:31 -05:00

1411 lines
54 KiB
C++

#include "ChapterHtmlSlimParser.h"
#include <FsHelpers.h>
#include <GfxRenderer.h>
#include <HalStorage.h>
#include <Logging.h>
#include <expat.h>
#include <algorithm>
#include "../../Epub.h"
#include "../Page.h"
#include "../converters/ImageDecoderFactory.h"
#include "../converters/ImageToFramebufferDecoder.h"
#include "../htmlEntities.h"
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
// Minimum file size (in bytes) to show indexing popup - smaller chapters don't benefit from it
constexpr size_t MIN_SIZE_FOR_POPUP = 10 * 1024; // 10KB
constexpr size_t PARSE_BUFFER_SIZE = 1024;
const char* BLOCK_TAGS[] = {"p", "li", "div", "br", "blockquote"};
constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]);
const char* BOLD_TAGS[] = {"b", "strong"};
constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
const char* ITALIC_TAGS[] = {"i", "em"};
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
const char* UNDERLINE_TAGS[] = {"u", "ins"};
constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]);
const char* IMAGE_TAGS[] = {"img"};
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
const char* SKIP_TAGS[] = {"head"};
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
// Table tags that are transparent containers (just depth tracking, no special handling)
const char* TABLE_TRANSPARENT_TAGS[] = {"thead", "tbody", "tfoot", "colgroup"};
constexpr int NUM_TABLE_TRANSPARENT_TAGS = sizeof(TABLE_TRANSPARENT_TAGS) / sizeof(TABLE_TRANSPARENT_TAGS[0]);
// Table tags to skip entirely (their children produce no useful output)
const char* TABLE_SKIP_TAGS[] = {"caption"};
constexpr int NUM_TABLE_SKIP_TAGS = sizeof(TABLE_SKIP_TAGS) / sizeof(TABLE_SKIP_TAGS[0]);
bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; }
// Parse an HTML width attribute value into a CssLength.
// "200" -> 200px, "50%" -> 50 percent. Returns false if the value can't be parsed.
static bool parseHtmlWidthAttr(const char* value, CssLength& out) {
char* end = nullptr;
const float num = strtof(value, &end);
if (end == value || num < 0) return false;
if (*end == '%') {
out = CssLength(num, CssUnit::Percent);
} else {
out = CssLength(num, CssUnit::Pixels);
}
return true;
}
// given the start and end of a tag, check to see if it matches a known tag
bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
for (int i = 0; i < possible_tag_count; i++) {
if (strcmp(tag_name, possible_tags[i]) == 0) {
return true;
}
}
return false;
}
bool isHeaderOrBlock(const char* name) {
return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
}
// Update effective bold/italic/underline based on block style and inline style stack
void ChapterHtmlSlimParser::updateEffectiveInlineStyle() {
// Start with block-level styles
effectiveBold = currentCssStyle.hasFontWeight() && currentCssStyle.fontWeight == CssFontWeight::Bold;
effectiveItalic = currentCssStyle.hasFontStyle() && currentCssStyle.fontStyle == CssFontStyle::Italic;
effectiveUnderline =
currentCssStyle.hasTextDecoration() && currentCssStyle.textDecoration == CssTextDecoration::Underline;
// Apply inline style stack in order
for (const auto& entry : inlineStyleStack) {
if (entry.hasBold) {
effectiveBold = entry.bold;
}
if (entry.hasItalic) {
effectiveItalic = entry.italic;
}
if (entry.hasUnderline) {
effectiveUnderline = entry.underline;
}
}
}
// flush the contents of partWordBuffer to currentTextBlock
void ChapterHtmlSlimParser::flushPartWordBuffer() {
// Determine font style from depth-based tracking and CSS effective style
const bool isBold = boldUntilDepth < depth || effectiveBold;
const bool isItalic = italicUntilDepth < depth || effectiveItalic;
const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline;
// Combine style flags using bitwise OR
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (isBold) {
fontStyle = static_cast<EpdFontFamily::Style>(fontStyle | EpdFontFamily::BOLD);
}
if (isItalic) {
fontStyle = static_cast<EpdFontFamily::Style>(fontStyle | EpdFontFamily::ITALIC);
}
if (isUnderline) {
fontStyle = static_cast<EpdFontFamily::Style>(fontStyle | EpdFontFamily::UNDERLINE);
}
// flush the buffer
partWordBuffer[partWordBufferIndex] = '\0';
// Handle double-encoded &nbsp; entities (e.g. &amp;nbsp; in source -> literal "&nbsp;" after
// XML parsing). Common in Wikipedia and other generated EPUBs. Replace with a space so the text
// renders cleanly. The space stays within the word, preserving non-breaking behavior.
std::string flushedWord(partWordBuffer);
size_t entityPos = 0;
while ((entityPos = flushedWord.find("&nbsp;", entityPos)) != std::string::npos) {
flushedWord.replace(entityPos, 6, " ");
entityPos += 1;
}
currentTextBlock->addWord(flushedWord, fontStyle, false, nextWordContinues);
partWordBufferIndex = 0;
nextWordContinues = false;
}
// start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
// When inside a table cell, don't lay out to the page -- insert a forced line break
// within the cell's ParsedText so that block elements (p, div, br) create visual breaks.
if (inTable) {
if (partWordBufferIndex > 0) {
flushPartWordBuffer();
}
if (currentTextBlock && !currentTextBlock->isEmpty()) {
currentTextBlock->addLineBreak();
}
nextWordContinues = false;
return;
}
nextWordContinues = false; // New block = new paragraph, no continuation
if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) {
// Merge with existing block style to accumulate CSS styling from parent block elements.
// This handles cases like <div style="margin-bottom:2em"><h1>text</h1></div> where the
// div's margin should be preserved, even though it has no direct text content.
currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle));
return;
}
makePages();
}
currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle));
}
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Middle of skip
if (self->skipUntilDepth < self->depth) {
self->depth += 1;
return;
}
// Extract class and style attributes for CSS processing
std::string classAttr;
std::string styleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "class") == 0) {
classAttr = atts[i + 1];
} else if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
}
}
}
auto centeredBlockStyle = BlockStyle();
centeredBlockStyle.textAlignDefined = true;
centeredBlockStyle.alignment = CssTextAlign::Center;
// --- Table handling ---
if (strcmp(name, "table") == 0) {
if (self->inTable) {
// Nested table: skip it entirely for v1
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Flush any pending content before the table
if (self->currentTextBlock && !self->currentTextBlock->isEmpty()) {
self->makePages();
}
self->inTable = true;
self->tableData.reset(new TableData());
// Create a safe empty currentTextBlock so character data outside cells
// (e.g. whitespace between tags) doesn't crash
auto tableBlockStyle = BlockStyle();
tableBlockStyle.alignment = CssTextAlign::Left;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, tableBlockStyle));
self->depth += 1;
return;
}
// Table structure tags (only when inside a table)
if (self->inTable) {
if (strcmp(name, "tr") == 0) {
self->tableData->rows.push_back(TableRow());
self->depth += 1;
return;
}
// <col> — capture width hint for column sizing
if (strcmp(name, "col") == 0) {
CssLength widthHint;
bool hasHint = false;
// Parse HTML width attribute
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "width") == 0) {
hasHint = parseHtmlWidthAttr(atts[i + 1], widthHint);
break;
}
}
}
// CSS width (inline style) overrides HTML attribute
if (self->cssParser) {
std::string styleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
break;
}
}
}
if (!styleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
if (inlineStyle.hasWidth()) {
widthHint = inlineStyle.width;
hasHint = true;
}
}
}
if (hasHint) {
self->tableData->colWidthHints.push_back(widthHint);
} else {
// Push a zero-value placeholder to maintain index alignment
self->tableData->colWidthHints.push_back(CssLength());
}
self->depth += 1;
return;
}
if (strcmp(name, "td") == 0 || strcmp(name, "th") == 0) {
const bool isHeader = strcmp(name, "th") == 0;
// Parse colspan and width attributes
int colspan = 1;
CssLength cellWidthHint;
bool hasCellWidthHint = false;
std::string cellStyleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "colspan") == 0) {
colspan = atoi(atts[i + 1]);
if (colspan < 1) colspan = 1;
} else if (strcmp(atts[i], "width") == 0) {
hasCellWidthHint = parseHtmlWidthAttr(atts[i + 1], cellWidthHint);
} else if (strcmp(atts[i], "style") == 0) {
cellStyleAttr = atts[i + 1];
}
}
}
// CSS width (inline style or stylesheet) overrides HTML attribute
if (self->cssParser) {
std::string classAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "class") == 0) {
classAttr = atts[i + 1];
break;
}
}
}
CssStyle cellCssStyle = self->cssParser->resolveStyle(name, classAttr);
if (!cellStyleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(cellStyleAttr);
cellCssStyle.applyOver(inlineStyle);
}
if (cellCssStyle.hasWidth()) {
cellWidthHint = cellCssStyle.width;
hasCellWidthHint = true;
}
}
// Ensure there's a row to add cells to
if (self->tableData->rows.empty()) {
self->tableData->rows.push_back(TableRow());
}
// Create a new ParsedText for this cell (characterData will flow into it)
auto cellBlockStyle = BlockStyle();
cellBlockStyle.alignment = CssTextAlign::Left;
cellBlockStyle.textAlignDefined = true;
// Explicitly disable paragraph indent for table cells
cellBlockStyle.textIndent = 0;
cellBlockStyle.textIndentDefined = true;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, cellBlockStyle));
self->nextWordContinues = false;
// Track the cell
auto& currentRow = self->tableData->rows.back();
currentRow.cells.push_back(TableCell());
currentRow.cells.back().isHeader = isHeader;
currentRow.cells.back().colspan = colspan;
if (hasCellWidthHint) {
currentRow.cells.back().widthHint = cellWidthHint;
currentRow.cells.back().hasWidthHint = true;
}
// Apply bold for header cells
if (isHeader) {
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->updateEffectiveInlineStyle();
}
self->depth += 1;
return;
}
// Transparent table container tags
if (matches(name, TABLE_TRANSPARENT_TAGS, NUM_TABLE_TRANSPARENT_TAGS)) {
self->depth += 1;
return;
}
// Skip colgroup, col, caption
if (matches(name, TABLE_SKIP_TAGS, NUM_TABLE_SKIP_TAGS)) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Other tags inside table cells (p, div, span, b, i, etc.) fall through
// to the normal handling below. startNewTextBlock is a no-op when inTable.
}
if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) {
std::string src;
std::string alt;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "src") == 0) {
src = atts[i + 1];
} else if (strcmp(atts[i], "alt") == 0) {
alt = atts[i + 1];
}
}
if (!src.empty()) {
LOG_DBG("EHP", "Found image: src=%s", src.c_str());
{
// Resolve the image path relative to the HTML file
std::string resolvedPath = FsHelpers::normalisePath(self->contentBase + src);
// Check format support before any file I/O
ImageToFramebufferDecoder* decoder = ImageDecoderFactory::getDecoder(resolvedPath);
if (decoder) {
// Create a unique filename for the cached image
std::string ext;
size_t extPos = resolvedPath.rfind('.');
if (extPos != std::string::npos) {
ext = resolvedPath.substr(extPos);
}
std::string cachedImagePath = self->imageBasePath + std::to_string(self->imageCounter++) + ext;
// Extract image to cache file
FsFile cachedImageFile;
bool extractSuccess = false;
if (Storage.openFileForWrite("EHP", cachedImagePath, cachedImageFile)) {
extractSuccess = self->epub->readItemContentsToStream(resolvedPath, cachedImageFile, 4096);
cachedImageFile.flush();
cachedImageFile.close();
delay(50); // Give SD card time to sync
}
if (extractSuccess) {
// Get image dimensions
ImageDimensions dims = {0, 0};
if (decoder->getDimensions(cachedImagePath, dims)) {
LOG_DBG("EHP", "Image dimensions: %dx%d", dims.width, dims.height);
int displayWidth = 0;
int displayHeight = 0;
const float emSize =
static_cast<float>(self->renderer.getLineHeight(self->fontId)) * self->lineCompression;
CssStyle imgStyle = self->cssParser ? self->cssParser->resolveStyle("img", classAttr) : CssStyle{};
if (!styleAttr.empty()) {
imgStyle.applyOver(CssParser::parseInlineStyle(styleAttr));
}
const bool hasCssHeight = imgStyle.hasImageHeight();
const bool hasCssWidth = imgStyle.hasWidth();
if (hasCssHeight && dims.width > 0 && dims.height > 0) {
displayHeight = static_cast<int>(
imgStyle.imageHeight.toPixels(emSize, static_cast<float>(self->viewportHeight)) + 0.5f);
if (displayHeight < 1) displayHeight = 1;
displayWidth =
static_cast<int>(displayHeight * (static_cast<float>(dims.width) / dims.height) + 0.5f);
if (displayWidth > self->viewportWidth) {
displayWidth = self->viewportWidth;
displayHeight =
static_cast<int>(displayWidth * (static_cast<float>(dims.height) / dims.width) + 0.5f);
if (displayHeight < 1) displayHeight = 1;
}
if (displayWidth < 1) displayWidth = 1;
LOG_DBG("EHP", "Display size from CSS height: %dx%d", displayWidth, displayHeight);
} else if (hasCssWidth && !hasCssHeight && dims.width > 0 && dims.height > 0) {
displayWidth = static_cast<int>(
imgStyle.width.toPixels(emSize, static_cast<float>(self->viewportWidth)) + 0.5f);
if (displayWidth > self->viewportWidth) displayWidth = self->viewportWidth;
if (displayWidth < 1) displayWidth = 1;
displayHeight =
static_cast<int>(displayWidth * (static_cast<float>(dims.height) / dims.width) + 0.5f);
if (displayHeight > self->viewportHeight) {
displayHeight = self->viewportHeight;
displayWidth =
static_cast<int>(displayHeight * (static_cast<float>(dims.width) / dims.height) + 0.5f);
if (displayWidth < 1) displayWidth = 1;
}
if (displayHeight < 1) displayHeight = 1;
LOG_DBG("EHP", "Display size from CSS width: %dx%d", displayWidth, displayHeight);
} else {
int maxWidth = self->viewportWidth;
int maxHeight = self->viewportHeight;
float scaleX = (dims.width > maxWidth) ? (float)maxWidth / dims.width : 1.0f;
float scaleY = (dims.height > maxHeight) ? (float)maxHeight / dims.height : 1.0f;
float scale = (scaleX < scaleY) ? scaleX : scaleY;
if (scale > 1.0f) scale = 1.0f;
displayWidth = (int)(dims.width * scale);
displayHeight = (int)(dims.height * scale);
LOG_DBG("EHP", "Display size: %dx%d (scale %.2f)", displayWidth, displayHeight, scale);
}
// Create page for image - only break if image won't fit remaining space
if (self->currentPage && !self->currentPage->elements.empty() &&
(self->currentPageNextY + displayHeight > self->viewportHeight)) {
self->completePageFn(std::move(self->currentPage));
self->currentPage.reset(new Page());
if (!self->currentPage) {
LOG_ERR("EHP", "Failed to create new page");
return;
}
self->currentPageNextY = 0;
} else if (!self->currentPage) {
self->currentPage.reset(new Page());
if (!self->currentPage) {
LOG_ERR("EHP", "Failed to create initial page");
return;
}
self->currentPageNextY = 0;
}
// Create ImageBlock and add to page
auto imageBlock = std::make_shared<ImageBlock>(cachedImagePath, displayWidth, displayHeight);
if (!imageBlock) {
LOG_ERR("EHP", "Failed to create ImageBlock");
return;
}
int xPos = (self->viewportWidth - displayWidth) / 2;
auto pageImage = std::make_shared<PageImage>(imageBlock, xPos, self->currentPageNextY);
if (!pageImage) {
LOG_ERR("EHP", "Failed to create PageImage");
return;
}
self->currentPage->elements.push_back(pageImage);
self->currentPageNextY += displayHeight;
self->depth += 1;
return;
} else {
LOG_ERR("EHP", "Failed to get image dimensions");
Storage.remove(cachedImagePath.c_str());
}
} else {
LOG_ERR("EHP", "Failed to extract image");
}
} // if (decoder)
}
}
// Fallback to alt text if image processing fails
if (!alt.empty()) {
alt = "[Image: " + alt + "]";
self->startNewTextBlock(centeredBlockStyle);
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
self->depth += 1;
self->characterData(userData, alt.c_str(), alt.length());
// Skip any child content (skip until parent as we pre-advanced depth above)
self->skipUntilDepth = self->depth - 1;
return;
}
// No alt text, skip
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
}
if (matches(name, SKIP_TAGS, NUM_SKIP_TAGS)) {
// start skip
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Skip blocks with role="doc-pagebreak" and epub:type="pagebreak"
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "role") == 0 && strcmp(atts[i + 1], "doc-pagebreak") == 0 ||
strcmp(atts[i], "epub:type") == 0 && strcmp(atts[i + 1], "pagebreak") == 0) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
}
}
// Compute CSS style for this element
CssStyle cssStyle;
if (self->cssParser) {
// Get combined tag + class styles
cssStyle = self->cssParser->resolveStyle(name, classAttr);
// Merge inline style (highest priority)
if (!styleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
cssStyle.applyOver(inlineStyle);
}
}
const float emSize = static_cast<float>(self->renderer.getLineHeight(self->fontId)) * self->lineCompression;
const auto userAlignmentBlockStyle = BlockStyle::fromCssStyle(
cssStyle, emSize, static_cast<CssTextAlign>(self->paragraphAlignment), self->viewportWidth);
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->currentCssStyle = cssStyle;
auto headerBlockStyle = BlockStyle::fromCssStyle(cssStyle, emSize, CssTextAlign::Center, self->viewportWidth);
headerBlockStyle.textAlignDefined = true;
if (self->embeddedStyle && cssStyle.hasTextAlign()) {
headerBlockStyle.alignment = cssStyle.textAlign;
}
self->startNewTextBlock(headerBlockStyle);
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->updateEffectiveInlineStyle();
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (strcmp(name, "br") == 0) {
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
}
self->startNewTextBlock(self->currentTextBlock->getBlockStyle());
} else if (strcmp(name, "li") == 0) {
self->currentCssStyle = cssStyle;
self->startNewTextBlock(userAlignmentBlockStyle);
self->updateEffectiveInlineStyle();
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR);
self->listItemUntilDepth = std::min(self->listItemUntilDepth, self->depth);
} else if (strcmp(name, "p") == 0 && self->listItemUntilDepth < self->depth) {
// Inside a <li> element - don't start a new text block for <p>
// This prevents bullet points from appearing on their own line
self->currentCssStyle = cssStyle;
self->updateEffectiveInlineStyle();
} else {
self->currentCssStyle = cssStyle;
self->startNewTextBlock(userAlignmentBlockStyle);
self->updateEffectiveInlineStyle();
}
} else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) {
// Flush buffer before style change so preceding text gets current style
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
self->nextWordContinues = true;
}
self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
// Push inline style entry for underline tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasUnderline = true;
entry.underline = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
// Flush buffer before style change so preceding text gets current style
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
self->nextWordContinues = true;
}
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
// Push inline style entry for bold tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasBold = true;
entry.bold = true;
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
// Flush buffer before style change so preceding text gets current style
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
self->nextWordContinues = true;
}
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
// Push inline style entry for italic tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasItalic = true;
entry.italic = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (strcmp(name, "span") == 0 || !isHeaderOrBlock(name)) {
// Handle span and other inline elements for CSS styling
if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) {
// Flush buffer before style change so preceding text gets current style
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
self->nextWordContinues = true;
}
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
}
}
// Unprocessed tag, just increasing depth and continue forward
self->depth += 1;
}
void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Middle of skip
if (self->skipUntilDepth < self->depth) {
return;
}
for (int i = 0; i < len; i++) {
if (isWhitespace(s[i])) {
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
}
// Whitespace is a real word boundary — reset continuation state
self->nextWordContinues = false;
// Skip the whitespace char
continue;
}
// Detect U+00A0 (non-breaking space): UTF-8 encoding is 0xC2 0xA0
// Render a visible space without allowing a line break around it.
if (static_cast<uint8_t>(s[i]) == 0xC2 && i + 1 < len && static_cast<uint8_t>(s[i + 1]) == 0xA0) {
// Flush any pending text so style is applied correctly.
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
}
// Add a standalone space that attaches to the previous word.
self->partWordBuffer[0] = ' ';
self->partWordBuffer[1] = '\0';
self->partWordBufferIndex = 1;
self->nextWordContinues = true; // Attach space to previous word (no break).
self->flushPartWordBuffer();
// Ensure the next real word attaches to this space (no break).
self->nextWordContinues = true;
i++; // Skip the second byte (0xA0)
continue;
}
// Skip Zero Width No-Break Space / BOM (U+FEFF) = 0xEF 0xBB 0xBF
const XML_Char FEFF_BYTE_1 = static_cast<XML_Char>(0xEF);
const XML_Char FEFF_BYTE_2 = static_cast<XML_Char>(0xBB);
const XML_Char FEFF_BYTE_3 = static_cast<XML_Char>(0xBF);
if (s[i] == FEFF_BYTE_1) {
// Check if the next two bytes complete the 3-byte sequence
if ((i + 2 < len) && (s[i + 1] == FEFF_BYTE_2) && (s[i + 2] == FEFF_BYTE_3)) {
// Sequence 0xEF 0xBB 0xBF found!
i += 2; // Skip the next two bytes
continue; // Move to the next iteration
}
}
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
self->flushPartWordBuffer();
}
self->partWordBuffer[self->partWordBufferIndex++] = s[i];
}
// If we have > 750 words buffered up, perform the layout and consume out all but the last line
// There should be enough here to build out 1-2 full pages and doing this will free up a lot of
// memory.
// Spotted when reading Intermezzo, there are some really long text blocks in there.
// Skip this when inside a table - cell content is buffered for later layout.
if (!self->inTable && self->currentTextBlock->size() > 750) {
LOG_DBG("EHP", "Text block too long, splitting into multiple pages");
self->currentTextBlock->layoutAndExtractLines(
self->renderer, self->fontId, self->viewportWidth,
[self](const std::shared_ptr<TextBlock>& textBlock) { self->addLineToPage(textBlock); }, false);
}
}
void XMLCALL ChapterHtmlSlimParser::defaultHandlerExpand(void* userData, const XML_Char* s, const int len) {
// Check if this looks like an entity reference (&...;)
if (len >= 3 && s[0] == '&' && s[len - 1] == ';') {
const char* utf8Value = lookupHtmlEntity(s, len);
if (utf8Value != nullptr) {
// Known entity: expand to its UTF-8 value
characterData(userData, utf8Value, strlen(utf8Value));
return;
}
// Unknown entity: preserve original &...; sequence
characterData(userData, s, len);
return;
}
// Not an entity we recognize - skip it
}
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Check if any style state will change after we decrement depth
// If so, we MUST flush the partWordBuffer with the CURRENT style first
// Note: depth hasn't been decremented yet, so we check against (depth - 1)
const bool willPopStyleStack =
!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1;
const bool willClearBold = self->boldUntilDepth == self->depth - 1;
const bool willClearItalic = self->italicUntilDepth == self->depth - 1;
const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1;
const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
const bool headerOrBlockTag = isHeaderOrBlock(name);
const bool isTableCellTag = strcmp(name, "td") == 0 || strcmp(name, "th") == 0;
const bool isTableTag = strcmp(name, "table") == 0;
// Flush buffer with current style BEFORE any style changes
if (self->partWordBufferIndex > 0) {
// Flush if style will change OR if we're closing a block/structural element
const bool isInlineTag = !headerOrBlockTag && !isTableTag && !isTableCellTag &&
!matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) && self->depth != 1;
const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) ||
matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || isTableTag || isTableCellTag ||
matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
if (shouldFlush) {
self->flushPartWordBuffer();
// If closing an inline element, the next word fragment continues the same visual word
if (isInlineTag) {
self->nextWordContinues = true;
}
}
}
// --- Table cell/row/table close handling ---
if (self->inTable) {
if (isTableCellTag) {
// Save the current cell content into the table data
if (self->tableData && !self->tableData->rows.empty()) {
auto& currentRow = self->tableData->rows.back();
if (!currentRow.cells.empty()) {
currentRow.cells.back().content = std::move(self->currentTextBlock);
}
}
// Create a safe empty ParsedText so character data between cells doesn't crash
auto safeBlockStyle = BlockStyle();
safeBlockStyle.alignment = CssTextAlign::Left;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, safeBlockStyle));
self->nextWordContinues = false;
}
if (isTableTag) {
// Process the entire buffered table
self->depth -= 1;
// Clean up style state for this depth
if (self->skipUntilDepth == self->depth) self->skipUntilDepth = INT_MAX;
if (self->boldUntilDepth == self->depth) self->boldUntilDepth = INT_MAX;
if (self->italicUntilDepth == self->depth) self->italicUntilDepth = INT_MAX;
if (self->underlineUntilDepth == self->depth) self->underlineUntilDepth = INT_MAX;
if (self->listItemUntilDepth == self->depth) self->listItemUntilDepth = INT_MAX;
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
}
self->processTable();
self->inTable = false;
self->tableData.reset();
// Restore a fresh text block for content after the table
auto paragraphAlignmentBlockStyle = BlockStyle();
paragraphAlignmentBlockStyle.textAlignDefined = true;
const auto align = (self->paragraphAlignment == static_cast<uint8_t>(CssTextAlign::None))
? CssTextAlign::Justify
: static_cast<CssTextAlign>(self->paragraphAlignment);
paragraphAlignmentBlockStyle.alignment = align;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, paragraphAlignmentBlockStyle));
return; // depth already decremented, skip the normal endElement cleanup
}
}
self->depth -= 1;
// Leaving skip
if (self->skipUntilDepth == self->depth) {
self->skipUntilDepth = INT_MAX;
}
// Leaving bold tag
if (self->boldUntilDepth == self->depth) {
self->boldUntilDepth = INT_MAX;
}
// Leaving italic tag
if (self->italicUntilDepth == self->depth) {
self->italicUntilDepth = INT_MAX;
}
// Leaving underline tag
if (self->underlineUntilDepth == self->depth) {
self->underlineUntilDepth = INT_MAX;
}
// Leaving list item
if (self->listItemUntilDepth == self->depth) {
self->listItemUntilDepth = INT_MAX;
}
// Pop from inline style stack if we pushed an entry at this depth
// This handles all inline elements: b, i, u, span, etc.
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
}
// Clear block style when leaving header or block elements
if (headerOrBlockTag) {
self->currentCssStyle.reset();
self->updateEffectiveInlineStyle();
}
}
bool ChapterHtmlSlimParser::parseAndBuildPages() {
unsigned long chapterStartTime = millis();
auto paragraphAlignmentBlockStyle = BlockStyle();
paragraphAlignmentBlockStyle.textAlignDefined = true;
// Resolve None sentinel to Justify for initial block (no CSS context yet)
const auto align = (this->paragraphAlignment == static_cast<uint8_t>(CssTextAlign::None))
? CssTextAlign::Justify
: static_cast<CssTextAlign>(this->paragraphAlignment);
paragraphAlignmentBlockStyle.alignment = align;
startNewTextBlock(paragraphAlignmentBlockStyle);
const XML_Parser parser = XML_ParserCreate(nullptr);
int done;
if (!parser) {
LOG_ERR("EHP", "Couldn't allocate memory for parser");
return false;
}
// Handle HTML entities (like &nbsp;) that aren't in XML spec or DTD
// Using DefaultHandlerExpand preserves normal entity expansion from DOCTYPE
XML_SetDefaultHandlerExpand(parser, defaultHandlerExpand);
FsFile file;
if (!Storage.openFileForRead("EHP", filepath, file)) {
XML_ParserFree(parser);
return false;
}
// Get file size to decide whether to show indexing popup.
if (popupFn && file.size() >= MIN_SIZE_FOR_POPUP) {
popupFn();
}
XML_SetUserData(parser, this);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser, characterData);
do {
void* const buf = XML_GetBuffer(parser, PARSE_BUFFER_SIZE);
if (!buf) {
LOG_ERR("EHP", "Couldn't allocate memory for buffer");
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
return false;
}
const size_t len = file.read(buf, PARSE_BUFFER_SIZE);
if (len == 0 && file.available() > 0) {
LOG_ERR("EHP", "File read error");
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
return false;
}
done = file.available() == 0;
if (XML_ParseBuffer(parser, static_cast<int>(len), done) == XML_STATUS_ERROR) {
LOG_ERR("EHP", "Parse error at line %lu:\n%s", XML_GetCurrentLineNumber(parser),
XML_ErrorString(XML_GetErrorCode(parser)));
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
return false;
}
} while (!done);
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
// Process last page if there is still text
if (currentTextBlock) {
makePages();
completePageFn(std::move(currentPage));
currentPage.reset();
currentTextBlock.reset();
}
LOG_DBG("EHP", "Chapter parsed in %lu ms", millis() - chapterStartTime);
return true;
}
void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
if (currentPageNextY + lineHeight > viewportHeight) {
completePageFn(std::move(currentPage));
currentPage.reset(new Page());
currentPageNextY = 0;
}
// Apply horizontal left inset (margin + padding) as x position offset
const int16_t xOffset = line->getBlockStyle().leftInset();
currentPage->elements.push_back(std::make_shared<PageLine>(line, xOffset, currentPageNextY));
currentPageNextY += lineHeight;
}
void ChapterHtmlSlimParser::makePages() {
if (!currentTextBlock) {
LOG_ERR("EHP", "!! No text block to make pages for !!");
return;
}
if (!currentPage) {
currentPage.reset(new Page());
currentPageNextY = 0;
}
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
// Apply top spacing before the paragraph (stored in pixels)
const BlockStyle& blockStyle = currentTextBlock->getBlockStyle();
if (blockStyle.marginTop > 0) {
currentPageNextY += blockStyle.marginTop;
}
if (blockStyle.paddingTop > 0) {
currentPageNextY += blockStyle.paddingTop;
}
// Calculate effective width accounting for horizontal margins/padding
const int horizontalInset = blockStyle.totalHorizontalInset();
const uint16_t effectiveWidth =
(horizontalInset < viewportWidth) ? static_cast<uint16_t>(viewportWidth - horizontalInset) : viewportWidth;
currentTextBlock->layoutAndExtractLines(
renderer, fontId, effectiveWidth,
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
// Apply bottom spacing after the paragraph (stored in pixels)
if (blockStyle.marginBottom > 0) {
currentPageNextY += blockStyle.marginBottom;
}
if (blockStyle.paddingBottom > 0) {
currentPageNextY += blockStyle.paddingBottom;
}
// Extra paragraph spacing if enabled (default behavior)
if (extraParagraphSpacing) {
currentPageNextY += lineHeight / 2;
}
}
// ---------------------------------------------------------------------------
// Table processing
// ---------------------------------------------------------------------------
// Cell padding in pixels (horizontal space between grid line and cell text)
static constexpr int TABLE_CELL_PAD_X = 4;
// Vertical cell padding — asymmetric because font metrics include internal leading (whitespace
// above glyphs), so the top already has built-in visual space. Less explicit padding on top,
// more on bottom, produces visually balanced results.
static constexpr int TABLE_CELL_PAD_TOP = 1;
static constexpr int TABLE_CELL_PAD_BOTTOM = 3;
// Minimum usable column width in pixels (below this text is unreadable)
static constexpr int TABLE_MIN_COL_WIDTH = 30;
// Grid line width in pixels
static constexpr int TABLE_GRID_LINE_PX = 1;
void ChapterHtmlSlimParser::addTableRowToPage(std::shared_ptr<PageTableRow> row) {
if (!currentPage) {
currentPage.reset(new Page());
currentPageNextY = 0;
}
const int16_t rowH = row->getHeight();
// If this row doesn't fit on the current page, start a new one
if (currentPageNextY + rowH > viewportHeight) {
completePageFn(std::move(currentPage));
currentPage.reset(new Page());
currentPageNextY = 0;
}
row->xPos = 0;
row->yPos = currentPageNextY;
currentPage->elements.push_back(std::move(row));
currentPageNextY += rowH;
}
void ChapterHtmlSlimParser::processTable() {
if (!tableData || tableData->rows.empty()) {
return;
}
if (!currentPage) {
currentPage.reset(new Page());
currentPageNextY = 0;
}
const int lh = static_cast<int>(renderer.getLineHeight(fontId) * lineCompression);
// 1. Determine logical column count using colspan.
// Each cell occupies cell.colspan logical columns. The total for a row is the sum of colspans.
size_t numCols = 0;
for (const auto& row : tableData->rows) {
size_t rowLogicalCols = 0;
for (const auto& cell : row.cells) {
rowLogicalCols += static_cast<size_t>(cell.colspan);
}
numCols = std::max(numCols, rowLogicalCols);
}
if (numCols == 0) {
return;
}
// 2. Measure natural width of each cell and compute per-column max natural width.
// Only non-spanning cells (colspan==1) contribute to individual column widths.
// Spanning cells use the combined width of their spanned columns.
std::vector<uint16_t> colNaturalWidth(numCols, 0);
for (const auto& row : tableData->rows) {
size_t logicalCol = 0;
for (const auto& cell : row.cells) {
if (cell.colspan == 1 && cell.content && !cell.content->isEmpty()) {
if (logicalCol < numCols) {
const uint16_t w = cell.content->getNaturalWidth(renderer, fontId);
if (w > colNaturalWidth[logicalCol]) {
colNaturalWidth[logicalCol] = w;
}
}
}
logicalCol += static_cast<size_t>(cell.colspan);
}
}
// 3. Calculate column widths to fit viewport.
// Available width = viewport - outer borders - internal column borders - cell padding
const int totalGridLines = static_cast<int>(numCols) + 1; // left + between columns + right
const int totalPadding = static_cast<int>(numCols) * TABLE_CELL_PAD_X * 2;
const int availableForContent = viewportWidth - totalGridLines * TABLE_GRID_LINE_PX - totalPadding;
// 3a. Resolve width hints per column.
// Priority: <col> hints > max cell hint (colspan=1 only).
// Percentages are relative to availableForContent.
const float emSize = static_cast<float>(lh);
const float containerW = static_cast<float>(std::max(availableForContent, 0));
std::vector<int> colHintedWidth(numCols, -1); // -1 = no hint
// From <col> tags
for (size_t c = 0; c < numCols && c < tableData->colWidthHints.size(); ++c) {
const auto& hint = tableData->colWidthHints[c];
if (hint.value > 0) {
int px = static_cast<int>(hint.toPixels(emSize, containerW));
if (px > 0) {
colHintedWidth[c] = std::max(px, TABLE_MIN_COL_WIDTH);
}
}
}
// From <td>/<th> cell width hints (only override if no <col> hint exists for this column)
for (const auto& row : tableData->rows) {
size_t logicalCol = 0;
for (const auto& cell : row.cells) {
if (cell.colspan == 1 && cell.hasWidthHint && logicalCol < numCols) {
if (colHintedWidth[logicalCol] < 0) { // no <col> hint yet
int px = static_cast<int>(cell.widthHint.toPixels(emSize, containerW));
if (px > colHintedWidth[logicalCol]) {
colHintedWidth[logicalCol] = std::max(px, TABLE_MIN_COL_WIDTH);
}
}
}
logicalCol += static_cast<size_t>(cell.colspan);
}
}
// 3b. Distribute column widths: hinted columns get their hint, unhinted use auto-sizing.
std::vector<uint16_t> colWidths(numCols, 0);
if (availableForContent <= 0) {
const uint16_t equalWidth = static_cast<uint16_t>(viewportWidth / numCols);
for (size_t c = 0; c < numCols; ++c) {
colWidths[c] = equalWidth;
}
} else {
// First, assign hinted columns and track how much space they consume
int hintedSpaceUsed = 0;
size_t unhintedCount = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
hintedSpaceUsed += colHintedWidth[c];
} else {
unhintedCount++;
}
}
// If hinted columns exceed available space, scale them down proportionally
if (hintedSpaceUsed > availableForContent && hintedSpaceUsed > 0) {
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
colHintedWidth[c] = colHintedWidth[c] * availableForContent / hintedSpaceUsed;
colHintedWidth[c] = std::max(colHintedWidth[c], TABLE_MIN_COL_WIDTH);
}
}
// Recalculate
hintedSpaceUsed = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
hintedSpaceUsed += colHintedWidth[c];
}
}
}
// Assign hinted columns
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
colWidths[c] = static_cast<uint16_t>(colHintedWidth[c]);
}
}
// Distribute remaining space among unhinted columns using the existing algorithm
const int remainingForUnhinted = std::max(availableForContent - hintedSpaceUsed, 0);
if (unhintedCount > 0 && remainingForUnhinted > 0) {
// Compute total natural width of unhinted columns
int totalNaturalUnhinted = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
totalNaturalUnhinted += colNaturalWidth[c];
}
}
if (totalNaturalUnhinted <= remainingForUnhinted) {
// All unhinted content fits — distribute extra space equally among unhinted columns
const int extraSpace = remainingForUnhinted - totalNaturalUnhinted;
const int perColExtra = extraSpace / static_cast<int>(unhintedCount);
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
colWidths[c] = static_cast<uint16_t>(colNaturalWidth[c] + perColExtra);
}
}
} else {
// Unhinted content exceeds remaining space — two-pass fair-share among unhinted columns
const int equalShare = remainingForUnhinted / static_cast<int>(unhintedCount);
int spaceUsedByFitting = 0;
int naturalOfWide = 0;
size_t wideCount = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
if (static_cast<int>(colNaturalWidth[c]) <= equalShare) {
colWidths[c] = colNaturalWidth[c];
spaceUsedByFitting += colNaturalWidth[c];
} else {
naturalOfWide += colNaturalWidth[c];
wideCount++;
}
}
}
const int wideSpace = remainingForUnhinted - spaceUsedByFitting;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0 && static_cast<int>(colNaturalWidth[c]) > equalShare) {
if (naturalOfWide > 0 && wideCount > 1) {
int proportional = static_cast<int>(colNaturalWidth[c]) * wideSpace / naturalOfWide;
colWidths[c] = static_cast<uint16_t>(std::max(proportional, TABLE_MIN_COL_WIDTH));
} else {
colWidths[c] = static_cast<uint16_t>(std::max(wideSpace, TABLE_MIN_COL_WIDTH));
}
}
}
}
} else if (unhintedCount > 0) {
// No remaining space for unhinted columns — give them minimum width
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
colWidths[c] = static_cast<uint16_t>(TABLE_MIN_COL_WIDTH);
}
}
}
}
// Compute column x-offsets (cumulative: border + padding + content width + padding + border ...)
std::vector<uint16_t> colXOffsets(numCols, 0);
int xAccum = TABLE_GRID_LINE_PX; // start after left border
for (size_t c = 0; c < numCols; ++c) {
colXOffsets[c] = static_cast<uint16_t>(xAccum);
xAccum += TABLE_CELL_PAD_X + colWidths[c] + TABLE_CELL_PAD_X + TABLE_GRID_LINE_PX;
}
const int16_t totalTableWidth = static_cast<int16_t>(xAccum);
// Helper: compute the combined content width for a cell spanning multiple columns.
// This includes the content widths plus the internal grid lines and padding between spanned columns.
auto spanContentWidth = [&](size_t startCol, int colspan) -> uint16_t {
int width = 0;
for (int s = 0; s < colspan && startCol + s < numCols; ++s) {
width += colWidths[startCol + s];
if (s > 0) {
// Add internal padding and grid line between spanned columns
width += TABLE_CELL_PAD_X * 2 + TABLE_GRID_LINE_PX;
}
}
return static_cast<uint16_t>(std::max(width, 0));
};
// Helper: compute the full cell width (including padding on both sides) for a spanning cell.
auto spanFullCellWidth = [&](size_t startCol, int colspan) -> uint16_t {
if (colspan <= 0 || startCol >= numCols) return 0;
const size_t endCol = std::min(startCol + static_cast<size_t>(colspan), numCols) - 1;
// From the left edge of startCol's cell to the right edge of endCol's cell
const int leftEdge = colXOffsets[startCol];
const int rightEdge = colXOffsets[endCol] + TABLE_CELL_PAD_X + colWidths[endCol] + TABLE_CELL_PAD_X;
return static_cast<uint16_t>(rightEdge - leftEdge);
};
// 4. Lay out each row: map cells to logical columns, create PageTableRow
for (auto& row : tableData->rows) {
// Build cell data for this row, one entry per CELL (not per logical column).
// Each PageTableCellData gets the correct x-offset and combined column width.
std::vector<PageTableCellData> cellDataVec;
size_t maxLinesInRow = 1;
size_t logicalCol = 0;
for (size_t ci = 0; ci < row.cells.size() && logicalCol < numCols; ++ci) {
auto& cell = row.cells[ci];
const int cs = cell.colspan;
PageTableCellData cellData;
cellData.xOffset = colXOffsets[logicalCol];
cellData.columnWidth = spanFullCellWidth(logicalCol, cs);
if (cell.content && !cell.content->isEmpty()) {
// Center-align cells that span the full table width (common for section headers/titles)
if (cs >= static_cast<int>(numCols)) {
BlockStyle centeredStyle = cell.content->getBlockStyle();
centeredStyle.alignment = CssTextAlign::Center;
centeredStyle.textAlignDefined = true;
cell.content->setBlockStyle(centeredStyle);
}
const uint16_t contentWidth = spanContentWidth(logicalCol, cs);
std::vector<std::shared_ptr<TextBlock>> cellLines;
cell.content->layoutAndExtractLines(
renderer, fontId, contentWidth,
[&cellLines](const std::shared_ptr<TextBlock>& textBlock) { cellLines.push_back(textBlock); });
if (cellLines.size() > maxLinesInRow) {
maxLinesInRow = cellLines.size();
}
cellData.lines = std::move(cellLines);
}
cellDataVec.push_back(std::move(cellData));
logicalCol += static_cast<size_t>(cs);
}
// Fill remaining logical columns with empty cells (rows shorter than numCols)
while (logicalCol < numCols) {
PageTableCellData emptyCell;
emptyCell.xOffset = colXOffsets[logicalCol];
emptyCell.columnWidth = static_cast<uint16_t>(TABLE_CELL_PAD_X + colWidths[logicalCol] + TABLE_CELL_PAD_X);
cellDataVec.push_back(std::move(emptyCell));
logicalCol++;
}
// Row height = max lines * lineHeight + top/bottom border + asymmetric vertical padding
const int16_t rowHeight =
static_cast<int16_t>(static_cast<int>(maxLinesInRow) * lh + 2 + TABLE_CELL_PAD_TOP + TABLE_CELL_PAD_BOTTOM);
auto pageTableRow = std::make_shared<PageTableRow>(std::move(cellDataVec), rowHeight, totalTableWidth,
static_cast<int16_t>(lh), 0, 0);
addTableRowToPage(std::move(pageTableRow));
}
// Add a small gap after the table
if (extraParagraphSpacing) {
currentPageNextY += lh / 2;
}
}