feat: Add column-aligned table rendering for EPUBs

Replace the "[Table omitted]" placeholder with full table rendering:

- Two-pass layout: buffer table content during SAX parsing, then
  calculate column widths and lay out cells after </table> closes
- Colspan support for cells spanning multiple columns
- Forced line breaks within cells (<br>, <p>, <div> etc.)
- Center-align full-width spanning rows (section headers/titles)
- Width hints from HTML attributes and CSS (col, td, th width)
- Two-pass fair-share column width distribution that prevents
  narrow columns from being excessively squeezed
- Double-encoded &nbsp; entity handling
- PageTableRow with grid-line rendering and serialization support
- Asymmetric vertical cell padding to balance font leading

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
cottongin
2026-02-15 14:40:36 -05:00
parent 1383d75c84
commit 3096d6066b
9 changed files with 895 additions and 20 deletions

View File

@@ -1,8 +1,17 @@
#include "Page.h" #include "Page.h"
#include <GfxRenderer.h>
#include <Logging.h> #include <Logging.h>
#include <Serialization.h> #include <Serialization.h>
// Cell padding in pixels (must match TABLE_CELL_PAD_* in ChapterHtmlSlimParser.cpp).
// PageTableRow::render uses these to position cell text inside the grid lines.
// Horizontal inset added to a cell's x offset before its text is drawn.
static constexpr int TABLE_CELL_PADDING_X = 4;
// Vertical inset added below the row's 1px top border before the first text line.
static constexpr int TABLE_CELL_PADDING_TOP = 1;
// ---------------------------------------------------------------------------
// PageLine
// ---------------------------------------------------------------------------
void PageLine::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) { void PageLine::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) {
block->render(renderer, fontId, xPos + xOffset, yPos + yOffset); block->render(renderer, fontId, xPos + xOffset, yPos + yOffset);
} }
@@ -25,6 +34,115 @@ std::unique_ptr<PageLine> PageLine::deserialize(FsFile& file) {
return std::unique_ptr<PageLine>(new PageLine(std::move(tb), xPos, yPos)); return std::unique_ptr<PageLine>(new PageLine(std::move(tb), xPos, yPos));
} }
// ---------------------------------------------------------------------------
// PageTableRow
// ---------------------------------------------------------------------------
// Draws one table row: the row's grid lines first, then the text lines of every cell.
// xOffset/yOffset are added to the row's own xPos/yPos to get the on-screen origin.
void PageTableRow::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) {
  const int left = xPos + xOffset;
  const int top = yPos + yOffset;

  // Horizontal rules along the top and bottom edges of the row.
  renderer.drawLine(left, top, left + totalWidth, top);
  renderer.drawLine(left, top + rowHeight, left + totalWidth, top + rowHeight);

  // Leftmost vertical rule; every cell below contributes the rule on its own right side.
  renderer.drawLine(left, top, left, top + rowHeight);

  for (const auto& cell : cells) {
    const int separatorX = left + cell.xOffset + cell.columnWidth;
    renderer.drawLine(separatorX, top, separatorX, top + rowHeight);

    // Text is inset horizontally by the cell padding and starts below the
    // 1px top border plus the top padding; each further line is one lineHeight lower.
    const int textX = left + cell.xOffset + TABLE_CELL_PADDING_X;
    int textY = top + 1 + TABLE_CELL_PADDING_TOP;
    for (const auto& textLine : cell.lines) {
      textLine->render(renderer, fontId, textX, textY);
      textY += lineHeight;
    }
  }
}
// Writes this row to `file` in the layout PageTableRow::deserialize reads back:
// xPos, yPos, rowHeight, totalWidth, lineHeight, a uint16 cell count, then per cell
// its xOffset, columnWidth, a uint16 line count and each serialized text line.
//
// Returns false without writing any row data when the row exceeds the limits that
// deserialize enforces (100 cells, 1000 lines per cell) or contains a null line, and
// false as soon as a text line fails to serialize.
bool PageTableRow::serialize(FsFile& file) {
  // Keep in sync with the sanity checks in PageTableRow::deserialize. Validating here
  // avoids writing a row that can never be loaded again, and guarantees the uint16_t
  // casts below cannot truncate.
  constexpr size_t kMaxCells = 100;
  constexpr size_t kMaxLinesPerCell = 1000;

  if (cells.size() > kMaxCells) {
    LOG_ERR("PTR", "Serialization failed: cell count %u exceeds maximum", static_cast<unsigned>(cells.size()));
    return false;
  }
  for (const auto& cell : cells) {
    if (cell.lines.size() > kMaxLinesPerCell) {
      LOG_ERR("PTR", "Serialization failed: line count %u exceeds maximum",
              static_cast<unsigned>(cell.lines.size()));
      return false;
    }
    for (const auto& line : cell.lines) {
      if (!line) {
        LOG_ERR("PTR", "Serialization failed: null text line in table cell");
        return false;
      }
    }
  }

  serialization::writePod(file, xPos);
  serialization::writePod(file, yPos);
  serialization::writePod(file, rowHeight);
  serialization::writePod(file, totalWidth);
  serialization::writePod(file, lineHeight);

  const uint16_t cellCount = static_cast<uint16_t>(cells.size());
  serialization::writePod(file, cellCount);

  for (const auto& cell : cells) {
    serialization::writePod(file, cell.xOffset);
    serialization::writePod(file, cell.columnWidth);

    const uint16_t lineCount = static_cast<uint16_t>(cell.lines.size());
    serialization::writePod(file, lineCount);

    for (const auto& line : cell.lines) {
      if (!line->serialize(file)) {
        return false;
      }
    }
  }

  return true;
}
// Reads a row previously written by PageTableRow::serialize.
//
// Returns nullptr when the stored cell count exceeds 100, a cell's line count exceeds
// 1000, or any text line fails to deserialize; otherwise returns the reconstructed row.
std::unique_ptr<PageTableRow> PageTableRow::deserialize(FsFile& file) {
  // Everything is zero-initialized so that a read which does not fill its target
  // (e.g. a truncated file -- readPod's failure behaviour is not visible here) yields
  // a deterministic value instead of an indeterminate one.
  int16_t xPos = 0;
  int16_t yPos = 0;
  int16_t rowHeight = 0;
  int16_t totalWidth = 0;
  int16_t lineHeight = 0;
  serialization::readPod(file, xPos);
  serialization::readPod(file, yPos);
  serialization::readPod(file, rowHeight);
  serialization::readPod(file, totalWidth);
  serialization::readPod(file, lineHeight);

  uint16_t cellCount = 0;
  serialization::readPod(file, cellCount);

  // Sanity check: refuse implausible counts before allocating for them.
  if (cellCount > 100) {
    LOG_ERR("PTR", "Deserialization failed: cell count %u exceeds maximum", cellCount);
    return nullptr;
  }

  std::vector<PageTableCellData> cells;
  cells.resize(cellCount);

  for (uint16_t c = 0; c < cellCount; ++c) {
    serialization::readPod(file, cells[c].xOffset);
    serialization::readPod(file, cells[c].columnWidth);

    uint16_t lineCount = 0;
    serialization::readPod(file, lineCount);

    if (lineCount > 1000) {
      LOG_ERR("PTR", "Deserialization failed: line count %u in cell %u exceeds maximum", lineCount, c);
      return nullptr;
    }

    cells[c].lines.reserve(lineCount);
    for (uint16_t l = 0; l < lineCount; ++l) {
      auto tb = TextBlock::deserialize(file);
      if (!tb) {
        return nullptr;
      }
      cells[c].lines.push_back(std::move(tb));
    }
  }

  return std::unique_ptr<PageTableRow>(
      new PageTableRow(std::move(cells), rowHeight, totalWidth, lineHeight, xPos, yPos));
}
// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------
void Page::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) const { void Page::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) const {
for (auto& element : elements) { for (auto& element : elements) {
element->render(renderer, fontId, xOffset, yOffset); element->render(renderer, fontId, xOffset, yOffset);
@@ -36,8 +154,7 @@ bool Page::serialize(FsFile& file) const {
serialization::writePod(file, count); serialization::writePod(file, count);
for (const auto& el : elements) { for (const auto& el : elements) {
// Only PageLine exists currently serialization::writePod(file, static_cast<uint8_t>(el->getTag()));
serialization::writePod(file, static_cast<uint8_t>(TAG_PageLine));
if (!el->serialize(file)) { if (!el->serialize(file)) {
return false; return false;
} }
@@ -59,6 +176,13 @@ std::unique_ptr<Page> Page::deserialize(FsFile& file) {
if (tag == TAG_PageLine) { if (tag == TAG_PageLine) {
auto pl = PageLine::deserialize(file); auto pl = PageLine::deserialize(file);
page->elements.push_back(std::move(pl)); page->elements.push_back(std::move(pl));
} else if (tag == TAG_PageTableRow) {
auto tr = PageTableRow::deserialize(file);
if (!tr) {
LOG_ERR("PGE", "Deserialization failed for PageTableRow at element %u", i);
return nullptr;
}
page->elements.push_back(std::move(tr));
} else { } else {
LOG_ERR("PGE", "Deserialization failed: Unknown tag %u", tag); LOG_ERR("PGE", "Deserialization failed: Unknown tag %u", tag);
return nullptr; return nullptr;

View File

@@ -8,6 +8,7 @@
enum PageElementTag : uint8_t { enum PageElementTag : uint8_t {
TAG_PageLine = 1, TAG_PageLine = 1,
TAG_PageTableRow = 2,
}; };
// represents something that has been added to a page // represents something that has been added to a page
@@ -17,6 +18,7 @@ class PageElement {
int16_t yPos; int16_t yPos;
explicit PageElement(const int16_t xPos, const int16_t yPos) : xPos(xPos), yPos(yPos) {} explicit PageElement(const int16_t xPos, const int16_t yPos) : xPos(xPos), yPos(yPos) {}
virtual ~PageElement() = default; virtual ~PageElement() = default;
virtual PageElementTag getTag() const = 0;
virtual void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) = 0; virtual void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) = 0;
virtual bool serialize(FsFile& file) = 0; virtual bool serialize(FsFile& file) = 0;
}; };
@@ -29,11 +31,42 @@ class PageLine final : public PageElement {
PageLine(std::shared_ptr<TextBlock> block, const int16_t xPos, const int16_t yPos) PageLine(std::shared_ptr<TextBlock> block, const int16_t xPos, const int16_t yPos)
: PageElement(xPos, yPos), block(std::move(block)) {} : PageElement(xPos, yPos), block(std::move(block)) {}
const std::shared_ptr<TextBlock>& getBlock() const { return block; } const std::shared_ptr<TextBlock>& getBlock() const { return block; }
PageElementTag getTag() const override { return TAG_PageLine; }
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override; void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override;
bool serialize(FsFile& file) override; bool serialize(FsFile& file) override;
static std::unique_ptr<PageLine> deserialize(FsFile& file); static std::unique_ptr<PageLine> deserialize(FsFile& file);
}; };
/// One cell of a PageTableRow: its laid-out text plus its horizontal placement in the row.
struct PageTableCellData {
  /// Text lines of this cell, already line-broken; rendered in order, one line height apart.
  std::vector<std::shared_ptr<TextBlock>> lines;
  /// Width of this cell's column in pixels.
  uint16_t columnWidth{0};
  /// Horizontal offset of this cell, measured from the row's left edge.
  uint16_t xOffset{0};
};
/// Page element for a single table row, drawn as a bordered grid of column-aligned cells.
class PageTableRow final : public PageElement {
 private:
  std::vector<PageTableCellData> cells;  ///< Cells of this row, each with its own x offset and width
  int16_t rowHeight;                     ///< Full height of the row in pixels
  int16_t totalWidth;                    ///< Full width of the table in pixels
  int16_t lineHeight;                    ///< Vertical distance between consecutive text lines inside a cell

 public:
  /// Takes ownership of `cells`; xPos/yPos are forwarded to PageElement.
  PageTableRow(std::vector<PageTableCellData> cells, int16_t rowHeight, int16_t totalWidth, int16_t lineHeight,
               int16_t xPos, int16_t yPos)
      : PageElement(xPos, yPos),
        cells(std::move(cells)),
        rowHeight(rowHeight),
        totalWidth(totalWidth),
        lineHeight(lineHeight) {}

  /// Tag written in front of this element when a page is serialized.
  PageElementTag getTag() const override { return TAG_PageTableRow; }

  /// Height of the row in pixels.
  int16_t getHeight() const { return rowHeight; }

  void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override;
  bool serialize(FsFile& file) override;
  static std::unique_ptr<PageTableRow> deserialize(FsFile& file);
};
class Page { class Page {
public: public:
// the list of block index and line numbers on this page // the list of block index and line numbers on this page

View File

@@ -62,6 +62,13 @@ void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle,
} }
wordStyles.push_back(combinedStyle); wordStyles.push_back(combinedStyle);
wordContinues.push_back(attachToPrevious); wordContinues.push_back(attachToPrevious);
forceBreakAfter.push_back(false);
}
// Marks a mandatory line break after the most recently added word.
// No-op when there is no word (or no break flag) to attach the break to.
void ParsedText::addLineBreak() {
  // back() on an empty vector is undefined behaviour, so check the vector that is
  // actually written as well as `words`. The layout code in this file likewise guards
  // every forceBreakAfter access with !forceBreakAfter.empty().
  if (words.empty() || forceBreakAfter.empty()) {
    return;
  }
  forceBreakAfter.back() = true;
}
// Consumes data to minimize memory usage // Consumes data to minimize memory usage
@@ -148,6 +155,11 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
for (size_t j = i; j < totalWordCount; ++j) { for (size_t j = i; j < totalWordCount; ++j) {
// If the previous word has a forced line break, this line cannot include word j
if (j > static_cast<size_t>(i) && !forceBreakAfter.empty() && forceBreakAfter[j - 1]) {
break;
}
// Add space before word j, unless it's the first word on the line or a continuation // Add space before word j, unless it's the first word on the line or a continuation
const int gap = j > static_cast<size_t>(i) && !continuesVec[j] ? spaceWidth : 0; const int gap = j > static_cast<size_t>(i) && !continuesVec[j] ? spaceWidth : 0;
currlen += wordWidths[j] + gap; currlen += wordWidths[j] + gap;
@@ -156,8 +168,11 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
break; break;
} }
// Cannot break after word j if the next word attaches to it (continuation group) // Forced line break after word j overrides continuation (must end line here)
if (j + 1 < totalWordCount && continuesVec[j + 1]) { const bool mustBreakHere = !forceBreakAfter.empty() && forceBreakAfter[j];
// Cannot break after word j if the next word attaches to it (unless forced)
if (!mustBreakHere && j + 1 < totalWordCount && continuesVec[j + 1]) {
continue; continue;
} }
@@ -180,6 +195,11 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
dp[i] = cost; dp[i] = cost;
ans[i] = j; // j is the index of the last word in this optimal line ans[i] = j; // j is the index of the last word in this optimal line
} }
// After evaluating cost, enforce forced break - no more words on this line
if (mustBreakHere) {
break;
}
} }
// Handle oversized word: if no valid configuration found, force single-word line // Handle oversized word: if no valid configuration found, force single-word line
@@ -254,6 +274,11 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
// Consume as many words as possible for current line, splitting when prefixes fit // Consume as many words as possible for current line, splitting when prefixes fit
while (currentIndex < wordWidths.size()) { while (currentIndex < wordWidths.size()) {
// If the previous word has a forced line break, stop - this word starts a new line
if (currentIndex > lineStart && !forceBreakAfter.empty() && forceBreakAfter[currentIndex - 1]) {
break;
}
const bool isFirstWord = currentIndex == lineStart; const bool isFirstWord = currentIndex == lineStart;
const int spacing = isFirstWord || continuesVec[currentIndex] ? 0 : spaceWidth; const int spacing = isFirstWord || continuesVec[currentIndex] ? 0 : spaceWidth;
const int candidateWidth = spacing + wordWidths[currentIndex]; const int candidateWidth = spacing + wordWidths[currentIndex];
@@ -262,6 +287,11 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
if (lineWidth + candidateWidth <= effectivePageWidth) { if (lineWidth + candidateWidth <= effectivePageWidth) {
lineWidth += candidateWidth; lineWidth += candidateWidth;
++currentIndex; ++currentIndex;
// If the word we just added has a forced break, end this line now
if (!forceBreakAfter.empty() && forceBreakAfter[currentIndex - 1]) {
break;
}
continue; continue;
} }
@@ -287,7 +317,12 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
// Don't break before a continuation word (e.g., orphaned "?" after "question"). // Don't break before a continuation word (e.g., orphaned "?" after "question").
// Backtrack to the start of the continuation group so the whole group moves to the next line. // Backtrack to the start of the continuation group so the whole group moves to the next line.
// But don't backtrack past a forced break point.
while (currentIndex > lineStart + 1 && currentIndex < wordWidths.size() && continuesVec[currentIndex]) { while (currentIndex > lineStart + 1 && currentIndex < wordWidths.size() && continuesVec[currentIndex]) {
// Don't backtrack past a forced break
if (!forceBreakAfter.empty() && forceBreakAfter[currentIndex - 1]) {
break;
}
--currentIndex; --currentIndex;
} }
@@ -361,6 +396,13 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
wordContinues[wordIndex] = false; wordContinues[wordIndex] = false;
wordContinues.insert(wordContinues.begin() + wordIndex + 1, originalContinuedToNext); wordContinues.insert(wordContinues.begin() + wordIndex + 1, originalContinuedToNext);
// Forced break belongs to the original whole word; transfer it to the remainder (last part).
if (!forceBreakAfter.empty()) {
const bool originalForceBreak = forceBreakAfter[wordIndex];
forceBreakAfter[wordIndex] = false; // prefix doesn't force break
forceBreakAfter.insert(forceBreakAfter.begin() + wordIndex + 1, originalForceBreak);
}
// Update cached widths to reflect the new prefix/remainder pairing. // Update cached widths to reflect the new prefix/remainder pairing.
wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth); wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style); const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style);
@@ -447,3 +489,22 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
processLine( processLine(
std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle)); std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle));
} }
// Width of the whole content as if it were laid out on one line: every word's measured
// width plus one space in front of each word that is neither the first nor a continuation
// of the previous word. The result is capped at UINT16_MAX; empty content yields 0.
uint16_t ParsedText::getNaturalWidth(const GfxRenderer& renderer, const int fontId) const {
  const size_t wordCount = words.size();
  if (wordCount == 0) {
    return 0;
  }

  const int gapWidth = renderer.getSpaceWidth(fontId);
  int lineWidth = 0;

  for (size_t idx = 0; idx < wordCount; ++idx) {
    lineWidth += measureWordWidth(renderer, fontId, words[idx], wordStyles[idx]);
    const bool needsLeadingSpace = idx != 0 && !wordContinues[idx];
    if (needsLeadingSpace) {
      lineWidth += gapWidth;
    }
  }

  // Clamp before narrowing so an overly long line cannot wrap around.
  const int ceiling = static_cast<int>(UINT16_MAX);
  return static_cast<uint16_t>(lineWidth > ceiling ? ceiling : lineWidth);
}

View File

@@ -15,7 +15,8 @@ class GfxRenderer;
class ParsedText { class ParsedText {
std::vector<std::string> words; std::vector<std::string> words;
std::vector<EpdFontFamily::Style> wordStyles; std::vector<EpdFontFamily::Style> wordStyles;
std::vector<bool> wordContinues; // true = word attaches to previous (no space before it) std::vector<bool> wordContinues; // true = word attaches to previous (no space before it)
std::vector<bool> forceBreakAfter; // true = mandatory line break after this word (e.g. <br> in table cells)
BlockStyle blockStyle; BlockStyle blockStyle;
bool extraParagraphSpacing; bool extraParagraphSpacing;
bool hyphenationEnabled; bool hyphenationEnabled;
@@ -40,6 +41,10 @@ class ParsedText {
~ParsedText() = default; ~ParsedText() = default;
void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false, bool attachToPrevious = false); void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false, bool attachToPrevious = false);
/// Mark a forced line break after the last word (e.g. for <br> within table cells).
/// If no words have been added yet, this is a no-op.
void addLineBreak();
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
BlockStyle& getBlockStyle() { return blockStyle; } BlockStyle& getBlockStyle() { return blockStyle; }
size_t size() const { return words.size(); } size_t size() const { return words.size(); }
@@ -47,4 +52,9 @@ class ParsedText {
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine, const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
bool includeLastLine = true); bool includeLastLine = true);
/// Returns the "natural" width of the content if it were laid out on a single line
/// (sum of word widths + space widths between non-continuation words).
/// Used by table layout to determine column widths before line-breaking.
uint16_t getNaturalWidth(const GfxRenderer& renderer, int fontId) const;
}; };

29
lib/Epub/Epub/TableData.h Normal file
View File

@@ -0,0 +1,29 @@
#pragma once
#include <memory>
#include <vector>
#include "ParsedText.h"
#include "css/CssStyle.h"
/// One <td>/<th> cell buffered while a table is being parsed.
struct TableCell {
  /// Parsed text of the cell.
  std::unique_ptr<ParsedText> content;
  /// True for <th>, false for <td>.
  bool isHeader{false};
  /// Number of logical columns covered by this cell.
  int colspan{1};
  /// Width hint from an HTML attribute or CSS; only meaningful when hasWidthHint is true.
  CssLength widthHint;
  /// Whether widthHint was actually provided.
  bool hasWidthHint{false};
};
/// A single row in a table.
/// Cells are stored in the order they were appended to the row.
struct TableRow {
std::vector<TableCell> cells;
};
/// Buffered table data collected during SAX parsing.
/// The entire table must be buffered before layout because column widths
/// depend on content across all rows.
struct TableData {
// Rows in the order they were appended during parsing.
std::vector<TableRow> rows;
// Width hints from <col> tags, indexed by logical column. A <col> without a usable
// width contributes a default-constructed CssLength so the indices stay aligned.
std::vector<CssLength> colWidthHints;
};

View File

@@ -413,6 +413,9 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) {
style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom =
style.defined.paddingLeft = 1; style.defined.paddingLeft = 1;
} }
} else if (propName == "width") {
style.width = interpretLength(propValue);
style.defined.width = 1;
} }
} }

View File

@@ -69,6 +69,7 @@ struct CssPropertyFlags {
uint16_t paddingBottom : 1; uint16_t paddingBottom : 1;
uint16_t paddingLeft : 1; uint16_t paddingLeft : 1;
uint16_t paddingRight : 1; uint16_t paddingRight : 1;
uint16_t width : 1;
CssPropertyFlags() CssPropertyFlags()
: textAlign(0), : textAlign(0),
@@ -83,17 +84,19 @@ struct CssPropertyFlags {
paddingTop(0), paddingTop(0),
paddingBottom(0), paddingBottom(0),
paddingLeft(0), paddingLeft(0),
paddingRight(0) {} paddingRight(0),
width(0) {}
[[nodiscard]] bool anySet() const { [[nodiscard]] bool anySet() const {
return textAlign || fontStyle || fontWeight || textDecoration || textIndent || marginTop || marginBottom || return textAlign || fontStyle || fontWeight || textDecoration || textIndent || marginTop || marginBottom ||
marginLeft || marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight; marginLeft || marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight || width;
} }
void clearAll() { void clearAll() {
textAlign = fontStyle = fontWeight = textDecoration = textIndent = 0; textAlign = fontStyle = fontWeight = textDecoration = textIndent = 0;
marginTop = marginBottom = marginLeft = marginRight = 0; marginTop = marginBottom = marginLeft = marginRight = 0;
paddingTop = paddingBottom = paddingLeft = paddingRight = 0; paddingTop = paddingBottom = paddingLeft = paddingRight = 0;
width = 0;
} }
}; };
@@ -115,6 +118,7 @@ struct CssStyle {
CssLength paddingBottom; // Padding after CssLength paddingBottom; // Padding after
CssLength paddingLeft; // Padding left CssLength paddingLeft; // Padding left
CssLength paddingRight; // Padding right CssLength paddingRight; // Padding right
CssLength width; // Element width (used for table columns/cells)
CssPropertyFlags defined; // Tracks which properties were explicitly set CssPropertyFlags defined; // Tracks which properties were explicitly set
@@ -173,6 +177,10 @@ struct CssStyle {
paddingRight = base.paddingRight; paddingRight = base.paddingRight;
defined.paddingRight = 1; defined.paddingRight = 1;
} }
if (base.hasWidth()) {
width = base.width;
defined.width = 1;
}
} }
[[nodiscard]] bool hasTextAlign() const { return defined.textAlign; } [[nodiscard]] bool hasTextAlign() const { return defined.textAlign; }
@@ -188,6 +196,7 @@ struct CssStyle {
[[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; } [[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; }
[[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; } [[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; }
[[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; } [[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; }
[[nodiscard]] bool hasWidth() const { return defined.width; }
void reset() { void reset() {
textAlign = CssTextAlign::Left; textAlign = CssTextAlign::Left;
@@ -197,6 +206,7 @@ struct CssStyle {
textIndent = CssLength{}; textIndent = CssLength{};
marginTop = marginBottom = marginLeft = marginRight = CssLength{}; marginTop = marginBottom = marginLeft = marginRight = CssLength{};
paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{}; paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{};
width = CssLength{};
defined.clearAll(); defined.clearAll();
} }
}; };

View File

@@ -5,6 +5,8 @@
#include <Logging.h> #include <Logging.h>
#include <expat.h> #include <expat.h>
#include <algorithm>
#include "../Page.h" #include "../Page.h"
#include "../htmlEntities.h" #include "../htmlEntities.h"
@@ -32,8 +34,30 @@ constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
const char* SKIP_TAGS[] = {"head"}; const char* SKIP_TAGS[] = {"head"};
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
// Table tags that are transparent containers (just depth tracking, no special handling).
// Only consulted while a table is being parsed.
const char* TABLE_TRANSPARENT_TAGS[] = {"thead", "tbody", "tfoot", "colgroup"};
constexpr int NUM_TABLE_TRANSPARENT_TAGS = sizeof(TABLE_TRANSPARENT_TAGS) / sizeof(TABLE_TRANSPARENT_TAGS[0]);
// Table tags to skip entirely (their children produce no useful output).
// Matching a tag from this list sets skipUntilDepth to the tag's own depth.
const char* TABLE_SKIP_TAGS[] = {"caption"};
constexpr int NUM_TABLE_SKIP_TAGS = sizeof(TABLE_SKIP_TAGS) / sizeof(TABLE_SKIP_TAGS[0]);
bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; }
// Parse an HTML width attribute value into a CssLength.
// "200" -> 200px, "50%" -> 50 percent.
// Returns false, leaving `out` untouched, when `value` is null, does not start with a
// number, or the number is negative, NaN or infinite.
static bool parseHtmlWidthAttr(const char* value, CssLength& out) {
  if (value == nullptr) return false;

  char* end = nullptr;
  const float num = strtof(value, &end);
  if (end == value) return false;

  // strtof also accepts "nan"/"inf" and returns infinity on overflow. A plain
  // `num < 0` test lets those through, so reject them explicitly: x - x is exactly
  // zero only for finite x (it is NaN for both NaN and infinity).
  const bool isFiniteNumber = (num - num) == 0.0f;
  if (!isFiniteNumber || num < 0) return false;

  // A '%' directly after the number selects percent; anything else is treated as pixels.
  out = CssLength(num, *end == '%' ? CssUnit::Percent : CssUnit::Pixels);
  return true;
}
// given the start and end of a tag, check to see if it matches a known tag // given the start and end of a tag, check to see if it matches a known tag
bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
for (int i = 0; i < possible_tag_count; i++) { for (int i = 0; i < possible_tag_count; i++) {
@@ -91,13 +115,37 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() {
// flush the buffer // flush the buffer
partWordBuffer[partWordBufferIndex] = '\0'; partWordBuffer[partWordBufferIndex] = '\0';
currentTextBlock->addWord(partWordBuffer, fontStyle, false, nextWordContinues);
// Handle double-encoded &nbsp; entities (e.g. &amp;nbsp; in source -> literal "&nbsp;" after
// XML parsing). Common in Wikipedia and other generated EPUBs. Replace with a space so the text
// renders cleanly. The space stays within the word, preserving non-breaking behavior.
std::string flushedWord(partWordBuffer);
size_t entityPos = 0;
while ((entityPos = flushedWord.find("&nbsp;", entityPos)) != std::string::npos) {
flushedWord.replace(entityPos, 6, " ");
entityPos += 1;
}
currentTextBlock->addWord(flushedWord, fontStyle, false, nextWordContinues);
partWordBufferIndex = 0; partWordBufferIndex = 0;
nextWordContinues = false; nextWordContinues = false;
} }
// start a new text block if needed // start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
// When inside a table cell, don't lay out to the page -- insert a forced line break
// within the cell's ParsedText so that block elements (p, div, br) create visual breaks.
if (inTable) {
if (partWordBufferIndex > 0) {
flushPartWordBuffer();
}
if (currentTextBlock && !currentTextBlock->isEmpty()) {
currentTextBlock->addLineBreak();
}
nextWordContinues = false;
return;
}
nextWordContinues = false; // New block = new paragraph, no continuation nextWordContinues = false; // New block = new paragraph, no continuation
if (currentTextBlock) { if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it // already have a text block running and it is empty - just reuse it
@@ -140,21 +188,184 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
centeredBlockStyle.textAlignDefined = true; centeredBlockStyle.textAlignDefined = true;
centeredBlockStyle.alignment = CssTextAlign::Center; centeredBlockStyle.alignment = CssTextAlign::Center;
// Special handling for tables - show placeholder text instead of dropping silently // --- Table handling ---
if (strcmp(name, "table") == 0) { if (strcmp(name, "table") == 0) {
// Add placeholder text if (self->inTable) {
self->startNewTextBlock(centeredBlockStyle); // Nested table: skip it entirely for v1
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Flush any pending content before the table
if (self->currentTextBlock && !self->currentTextBlock->isEmpty()) {
self->makePages();
}
self->inTable = true;
self->tableData.reset(new TableData());
// Create a safe empty currentTextBlock so character data outside cells
// (e.g. whitespace between tags) doesn't crash
auto tableBlockStyle = BlockStyle();
tableBlockStyle.alignment = CssTextAlign::Left;
self->currentTextBlock.reset(new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, tableBlockStyle));
self->italicUntilDepth = min(self->italicUntilDepth, self->depth);
// Advance depth before processing character data (like you would for an element with text)
self->depth += 1; self->depth += 1;
self->characterData(userData, "[Table omitted]", strlen("[Table omitted]"));
// Skip table contents (skip until parent as we pre-advanced depth above)
self->skipUntilDepth = self->depth - 1;
return; return;
} }
// Table structure tags (only when inside a table)
if (self->inTable) {
if (strcmp(name, "tr") == 0) {
self->tableData->rows.push_back(TableRow());
self->depth += 1;
return;
}
// <col> — capture width hint for column sizing
if (strcmp(name, "col") == 0) {
CssLength widthHint;
bool hasHint = false;
// Parse HTML width attribute
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "width") == 0) {
hasHint = parseHtmlWidthAttr(atts[i + 1], widthHint);
break;
}
}
}
// CSS width (inline style) overrides HTML attribute
if (self->cssParser) {
std::string styleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
break;
}
}
}
if (!styleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
if (inlineStyle.hasWidth()) {
widthHint = inlineStyle.width;
hasHint = true;
}
}
}
if (hasHint) {
self->tableData->colWidthHints.push_back(widthHint);
} else {
// Push a zero-value placeholder to maintain index alignment
self->tableData->colWidthHints.push_back(CssLength());
}
self->depth += 1;
return;
}
if (strcmp(name, "td") == 0 || strcmp(name, "th") == 0) {
const bool isHeader = strcmp(name, "th") == 0;
// Parse colspan and width attributes
int colspan = 1;
CssLength cellWidthHint;
bool hasCellWidthHint = false;
std::string cellStyleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "colspan") == 0) {
colspan = atoi(atts[i + 1]);
if (colspan < 1) colspan = 1;
} else if (strcmp(atts[i], "width") == 0) {
hasCellWidthHint = parseHtmlWidthAttr(atts[i + 1], cellWidthHint);
} else if (strcmp(atts[i], "style") == 0) {
cellStyleAttr = atts[i + 1];
}
}
}
// CSS width (inline style or stylesheet) overrides HTML attribute
if (self->cssParser) {
std::string classAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "class") == 0) {
classAttr = atts[i + 1];
break;
}
}
}
CssStyle cellCssStyle = self->cssParser->resolveStyle(name, classAttr);
if (!cellStyleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(cellStyleAttr);
cellCssStyle.applyOver(inlineStyle);
}
if (cellCssStyle.hasWidth()) {
cellWidthHint = cellCssStyle.width;
hasCellWidthHint = true;
}
}
// Ensure there's a row to add cells to
if (self->tableData->rows.empty()) {
self->tableData->rows.push_back(TableRow());
}
// Create a new ParsedText for this cell (characterData will flow into it)
auto cellBlockStyle = BlockStyle();
cellBlockStyle.alignment = CssTextAlign::Left;
cellBlockStyle.textAlignDefined = true;
// Explicitly disable paragraph indent for table cells
cellBlockStyle.textIndent = 0;
cellBlockStyle.textIndentDefined = true;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, cellBlockStyle));
self->nextWordContinues = false;
// Track the cell
auto& currentRow = self->tableData->rows.back();
currentRow.cells.push_back(TableCell());
currentRow.cells.back().isHeader = isHeader;
currentRow.cells.back().colspan = colspan;
if (hasCellWidthHint) {
currentRow.cells.back().widthHint = cellWidthHint;
currentRow.cells.back().hasWidthHint = true;
}
// Apply bold for header cells
if (isHeader) {
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->updateEffectiveInlineStyle();
}
self->depth += 1;
return;
}
// Transparent table container tags
if (matches(name, TABLE_TRANSPARENT_TAGS, NUM_TABLE_TRANSPARENT_TAGS)) {
self->depth += 1;
return;
}
// Skip colgroup, col, caption
if (matches(name, TABLE_SKIP_TAGS, NUM_TABLE_SKIP_TAGS)) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Other tags inside table cells (p, div, span, b, i, etc.) fall through
// to the normal handling below. startNewTextBlock is a no-op when inTable.
}
if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) { if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) {
// TODO: Start processing image tags // TODO: Start processing image tags
std::string alt = "[Image]"; std::string alt = "[Image]";
@@ -408,7 +619,8 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
// There should be enough here to build out 1-2 full pages and doing this will free up a lot of // There should be enough here to build out 1-2 full pages and doing this will free up a lot of
// memory. // memory.
// Spotted when reading Intermezzo, there are some really long text blocks in there. // Spotted when reading Intermezzo, there are some really long text blocks in there.
if (self->currentTextBlock->size() > 750) { // Skip this when inside a table - cell content is buffered for later layout.
if (!self->inTable && self->currentTextBlock->size() > 750) {
LOG_DBG("EHP", "Text block too long, splitting into multiple pages"); LOG_DBG("EHP", "Text block too long, splitting into multiple pages");
self->currentTextBlock->layoutAndExtractLines( self->currentTextBlock->layoutAndExtractLines(
self->renderer, self->fontId, self->viewportWidth, self->renderer, self->fontId, self->viewportWidth,
@@ -446,15 +658,17 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
const bool headerOrBlockTag = isHeaderOrBlock(name); const bool headerOrBlockTag = isHeaderOrBlock(name);
const bool isTableCellTag = strcmp(name, "td") == 0 || strcmp(name, "th") == 0;
const bool isTableTag = strcmp(name, "table") == 0;
// Flush buffer with current style BEFORE any style changes // Flush buffer with current style BEFORE any style changes
if (self->partWordBufferIndex > 0) { if (self->partWordBufferIndex > 0) {
// Flush if style will change OR if we're closing a block/structural element // Flush if style will change OR if we're closing a block/structural element
const bool isInlineTag = !headerOrBlockTag && strcmp(name, "table") != 0 && const bool isInlineTag = !headerOrBlockTag && !isTableTag && !isTableCellTag &&
!matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) && self->depth != 1; !matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) && self->depth != 1;
const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) ||
matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 || matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || isTableTag || isTableCellTag ||
matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
if (shouldFlush) { if (shouldFlush) {
@@ -466,6 +680,57 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
} }
} }
// --- Table cell/row/table close handling ---
if (self->inTable) {
if (isTableCellTag) {
// Save the current cell content into the table data
if (self->tableData && !self->tableData->rows.empty()) {
auto& currentRow = self->tableData->rows.back();
if (!currentRow.cells.empty()) {
currentRow.cells.back().content = std::move(self->currentTextBlock);
}
}
// Create a safe empty ParsedText so character data between cells doesn't crash
auto safeBlockStyle = BlockStyle();
safeBlockStyle.alignment = CssTextAlign::Left;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, safeBlockStyle));
self->nextWordContinues = false;
}
if (isTableTag) {
// Process the entire buffered table
self->depth -= 1;
// Clean up style state for this depth
if (self->skipUntilDepth == self->depth) self->skipUntilDepth = INT_MAX;
if (self->boldUntilDepth == self->depth) self->boldUntilDepth = INT_MAX;
if (self->italicUntilDepth == self->depth) self->italicUntilDepth = INT_MAX;
if (self->underlineUntilDepth == self->depth) self->underlineUntilDepth = INT_MAX;
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
}
self->processTable();
self->inTable = false;
self->tableData.reset();
// Restore a fresh text block for content after the table
auto paragraphAlignmentBlockStyle = BlockStyle();
paragraphAlignmentBlockStyle.textAlignDefined = true;
const auto align = (self->paragraphAlignment == static_cast<uint8_t>(CssTextAlign::None))
? CssTextAlign::Justify
: static_cast<CssTextAlign>(self->paragraphAlignment);
paragraphAlignmentBlockStyle.alignment = align;
self->currentTextBlock.reset(
new ParsedText(self->extraParagraphSpacing, self->hyphenationEnabled, paragraphAlignmentBlockStyle));
return; // depth already decremented, skip the normal endElement cleanup
}
}
self->depth -= 1; self->depth -= 1;
// Leaving skip // Leaving skip
@@ -653,3 +918,335 @@ void ChapterHtmlSlimParser::makePages() {
currentPageNextY += lineHeight / 2; currentPageNextY += lineHeight / 2;
} }
} }
// ---------------------------------------------------------------------------
// Table processing
// ---------------------------------------------------------------------------
// Layout constants used by processTable() when sizing columns and rows.
// NOTE: the page rendering code keeps its own copies of the horizontal and top padding
// (TABLE_CELL_PADDING_X / TABLE_CELL_PADDING_TOP) and expects them to match the values
// below — update both places together.

// Cell padding in pixels (horizontal space between grid line and cell text)
static constexpr int TABLE_CELL_PAD_X = 4;
// Vertical cell padding — asymmetric because font metrics include internal leading (whitespace
// above glyphs), so the top already has built-in visual space. Less explicit padding on top,
// more on bottom, produces visually balanced results.
static constexpr int TABLE_CELL_PAD_TOP = 1;
static constexpr int TABLE_CELL_PAD_BOTTOM = 3;
// Minimum usable column width in pixels (below this text is unreadable)
static constexpr int TABLE_MIN_COL_WIDTH = 30;
// Grid line width in pixels (used for the vertical lines between/around columns)
static constexpr int TABLE_GRID_LINE_PX = 1;
/// Places a laid-out table row at the current vertical position, starting a new page
/// first when the row does not fit in the space left on the current one.
///
/// @param row Row to place. Its xPos/yPos are overwritten here and it is appended to the
///            current page's element list. A null row is ignored.
void ChapterHtmlSlimParser::addTableRowToPage(std::shared_ptr<PageTableRow> row) {
  if (!row) {
    return;
  }

  if (!currentPage) {
    currentPage.reset(new Page());
    currentPageNextY = 0;
  }

  const int16_t rowH = row->getHeight();

  // If this row doesn't fit on the current page, start a new one.
  if (currentPageNextY + rowH > viewportHeight) {
    // Only hand the page over when it actually holds something. A row taller than the
    // viewport never "fits", and flushing an empty page for it would insert a blank page
    // in front of the row without gaining any space.
    if (!currentPage->elements.empty()) {
      completePageFn(std::move(currentPage));
      currentPage.reset(new Page());
    }
    currentPageNextY = 0;
  }

  row->xPos = 0;
  row->yPos = currentPageNextY;
  currentPage->elements.push_back(std::move(row));
  currentPageNextY += rowH;
}
void ChapterHtmlSlimParser::processTable() {
if (!tableData || tableData->rows.empty()) {
return;
}
if (!currentPage) {
currentPage.reset(new Page());
currentPageNextY = 0;
}
const int lh = static_cast<int>(renderer.getLineHeight(fontId) * lineCompression);
// 1. Determine logical column count using colspan.
// Each cell occupies cell.colspan logical columns. The total for a row is the sum of colspans.
size_t numCols = 0;
for (const auto& row : tableData->rows) {
size_t rowLogicalCols = 0;
for (const auto& cell : row.cells) {
rowLogicalCols += static_cast<size_t>(cell.colspan);
}
numCols = std::max(numCols, rowLogicalCols);
}
if (numCols == 0) {
return;
}
// 2. Measure natural width of each cell and compute per-column max natural width.
// Only non-spanning cells (colspan==1) contribute to individual column widths.
// Spanning cells use the combined width of their spanned columns.
std::vector<uint16_t> colNaturalWidth(numCols, 0);
for (const auto& row : tableData->rows) {
size_t logicalCol = 0;
for (const auto& cell : row.cells) {
if (cell.colspan == 1 && cell.content && !cell.content->isEmpty()) {
if (logicalCol < numCols) {
const uint16_t w = cell.content->getNaturalWidth(renderer, fontId);
if (w > colNaturalWidth[logicalCol]) {
colNaturalWidth[logicalCol] = w;
}
}
}
logicalCol += static_cast<size_t>(cell.colspan);
}
}
// 3. Calculate column widths to fit viewport.
// Available width = viewport - outer borders - internal column borders - cell padding
const int totalGridLines = static_cast<int>(numCols) + 1; // left + between columns + right
const int totalPadding = static_cast<int>(numCols) * TABLE_CELL_PAD_X * 2;
const int availableForContent = viewportWidth - totalGridLines * TABLE_GRID_LINE_PX - totalPadding;
// 3a. Resolve width hints per column.
// Priority: <col> hints > max cell hint (colspan=1 only).
// Percentages are relative to availableForContent.
const float emSize = static_cast<float>(lh);
const float containerW = static_cast<float>(std::max(availableForContent, 0));
std::vector<int> colHintedWidth(numCols, -1); // -1 = no hint
// From <col> tags
for (size_t c = 0; c < numCols && c < tableData->colWidthHints.size(); ++c) {
const auto& hint = tableData->colWidthHints[c];
if (hint.value > 0) {
int px = static_cast<int>(hint.toPixels(emSize, containerW));
if (px > 0) {
colHintedWidth[c] = std::max(px, TABLE_MIN_COL_WIDTH);
}
}
}
// From <td>/<th> cell width hints (only override if no <col> hint exists for this column)
for (const auto& row : tableData->rows) {
size_t logicalCol = 0;
for (const auto& cell : row.cells) {
if (cell.colspan == 1 && cell.hasWidthHint && logicalCol < numCols) {
if (colHintedWidth[logicalCol] < 0) { // no <col> hint yet
int px = static_cast<int>(cell.widthHint.toPixels(emSize, containerW));
if (px > colHintedWidth[logicalCol]) {
colHintedWidth[logicalCol] = std::max(px, TABLE_MIN_COL_WIDTH);
}
}
}
logicalCol += static_cast<size_t>(cell.colspan);
}
}
// 3b. Distribute column widths: hinted columns get their hint, unhinted use auto-sizing.
std::vector<uint16_t> colWidths(numCols, 0);
if (availableForContent <= 0) {
const uint16_t equalWidth = static_cast<uint16_t>(viewportWidth / numCols);
for (size_t c = 0; c < numCols; ++c) {
colWidths[c] = equalWidth;
}
} else {
// First, assign hinted columns and track how much space they consume
int hintedSpaceUsed = 0;
size_t unhintedCount = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
hintedSpaceUsed += colHintedWidth[c];
} else {
unhintedCount++;
}
}
// If hinted columns exceed available space, scale them down proportionally
if (hintedSpaceUsed > availableForContent && hintedSpaceUsed > 0) {
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
colHintedWidth[c] = colHintedWidth[c] * availableForContent / hintedSpaceUsed;
colHintedWidth[c] = std::max(colHintedWidth[c], TABLE_MIN_COL_WIDTH);
}
}
// Recalculate
hintedSpaceUsed = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
hintedSpaceUsed += colHintedWidth[c];
}
}
}
// Assign hinted columns
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] > 0) {
colWidths[c] = static_cast<uint16_t>(colHintedWidth[c]);
}
}
// Distribute remaining space among unhinted columns using the existing algorithm
const int remainingForUnhinted = std::max(availableForContent - hintedSpaceUsed, 0);
if (unhintedCount > 0 && remainingForUnhinted > 0) {
// Compute total natural width of unhinted columns
int totalNaturalUnhinted = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
totalNaturalUnhinted += colNaturalWidth[c];
}
}
if (totalNaturalUnhinted <= remainingForUnhinted) {
// All unhinted content fits — distribute extra space equally among unhinted columns
const int extraSpace = remainingForUnhinted - totalNaturalUnhinted;
const int perColExtra = extraSpace / static_cast<int>(unhintedCount);
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
colWidths[c] = static_cast<uint16_t>(colNaturalWidth[c] + perColExtra);
}
}
} else {
// Unhinted content exceeds remaining space — two-pass fair-share among unhinted columns
const int equalShare = remainingForUnhinted / static_cast<int>(unhintedCount);
int spaceUsedByFitting = 0;
int naturalOfWide = 0;
size_t wideCount = 0;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
if (static_cast<int>(colNaturalWidth[c]) <= equalShare) {
colWidths[c] = colNaturalWidth[c];
spaceUsedByFitting += colNaturalWidth[c];
} else {
naturalOfWide += colNaturalWidth[c];
wideCount++;
}
}
}
const int wideSpace = remainingForUnhinted - spaceUsedByFitting;
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0 && static_cast<int>(colNaturalWidth[c]) > equalShare) {
if (naturalOfWide > 0 && wideCount > 1) {
int proportional = static_cast<int>(colNaturalWidth[c]) * wideSpace / naturalOfWide;
colWidths[c] = static_cast<uint16_t>(std::max(proportional, TABLE_MIN_COL_WIDTH));
} else {
colWidths[c] = static_cast<uint16_t>(std::max(wideSpace, TABLE_MIN_COL_WIDTH));
}
}
}
}
} else if (unhintedCount > 0) {
// No remaining space for unhinted columns — give them minimum width
for (size_t c = 0; c < numCols; ++c) {
if (colHintedWidth[c] <= 0) {
colWidths[c] = static_cast<uint16_t>(TABLE_MIN_COL_WIDTH);
}
}
}
}
// Compute column x-offsets (cumulative: border + padding + content width + padding + border ...)
std::vector<uint16_t> colXOffsets(numCols, 0);
int xAccum = TABLE_GRID_LINE_PX; // start after left border
for (size_t c = 0; c < numCols; ++c) {
colXOffsets[c] = static_cast<uint16_t>(xAccum);
xAccum += TABLE_CELL_PAD_X + colWidths[c] + TABLE_CELL_PAD_X + TABLE_GRID_LINE_PX;
}
const int16_t totalTableWidth = static_cast<int16_t>(xAccum);
// Helper: compute the combined content width for a cell spanning multiple columns.
// This includes the content widths plus the internal grid lines and padding between spanned columns.
auto spanContentWidth = [&](size_t startCol, int colspan) -> uint16_t {
int width = 0;
for (int s = 0; s < colspan && startCol + s < numCols; ++s) {
width += colWidths[startCol + s];
if (s > 0) {
// Add internal padding and grid line between spanned columns
width += TABLE_CELL_PAD_X * 2 + TABLE_GRID_LINE_PX;
}
}
return static_cast<uint16_t>(std::max(width, 0));
};
// Helper: compute the full cell width (including padding on both sides) for a spanning cell.
auto spanFullCellWidth = [&](size_t startCol, int colspan) -> uint16_t {
if (colspan <= 0 || startCol >= numCols) return 0;
const size_t endCol = std::min(startCol + static_cast<size_t>(colspan), numCols) - 1;
// From the left edge of startCol's cell to the right edge of endCol's cell
const int leftEdge = colXOffsets[startCol];
const int rightEdge = colXOffsets[endCol] + TABLE_CELL_PAD_X + colWidths[endCol] + TABLE_CELL_PAD_X;
return static_cast<uint16_t>(rightEdge - leftEdge);
};
// 4. Lay out each row: map cells to logical columns, create PageTableRow
for (auto& row : tableData->rows) {
// Build cell data for this row, one entry per CELL (not per logical column).
// Each PageTableCellData gets the correct x-offset and combined column width.
std::vector<PageTableCellData> cellDataVec;
size_t maxLinesInRow = 1;
size_t logicalCol = 0;
for (size_t ci = 0; ci < row.cells.size() && logicalCol < numCols; ++ci) {
auto& cell = row.cells[ci];
const int cs = cell.colspan;
PageTableCellData cellData;
cellData.xOffset = colXOffsets[logicalCol];
cellData.columnWidth = spanFullCellWidth(logicalCol, cs);
if (cell.content && !cell.content->isEmpty()) {
// Center-align cells that span the full table width (common for section headers/titles)
if (cs >= static_cast<int>(numCols)) {
BlockStyle centeredStyle = cell.content->getBlockStyle();
centeredStyle.alignment = CssTextAlign::Center;
centeredStyle.textAlignDefined = true;
cell.content->setBlockStyle(centeredStyle);
}
const uint16_t contentWidth = spanContentWidth(logicalCol, cs);
std::vector<std::shared_ptr<TextBlock>> cellLines;
cell.content->layoutAndExtractLines(
renderer, fontId, contentWidth,
[&cellLines](const std::shared_ptr<TextBlock>& textBlock) { cellLines.push_back(textBlock); });
if (cellLines.size() > maxLinesInRow) {
maxLinesInRow = cellLines.size();
}
cellData.lines = std::move(cellLines);
}
cellDataVec.push_back(std::move(cellData));
logicalCol += static_cast<size_t>(cs);
}
// Fill remaining logical columns with empty cells (rows shorter than numCols)
while (logicalCol < numCols) {
PageTableCellData emptyCell;
emptyCell.xOffset = colXOffsets[logicalCol];
emptyCell.columnWidth = static_cast<uint16_t>(TABLE_CELL_PAD_X + colWidths[logicalCol] + TABLE_CELL_PAD_X);
cellDataVec.push_back(std::move(emptyCell));
logicalCol++;
}
// Row height = max lines * lineHeight + top/bottom border + asymmetric vertical padding
const int16_t rowHeight = static_cast<int16_t>(
static_cast<int>(maxLinesInRow) * lh + 2 + TABLE_CELL_PAD_TOP + TABLE_CELL_PAD_BOTTOM);
auto pageTableRow = std::make_shared<PageTableRow>(
std::move(cellDataVec), rowHeight, totalTableWidth, static_cast<int16_t>(lh), 0, 0);
addTableRowToPage(std::move(pageTableRow));
}
// Add a small gap after the table
if (extraParagraphSpacing) {
currentPageNextY += lh / 2;
}
}

View File

@@ -7,11 +7,13 @@
#include <memory> #include <memory>
#include "../ParsedText.h" #include "../ParsedText.h"
#include "../TableData.h"
#include "../blocks/TextBlock.h" #include "../blocks/TextBlock.h"
#include "../css/CssParser.h" #include "../css/CssParser.h"
#include "../css/CssStyle.h" #include "../css/CssStyle.h"
class Page; class Page;
class PageTableRow;
class GfxRenderer; class GfxRenderer;
#define MAX_WORD_SIZE 200 #define MAX_WORD_SIZE 200
@@ -57,10 +59,16 @@ class ChapterHtmlSlimParser {
bool effectiveItalic = false; bool effectiveItalic = false;
bool effectiveUnderline = false; bool effectiveUnderline = false;
// Table buffering state
bool inTable = false;
std::unique_ptr<TableData> tableData;
void updateEffectiveInlineStyle(); void updateEffectiveInlineStyle();
void startNewTextBlock(const BlockStyle& blockStyle); void startNewTextBlock(const BlockStyle& blockStyle);
void flushPartWordBuffer(); void flushPartWordBuffer();
void makePages(); void makePages();
void processTable();
void addTableRowToPage(std::shared_ptr<PageTableRow> row);
// XML callbacks // XML callbacks
static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void XMLCALL characterData(void* userData, const XML_Char* s, int len); static void XMLCALL characterData(void* userData, const XML_Char* s, int len);