feat: Basic table support (#980)
Some checks failed
CI (build) / clang-format (push) Has been cancelled
CI (build) / cppcheck (push) Has been cancelled
CI (build) / build (push) Has been cancelled
CI (build) / Test Status (push) Has been cancelled

I've been reading "Children of Time" over the last few days and that book,
annoyingly, has some tabular content.
This content is relevant for the story so I needed some really basic way
to at least be able to read those tables.


This commit simply renders the contents of each table cell as a separate
paragraph, preceded by a small header describing the cell's position in the
table. For me, it's better than nothing.

## Summary

* **What is the goal of this PR?**

Implements really basic table support

* **What changes are included?**

  * Minimal changes to ChapterHtmlSlimParser
  * A demo book in test/epubs

## Additional Context

Here are some screenshots of the demo book I provide with this PR.


![PXL_20260218_211446510](https://github.com/user-attachments/assets/49ef81b8-2fa0-4f0d-bb6f-4ef885be6772)


![PXL_20260218_211456379](https://github.com/user-attachments/assets/e7c82b35-b4a9-4a7d-9ec5-2b4bc2ff3514)

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? _**PARTIALLY**_ 
_Little bit of guidance on what to touch, parts of the impl, rest
manually._
This commit is contained in:
Maik Allgöwer
2026-02-19 14:13:23 +01:00
committed by GitHub
parent 6527f43cb1
commit 103fac2ee1
3 changed files with 95 additions and 11 deletions

View File

@@ -53,6 +53,10 @@ bool isHeaderOrBlock(const char* name) {
return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
}
// Reports whether `name` is exactly one of the table layout tags
// ("table", "tr", "td", "th"). Matching is case-sensitive.
bool isTableStructuralTag(const char* name) {
  constexpr const char* tableTags[] = {"table", "tr", "td", "th"};
  for (const char* candidate : tableTags) {
    if (strcmp(name, candidate) == 0) {
      return true;
    }
  }
  return false;
}
// Update effective bold/italic/underline based on block style and inline style stack
void ChapterHtmlSlimParser::updateEffectiveInlineStyle() {
// Start with block-level styles
@@ -145,18 +149,66 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
centeredBlockStyle.textAlignDefined = true;
centeredBlockStyle.alignment = CssTextAlign::Center;
// Special handling for tables - show placeholder text instead of dropping silently
// Special handling for tables/cells: flatten into per-cell paragraphs with a prefixed header.
if (strcmp(name, "table") == 0) {
// Add placeholder text
self->startNewTextBlock(centeredBlockStyle);
// skip nested tables
if (self->tableDepth > 0) {
self->tableDepth += 1;
return;
}
self->italicUntilDepth = min(self->italicUntilDepth, self->depth);
// Advance depth before processing character data (like you would for an element with text)
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
}
self->tableDepth += 1;
self->tableRowIndex = 0;
self->tableColIndex = 0;
self->depth += 1;
self->characterData(userData, "[Table omitted]", strlen("[Table omitted]"));
return;
}
// Skip table contents (skip until parent as we pre-advanced depth above)
self->skipUntilDepth = self->depth - 1;
if (self->tableDepth == 1 && strcmp(name, "tr") == 0) {
self->tableRowIndex += 1;
self->tableColIndex = 0;
self->depth += 1;
return;
}
if (self->tableDepth == 1 && (strcmp(name, "td") == 0 || strcmp(name, "th") == 0)) {
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
}
self->tableColIndex += 1;
auto tableCellBlockStyle = BlockStyle();
tableCellBlockStyle.textAlignDefined = true;
const auto align = (self->paragraphAlignment == static_cast<uint8_t>(CssTextAlign::None))
? CssTextAlign::Justify
: static_cast<CssTextAlign>(self->paragraphAlignment);
tableCellBlockStyle.alignment = align;
self->startNewTextBlock(tableCellBlockStyle);
const std::string headerText =
"Tab Row " + std::to_string(self->tableRowIndex) + ", Cell " + std::to_string(self->tableColIndex) + ":";
StyleStackEntry headerStyle;
headerStyle.depth = self->depth;
headerStyle.hasBold = true;
headerStyle.bold = false;
headerStyle.hasItalic = true;
headerStyle.italic = true;
headerStyle.hasUnderline = true;
headerStyle.underline = false;
self->inlineStyleStack.push_back(headerStyle);
self->updateEffectiveInlineStyle();
self->characterData(userData, headerText.c_str(), static_cast<int>(headerText.length()));
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
}
self->nextWordContinues = false;
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
self->depth += 1;
return;
}
@@ -445,6 +497,11 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Skip content of nested table
if (self->tableDepth > 1) {
return;
}
// Middle of skip
if (self->skipUntilDepth < self->depth) {
return;
@@ -548,15 +605,24 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
const bool headerOrBlockTag = isHeaderOrBlock(name);
const bool tableStructuralTag = isTableStructuralTag(name);
if (self->tableDepth > 1 && strcmp(name, "table") == 0) {
// get rid of all text inside the nested table
self->partWordBufferIndex = 0;
self->tableDepth -= 1;
LOG_DBG("EHP", "nested table detected, get rid of its content");
return;
}
// Flush buffer with current style BEFORE any style changes
if (self->partWordBufferIndex > 0) {
// Flush if style will change OR if we're closing a block/structural element
const bool isInlineTag = !headerOrBlockTag && strcmp(name, "table") != 0 &&
!matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) && self->depth != 1;
const bool isInlineTag =
!headerOrBlockTag && !tableStructuralTag && !matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) && self->depth != 1;
const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) ||
matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || tableStructuralTag ||
matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
if (shouldFlush) {
@@ -575,6 +641,21 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
self->skipUntilDepth = INT_MAX;
}
if (self->tableDepth == 1 && (strcmp(name, "td") == 0 || strcmp(name, "th") == 0)) {
self->nextWordContinues = false;
}
if (self->tableDepth == 1 && (strcmp(name, "tr") == 0)) {
self->nextWordContinues = false;
}
if (self->tableDepth == 1 && strcmp(name, "table") == 0) {
self->tableDepth -= 1;
self->tableRowIndex = 0;
self->tableColIndex = 0;
self->nextWordContinues = false;
}
// Leaving bold tag
if (self->boldUntilDepth == self->depth) {
self->boldUntilDepth = INT_MAX;

View File

@@ -62,6 +62,9 @@ class ChapterHtmlSlimParser {
bool effectiveBold = false;
bool effectiveItalic = false;
bool effectiveUnderline = false;
int tableDepth = 0;
int tableRowIndex = 0;
int tableColIndex = 0;
void updateEffectiveInlineStyle();
void startNewTextBlock(const BlockStyle& blockStyle);

BIN
test/epubs/test_tables.epub Normal file

Binary file not shown.