Skip pagebreak blocks when parsing epub file

These blocks break the flow and often contain the page number in them which
should not interrupt the flow of the content
This commit is contained in:
Dave Allie 2025-12-19 01:05:39 +11:00
parent 70dc0f018e
commit b9e46641cb
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F

View File

@ -75,6 +75,18 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
return; return;
} }
// Skip blocks with role="doc-pagebreak" and epub:type="pagebreak"
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "role") == 0 && strcmp(atts[i + 1], "doc-pagebreak") == 0 ||
strcmp(atts[i], "epub:type") == 0 && strcmp(atts[i + 1], "pagebreak") == 0) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
}
}
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->startNewTextBlock(TextBlock::CENTER_ALIGN); self->startNewTextBlock(TextBlock::CENTER_ALIGN);
self->boldUntilDepth = min(self->boldUntilDepth, self->depth); self->boldUntilDepth = min(self->boldUntilDepth, self->depth);