Files
crosspoint-reader-mod/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
cottongin f2a2b03074 feat: add TOC boundary API and anchor page breaks to Section
Extend Section with TOC boundary tracking: buildTocBoundaries(),
getTocIndexForPage(), getPageForTocIndex(), getPageRangeForTocIndex(),
readAnchorMap(), and readCachedPageCount() for lightweight cache queries.

ChapterHtmlSlimParser now accepts a tocAnchors set and forces page breaks
at TOC anchor boundaries so each chapter starts on a fresh page.

Increment SECTION_FILE_VERSION to 19 for new TOC boundary data.

Ported from upstream PRs #1143 and #1172, adapted to mod architecture.

Made-with: Cursor
2026-03-08 04:49:43 -04:00

138 lines
5.0 KiB
C++

#pragma once
#include <expat.h>
#include <climits>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../FootnoteEntry.h"
#include "../ParsedText.h"
#include "../blocks/ImageBlock.h"
#include "../blocks/TextBlock.h"
#include "../css/CssParser.h"
#include "../css/CssStyle.h"
// Forward declarations: this header only names these types through a
// reference (GfxRenderer) or smart pointers (Page, Epub), so their full
// definitions are not pulled in here.
class Page;
class GfxRenderer;
class Epub;
// Number of characters ChapterHtmlSlimParser::partWordBuffer can hold; the
// buffer itself is declared with MAX_WORD_SIZE + 1 elements to leave room for
// a trailing '\0'.
#define MAX_WORD_SIZE 200
/**
 * Expat-based parser for a single chapter HTML file that builds Page objects.
 *
 * Only the declaration lives in this header; the parsing and layout logic is in
 * the corresponding source file. Judging by the member names, finished pages
 * are handed to completePageFn and HTML id attributes are recorded in
 * anchorData (exposed through getAnchors()) -- confirm against the
 * implementation.
 *
 * Lifetime notes:
 *  - renderer is held by reference and must outlive the parser.
 *  - cssParser is a non-owning pointer and may be null (it defaults to nullptr).
 *  - filepath is copied into the parser, so callers may pass a temporary.
 */
class ChapterHtmlSlimParser {
  std::shared_ptr<Epub> epub;
  // Stored by value (not as a reference) so the parser never dangles when the
  // caller's string is a temporary or goes out of scope before parsing ends.
  std::string filepath;
  GfxRenderer& renderer;
  std::function<void(std::unique_ptr<Page>)> completePageFn;
  std::function<void()> popupFn;  // Popup callback

  int depth = 0;
  int skipUntilDepth = INT_MAX;
  int boldUntilDepth = INT_MAX;
  int italicUntilDepth = INT_MAX;
  int underlineUntilDepth = INT_MAX;

  // buffer for building up words from characters, will auto break if longer than this
  // leave one char at end for the null terminator
  char partWordBuffer[MAX_WORD_SIZE + 1] = {};
  int partWordBufferIndex = 0;
  bool nextWordContinues = false;  // true when next flushed word attaches to previous (inline element boundary)

  std::unique_ptr<ParsedText> currentTextBlock = nullptr;
  std::unique_ptr<Page> currentPage = nullptr;
  int16_t currentPageNextY = 0;

  // Layout/rendering settings, all supplied through the constructor.
  int fontId;
  float lineCompression;
  bool extraParagraphSpacing;
  uint8_t paragraphAlignment;
  uint16_t viewportWidth;
  uint16_t viewportHeight;
  bool hyphenationEnabled;
  const CssParser* cssParser;  // non-owning; nullptr when the caller supplies none
  bool embeddedStyle;
  uint8_t imageRendering;
  std::string contentBase;
  std::string imageBasePath;
  int imageCounter = 0;

  // Style tracking (replaces depth-based approach)
  struct StyleStackEntry {
    int depth = 0;
    bool hasBold = false, bold = false;
    bool hasItalic = false, italic = false;
    bool hasUnderline = false, underline = false;
  };
  std::vector<StyleStackEntry> inlineStyleStack;
  CssStyle currentCssStyle;
  bool effectiveBold = false;
  bool effectiveItalic = false;
  bool effectiveUnderline = false;

  int tableDepth = 0;
  int tableRowIndex = 0;
  int tableColIndex = 0;

  // Anchor-to-page mapping: tracks which page each HTML id attribute lands on
  int completedPageCount = 0;
  std::vector<std::pair<std::string, uint16_t>> anchorData;
  std::string pendingAnchorId;  // deferred until after previous text block is flushed

  // TOC anchors: when a TOC anchor is encountered, force a page break so chapters start on a fresh page
  std::set<std::string> tocAnchors;
  std::map<std::string, uint16_t> tocAnchorPageMap;

  // Footnote link tracking
  bool insideFootnoteLink = false;
  int footnoteLinkDepth = -1;
  char currentFootnoteLinkText[24] = {};
  int currentFootnoteLinkTextLen = 0;
  char currentFootnoteLinkHref[64] = {};
  std::vector<std::pair<int, FootnoteEntry>> pendingFootnotes;  // <wordIndex, entry>
  int wordsExtractedInBlock = 0;

  void updateEffectiveInlineStyle();
  void startNewTextBlock(const BlockStyle& blockStyle);
  void flushPartWordBuffer();
  void makePages();

  // XML callbacks (signatures follow expat's handler conventions)
  static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);
  static void XMLCALL characterData(void* userData, const XML_Char* s, int len);
  static void XMLCALL defaultHandlerExpand(void* userData, const XML_Char* s, int len);
  static void XMLCALL endElement(void* userData, const XML_Char* name);

 public:
  /**
   * Stores the parsing/layout configuration; no parsing happens here.
   *
   * @param epub                  Shared handle to the book being parsed.
   * @param filepath              Path of the chapter file; copied into the parser.
   * @param renderer              Renderer kept by reference; must outlive this object.
   * @param fontId                Font identifier stored for layout.
   * @param lineCompression       Line compression factor stored for layout.
   * @param extraParagraphSpacing Whether extra paragraph spacing is requested.
   * @param paragraphAlignment    Paragraph alignment setting.
   * @param viewportWidth         Viewport width.
   * @param viewportHeight        Viewport height.
   * @param hyphenationEnabled    Whether hyphenation is enabled.
   * @param completePageFn        Callback taking ownership of a Page; copied.
   * @param embeddedStyle         Embedded-style flag.
   * @param contentBase           Content base path; copied.
   * @param imageBasePath         Image base path; copied.
   * @param imageRendering        Image rendering mode (defaults to 0).
   * @param tocAnchors            Set of TOC anchor ids; moved into the parser.
   * @param popupFn               Optional popup callback (defaults to empty).
   * @param cssParser             Optional non-owning CSS parser (defaults to nullptr).
   *
   * The initializers below are listed in member declaration order, which is
   * the order the language actually runs them in.
   */
  explicit ChapterHtmlSlimParser(std::shared_ptr<Epub> epub, const std::string& filepath, GfxRenderer& renderer,
                                 const int fontId, const float lineCompression, const bool extraParagraphSpacing,
                                 const uint8_t paragraphAlignment, const uint16_t viewportWidth,
                                 const uint16_t viewportHeight, const bool hyphenationEnabled,
                                 const std::function<void(std::unique_ptr<Page>)>& completePageFn,
                                 const bool embeddedStyle, const std::string& contentBase,
                                 const std::string& imageBasePath, const uint8_t imageRendering = 0,
                                 std::set<std::string> tocAnchors = {},
                                 const std::function<void()>& popupFn = nullptr, const CssParser* cssParser = nullptr)
      : epub(std::move(epub)),  // by-value sink parameter: move instead of bumping the refcount again
        filepath(filepath),
        renderer(renderer),
        completePageFn(completePageFn),
        popupFn(popupFn),
        fontId(fontId),
        lineCompression(lineCompression),
        extraParagraphSpacing(extraParagraphSpacing),
        paragraphAlignment(paragraphAlignment),
        viewportWidth(viewportWidth),
        viewportHeight(viewportHeight),
        hyphenationEnabled(hyphenationEnabled),
        cssParser(cssParser),
        embeddedStyle(embeddedStyle),
        imageRendering(imageRendering),
        contentBase(contentBase),
        imageBasePath(imageBasePath),
        tocAnchors(std::move(tocAnchors)) {}

  // The unique_ptr and reference members already make this class
  // non-copyable; spell it out so misuse produces a clear diagnostic.
  ChapterHtmlSlimParser(const ChapterHtmlSlimParser&) = delete;
  ChapterHtmlSlimParser& operator=(const ChapterHtmlSlimParser&) = delete;

  ~ChapterHtmlSlimParser() = default;

  /// Parses the chapter and builds its pages; returns a bool status
  /// (presumably true on success -- confirm in the implementation).
  bool parseAndBuildPages();

  /// Adds a laid-out text line to the page under construction (defined in the source file).
  void addLineToPage(std::shared_ptr<TextBlock> line);

  /// Returns the collected (id string, uint16_t) anchor pairs; the reference
  /// is valid only while this parser is alive.
  const std::vector<std::pair<std::string, uint16_t>>& getAnchors() const { return anchorData; }
};