From 4d222567457d6d0ca84a5e3e49b44cef777cb17d Mon Sep 17 00:00:00 2001 From: Uri Tauber <142022451+Uri-Tauber@users.noreply.github.com> Date: Fri, 6 Mar 2026 20:10:45 +0200 Subject: [PATCH] feat: footnote anchor navigation (#1245) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary: Enable footnote anchor navigation in EPUB reader This PR extracts the core anchor-to-page mapping mechanism from PR #1143 (TOC fragment navigation) to provide immediate footnote navigation support. By merging this focused subset first, users get a complete footnote experience now while simplifying the eventual review and merge of the full #1143 PR. --- ## What this extracts from PR #1143 PR #1143 implements comprehensive TOC fragment navigation for EPUBs with multi-chapter spine files. This PR takes only the anchor resolution infrastructure: - Anchor-to-page mapping in section cache: During page layout, ChapterHtmlSlimParser records which page each HTML id attribute lands on, serializing the map into the .bin cache file. - Anchor resolution in `EpubReaderActivity`: When navigating to a footnote link with a fragment (e.g., `chapter2.xhtml#note1`), the reader resolves the anchor to a page number and jumps directly to it. - Section file format change: Bumped to version 18; the header now also stores the anchor map offset. --- ## Simplified scope vs. 
PR #1143 To minimize conflicts and complexity, this PR differs from #1143 in key ways: * **Anchors tracked** * **Origin:** Only TOC anchors (passed via `std::set`) * **This branch:** All `id` attributes * **Page breaks** * **Origin:** Forces new page at TOC chapter boundaries * **This branch:** None — natural flow * **TOC integration** * **Origin:** `tocBoundaries`, `getTocIndexForPage()`, chapter skip * **This branch:** None — just footnote links * **Bug fix** * **This branch:** Fixed anchor page off-by-1/2 bug The anchor recording bug (recording the page number before `makePages()` flushes the previous block) was identified and fixed during this extraction. The fix uses a deferred `pendingAnchorId` pattern that records the anchor only after the previous text block has been flushed to pages. --- ## Positioning for future merge Changes are structured to minimize conflicts when #1143 eventually merges: - `ChapterHtmlSlimParser.cpp` `startElement()`: Both branches rewrite the same `if (!idAttr.empty())` block. The merged version will combine both approaches (TOC anchors get page breaks + immediate recording; footnote anchors get deferred recording). - `EpubReaderActivity.cpp` `render()`: The `pendingAnchor` resolution block is positioned at the exact same insertion point where #1143 places its `pendingTocIndex` block (line 596, right after `nextPageNumber` assignment). During merge, both blocks will sit side-by-side. --- ## Why merge separately? 1. Immediate user value: Footnote navigation works now without waiting for the full TOC overhaul 2. Easier review: ~100 lines vs. 500+ lines in #1143 3. Bug fix included: The page recording bug is fixed here and will carry into #1143 4. Minimal conflicts: Structured for clean merge — both PRs touch the same files but in complementary ways --- ### AI Usage Did you use AI tools to help write this code? 
_**< YES >**_ Done by Claude Opus 4.6 --- lib/Epub/Epub/Section.cpp | 60 ++++++++++++++++--- lib/Epub/Epub/Section.h | 5 ++ .../Epub/parsers/ChapterHtmlSlimParser.cpp | 22 ++++++- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 7 +++ src/activities/reader/EpubReaderActivity.cpp | 19 +++++- src/activities/reader/EpubReaderActivity.h | 3 + 6 files changed, 106 insertions(+), 10 deletions(-) diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index bc04d475..9365df20 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -10,10 +10,10 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 17; +constexpr uint8_t SECTION_FILE_VERSION = 18; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) + - sizeof(uint8_t) + sizeof(uint32_t); + sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t); } // namespace uint32_t Section::onPageComplete(std::unique_ptr page) { @@ -44,7 +44,7 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) + sizeof(extraParagraphSpacing) + sizeof(paragraphAlignment) + sizeof(viewportWidth) + sizeof(viewportHeight) + sizeof(pageCount) + sizeof(hyphenationEnabled) + - sizeof(embeddedStyle) + sizeof(imageRendering) + sizeof(uint32_t), + sizeof(embeddedStyle) + sizeof(imageRendering) + sizeof(uint32_t) + sizeof(uint32_t), "Header size mismatch"); serialization::writePod(file, SECTION_FILE_VERSION); serialization::writePod(file, fontId); @@ -56,8 +56,9 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi serialization::writePod(file, hyphenationEnabled); serialization::writePod(file, embeddedStyle); serialization::writePod(file, imageRendering); - serialization::writePod(file, 
pageCount); // Placeholder for page count (will be initially 0 when written) - serialization::writePod(file, static_cast(0)); // Placeholder for LUT offset + serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0, patched later) + serialization::writePod(file, static_cast(0)); // Placeholder for LUT offset (patched later) + serialization::writePod(file, static_cast(0)); // Placeholder for anchor map offset (patched later) } bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing, @@ -239,10 +240,20 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c return false; } - // Go back and write LUT offset - file.seek(HEADER_SIZE - sizeof(uint32_t) - sizeof(pageCount)); + // Write anchor-to-page map for fragment navigation (e.g. footnote targets) + const uint32_t anchorMapOffset = file.position(); + const auto& anchors = visitor.getAnchors(); + serialization::writePod(file, static_cast(anchors.size())); + for (const auto& [anchor, page] : anchors) { + serialization::writeString(file, anchor); + serialization::writePod(file, page); + } + + // Patch header with final pageCount, lutOffset, and anchorMapOffset + file.seek(HEADER_SIZE - sizeof(uint32_t) * 2 - sizeof(pageCount)); serialization::writePod(file, pageCount); serialization::writePod(file, lutOffset); + serialization::writePod(file, anchorMapOffset); file.close(); if (cssParser) { cssParser->clear(); @@ -255,7 +266,7 @@ std::unique_ptr Section::loadPageFromSectionFile() { return nullptr; } - file.seek(HEADER_SIZE - sizeof(uint32_t)); + file.seek(HEADER_SIZE - sizeof(uint32_t) * 2); uint32_t lutOffset; serialization::readPod(file, lutOffset); file.seek(lutOffset + sizeof(uint32_t) * currentPage); @@ -267,3 +278,36 @@ std::unique_ptr Section::loadPageFromSectionFile() { file.close(); return page; } + +std::optional Section::getPageForAnchor(const std::string& anchor) const { + FsFile f; + if 
(!Storage.openFileForRead("SCT", filePath, f)) { + return std::nullopt; + } + + const uint32_t fileSize = f.size(); + f.seek(HEADER_SIZE - sizeof(uint32_t)); + uint32_t anchorMapOffset; + serialization::readPod(f, anchorMapOffset); + if (anchorMapOffset == 0 || anchorMapOffset >= fileSize) { + f.close(); + return std::nullopt; + } + + f.seek(anchorMapOffset); + uint16_t count; + serialization::readPod(f, count); + for (uint16_t i = 0; i < count; i++) { + std::string key; + uint16_t page; + serialization::readString(f, key); + serialization::readPod(f, page); + if (key == anchor) { + f.close(); + return page; + } + } + + f.close(); + return std::nullopt; +} diff --git a/lib/Epub/Epub/Section.h b/lib/Epub/Epub/Section.h index 70fd0fe5..6f002c44 100644 --- a/lib/Epub/Epub/Section.h +++ b/lib/Epub/Epub/Section.h @@ -1,6 +1,8 @@ #pragma once #include #include +#include +#include #include "Epub.h" @@ -37,4 +39,7 @@ class Section { uint16_t viewportWidth, uint16_t viewportHeight, bool hyphenationEnabled, bool embeddedStyle, uint8_t imageRendering, const std::function& popupFn = nullptr); std::unique_ptr loadPageFromSectionFile(); + + // Look up the page number for an anchor id from the section cache file. + std::optional getPageForAnchor(const std::string& anchor) const; }; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 6a0636bb..1df3ceab 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -133,11 +133,21 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { // This handles cases like

text

where the // div's margin should be preserved, even though it has no direct text content. currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle)); + + if (!pendingAnchorId.empty()) { + anchorData.push_back({std::move(pendingAnchorId), static_cast(completedPageCount)}); + pendingAnchorId.clear(); + } return; } makePages(); } + // Record deferred anchor after previous block is flushed + if (!pendingAnchorId.empty()) { + anchorData.push_back({std::move(pendingAnchorId), static_cast(completedPageCount)}); + pendingAnchorId.clear(); + } currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle)); wordsExtractedInBlock = 0; } @@ -151,7 +161,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* return; } - // Extract class and style attributes for CSS processing + // Extract class, style, and id attributes std::string classAttr; std::string styleAttr; if (atts != nullptr) { @@ -160,6 +170,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* classAttr = atts[i + 1]; } else if (strcmp(atts[i], "style") == 0) { styleAttr = atts[i + 1]; + } else if (strcmp(atts[i], "id") == 0) { + // Defer recording until startNewTextBlock, after previous block is flushed to pages + self->pendingAnchorId = atts[i + 1]; } } } @@ -374,6 +387,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* if (self->currentPage && !self->currentPage->elements.empty() && (self->currentPageNextY + displayHeight > self->viewportHeight)) { self->completePageFn(std::move(self->currentPage)); + self->completedPageCount++; self->currentPage.reset(new Page()); if (!self->currentPage) { LOG_ERR("EHP", "Failed to create new page"); @@ -990,7 +1004,12 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() { // Process last page if there is still text if (currentTextBlock) { makePages(); + if (!pendingAnchorId.empty()) { + 
anchorData.push_back({std::move(pendingAnchorId), static_cast(completedPageCount)}); + pendingAnchorId.clear(); + } completePageFn(std::move(currentPage)); + completedPageCount++; currentPage.reset(); currentTextBlock.reset(); } @@ -1003,6 +1022,7 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line) { if (currentPageNextY + lineHeight > viewportHeight) { completePageFn(std::move(currentPage)); + completedPageCount++; currentPage.reset(new Page()); currentPageNextY = 0; } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index ce530797..1cc0ea39 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "../FootnoteEntry.h" @@ -69,6 +70,11 @@ class ChapterHtmlSlimParser { int tableRowIndex = 0; int tableColIndex = 0; + // Anchor-to-page mapping: tracks which page each HTML id attribute lands on + int completedPageCount = 0; + std::vector> anchorData; + std::string pendingAnchorId; // deferred until after previous text block is flushed + // Footnote link tracking bool insideFootnoteLink = false; int footnoteLinkDepth = -1; @@ -119,4 +125,5 @@ class ChapterHtmlSlimParser { ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages(); void addLineToPage(std::shared_ptr line); + const std::vector>& getAnchors() const { return anchorData; } }; diff --git a/src/activities/reader/EpubReaderActivity.cpp b/src/activities/reader/EpubReaderActivity.cpp index 059d3ea1..d2af6751 100644 --- a/src/activities/reader/EpubReaderActivity.cpp +++ b/src/activities/reader/EpubReaderActivity.cpp @@ -595,6 +595,16 @@ void EpubReaderActivity::render(RenderLock&& lock) { section->currentPage = nextPageNumber; } + if (!pendingAnchor.empty()) { + if (const auto page = section->getPageForAnchor(pendingAnchor)) { + section->currentPage = *page; + LOG_DBG("ERS", "Resolved anchor '%s' to page %d", 
pendingAnchor.c_str(), *page); + } else { + LOG_DBG("ERS", "Anchor '%s' not found in section %d", pendingAnchor.c_str(), currentSpineIndex); + } + pendingAnchor.clear(); + } + // handles changes in reader settings and reset to approximate position based on cached progress if (cachedChapterTotalPageCount > 0) { // only goes to relative position if spine index matches cached value @@ -790,12 +800,18 @@ void EpubReaderActivity::navigateToHref(const std::string& hrefStr, const bool s LOG_DBG("ERS", "Saved position [%d]: spine %d, page %d", footnoteDepth, currentSpineIndex, section->currentPage); } + // Extract fragment anchor (e.g. "#note1" or "chapter2.xhtml#note1") + std::string anchor; + const auto hashPos = hrefStr.find('#'); + if (hashPos != std::string::npos && hashPos + 1 < hrefStr.size()) { + anchor = hrefStr.substr(hashPos + 1); + } + // Check for same-file anchor reference (#anchor only) bool sameFile = !hrefStr.empty() && hrefStr[0] == '#'; int targetSpineIndex; if (sameFile) { - // Same file — navigate to page 0 of current spine item targetSpineIndex = currentSpineIndex; } else { targetSpineIndex = epub->resolveHrefToSpineIndex(hrefStr); @@ -809,6 +825,7 @@ void EpubReaderActivity::navigateToHref(const std::string& hrefStr, const bool s { RenderLock lock(*this); + pendingAnchor = std::move(anchor); currentSpineIndex = targetSpineIndex; nextPageNumber = 0; section.reset(); diff --git a/src/activities/reader/EpubReaderActivity.h b/src/activities/reader/EpubReaderActivity.h index 91c6f049..316677ba 100644 --- a/src/activities/reader/EpubReaderActivity.h +++ b/src/activities/reader/EpubReaderActivity.h @@ -11,6 +11,9 @@ class EpubReaderActivity final : public Activity { std::unique_ptr
section = nullptr; int currentSpineIndex = 0; int nextPageNumber = 0; + // Set when navigating to a footnote href with a fragment (e.g. #note1). + // Cleared on the next render after the new section loads and resolves it to a page. + std::string pendingAnchor; int pagesUntilFullRefresh = 0; int cachedSpineIndex = 0; int cachedChapterTotalPageCount = 0;