feat: footnote anchor navigation (#1245)

## Summary: Enable footnote anchor navigation in EPUB reader

This PR extracts the core anchor-to-page mapping mechanism from PR #1143
(TOC fragment navigation) to provide immediate footnote navigation
support. By merging this focused subset first, users get a complete
footnote experience now while simplifying the eventual review and merge
of the full #1143 PR.

  ---

## What this extracts from PR #1143

PR #1143 implements comprehensive TOC fragment navigation for EPUBs with
multi-chapter spine files. This PR takes only the anchor resolution
infrastructure:

- Anchor-to-page mapping in section cache: During page layout,
ChapterHtmlSlimParser records which page each HTML id attribute lands
on, serializing the map into the .bin cache file.
- Anchor resolution in `EpubReaderActivity`: When navigating to a
footnote link with a fragment (e.g., `chapter2.xhtml#note1`), the reader
resolves the anchor to a page number and jumps directly to it.
- Section file format change: Bumped to version 18 (from 17), adds anchor map
offset in header.

  ---

## Simplified scope vs. PR #1143

To minimize conflicts and complexity, this PR differs from #1143 in key
ways:

* **Anchors tracked**
  * **Origin:** Only TOC anchors (passed via `std::set`)
  * **This branch:** All `id` attributes

* **Page breaks**
  * **Origin**: Forces new page at TOC chapter boundaries
  * **This branch:** None — natural flow

* **TOC integration**
  * **Origin**: `tocBoundaries`, `getTocIndexForPage()`, chapter skip
  * **This branch:** None — just footnote links

* **Bug fix**
  * **This branch:** Fixed anchor page being off by one or two pages


The anchor recording bug (recording page number before `makePages()`
flushes previous block) was identified and fixed during this extraction.
The fix uses a deferred `pendingAnchorId` pattern that records the
anchor after page completion.

  ---

## Positioning for future merge

Changes are structured to minimize conflicts when #1143 eventually
merges:

- `ChapterHtmlSlimParser.cpp` `startElement()`: Both branches rewrite
the same `if (!idAttr.empty())` block. The merged version will combine
both approaches (TOC anchors get page breaks + immediate recording;
footnote anchors get deferred recording).
- `EpubReaderActivity.cpp` `render()`: The `pendingAnchor` resolution
block is positioned at the exact same insertion point where #1143 places
its `pendingTocIndex` block (line 596, right after `nextPageNumber`
assignment). During merge, both blocks will sit side-by-side.
   
  ---

## Why merge separately?

1. Immediate user value: Footnote navigation works now without waiting
for the full TOC overhaul
2. Easier review: ~100 lines vs. 500+ lines in #1143
3. Bug fix included: The page recording bug is fixed here and will carry
into #1143
4. Minimal conflicts: Structured for clean merge — both PRs touch the
same files but in complementary ways

---

### AI Usage

Did you use AI tools to help write this code? _**< YES >**_ Done by
Claude Opus 4.6
This commit is contained in:
Uri Tauber
2026-03-06 20:10:45 +02:00
committed by GitHub
parent 18b36efbae
commit 4d22256745
6 changed files with 106 additions and 10 deletions

View File

@@ -10,10 +10,10 @@
#include "parsers/ChapterHtmlSlimParser.h"
namespace {
constexpr uint8_t SECTION_FILE_VERSION = 17;
constexpr uint8_t SECTION_FILE_VERSION = 18;
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) +
sizeof(uint8_t) + sizeof(uint32_t);
sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t);
} // namespace
uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
@@ -44,7 +44,7 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi
static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
sizeof(extraParagraphSpacing) + sizeof(paragraphAlignment) + sizeof(viewportWidth) +
sizeof(viewportHeight) + sizeof(pageCount) + sizeof(hyphenationEnabled) +
sizeof(embeddedStyle) + sizeof(imageRendering) + sizeof(uint32_t),
sizeof(embeddedStyle) + sizeof(imageRendering) + sizeof(uint32_t) + sizeof(uint32_t),
"Header size mismatch");
serialization::writePod(file, SECTION_FILE_VERSION);
serialization::writePod(file, fontId);
@@ -56,8 +56,9 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi
serialization::writePod(file, hyphenationEnabled);
serialization::writePod(file, embeddedStyle);
serialization::writePod(file, imageRendering);
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written)
serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for LUT offset
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0, patched later)
serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for LUT offset (patched later)
serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for anchor map offset (patched later)
}
bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
@@ -239,10 +240,20 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
return false;
}
// Go back and write LUT offset
file.seek(HEADER_SIZE - sizeof(uint32_t) - sizeof(pageCount));
// Write anchor-to-page map for fragment navigation (e.g. footnote targets)
const uint32_t anchorMapOffset = file.position();
const auto& anchors = visitor.getAnchors();
serialization::writePod(file, static_cast<uint16_t>(anchors.size()));
for (const auto& [anchor, page] : anchors) {
serialization::writeString(file, anchor);
serialization::writePod(file, page);
}
// Patch header with final pageCount, lutOffset, and anchorMapOffset
file.seek(HEADER_SIZE - sizeof(uint32_t) * 2 - sizeof(pageCount));
serialization::writePod(file, pageCount);
serialization::writePod(file, lutOffset);
serialization::writePod(file, anchorMapOffset);
file.close();
if (cssParser) {
cssParser->clear();
@@ -255,7 +266,7 @@ std::unique_ptr<Page> Section::loadPageFromSectionFile() {
return nullptr;
}
file.seek(HEADER_SIZE - sizeof(uint32_t));
file.seek(HEADER_SIZE - sizeof(uint32_t) * 2);
uint32_t lutOffset;
serialization::readPod(file, lutOffset);
file.seek(lutOffset + sizeof(uint32_t) * currentPage);
@@ -267,3 +278,36 @@ std::unique_ptr<Page> Section::loadPageFromSectionFile() {
file.close();
return page;
}
// Look up the page recorded for an anchor id in this section's cache file.
//
// The last uint32_t of the file header holds the offset of the anchor map. The
// map is a uint16_t entry count followed by (string id, uint16_t page) records,
// which are scanned linearly until the first id equal to `anchor` is found.
//
// Returns the page stored with the matching id, or std::nullopt when the file
// cannot be opened, the header carries no usable anchor map offset, or no
// record matches.
std::optional<uint16_t> Section::getPageForAnchor(const std::string& anchor) const {
  FsFile f;
  if (!Storage.openFileForRead("SCT", filePath, f)) {
    return std::nullopt;
  }

  const uint32_t fileSize = f.size();
  std::optional<uint16_t> result;

  // Zero-initialised so the value is well defined even if the read below does
  // not fill it; 0 is already treated as "no anchor map".
  uint32_t anchorMapOffset = 0;
  f.seek(HEADER_SIZE - sizeof(uint32_t));
  serialization::readPod(f, anchorMapOffset);

  if (anchorMapOffset != 0 && anchorMapOffset < fileSize) {
    f.seek(anchorMapOffset);

    uint16_t count = 0;
    serialization::readPod(f, count);

    for (uint16_t i = 0; i < count; i++) {
      // A truncated or corrupt file may claim more entries than it contains;
      // stop instead of reading past the end of the data.
      if (f.position() >= fileSize) {
        break;
      }

      std::string key;
      uint16_t page = 0;
      serialization::readString(f, key);
      serialization::readPod(f, page);
      if (key == anchor) {
        result = page;
        break;
      }
    }
  }

  // Single exit path: the file is closed exactly once on every route past open.
  f.close();
  return result;
}

View File

@@ -1,6 +1,8 @@
#pragma once
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include "Epub.h"
@@ -37,4 +39,7 @@ class Section {
uint16_t viewportWidth, uint16_t viewportHeight, bool hyphenationEnabled, bool embeddedStyle,
uint8_t imageRendering, const std::function<void()>& popupFn = nullptr);
std::unique_ptr<Page> loadPageFromSectionFile();
// Look up the page number for an anchor id from the section cache file.
std::optional<uint16_t> getPageForAnchor(const std::string& anchor) const;
};

View File

@@ -133,11 +133,21 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
// This handles cases like <div style="margin-bottom:2em"><h1>text</h1></div> where the
// div's margin should be preserved, even though it has no direct text content.
currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle));
if (!pendingAnchorId.empty()) {
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
pendingAnchorId.clear();
}
return;
}
makePages();
}
// Record deferred anchor after previous block is flushed
if (!pendingAnchorId.empty()) {
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
pendingAnchorId.clear();
}
currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle));
wordsExtractedInBlock = 0;
}
@@ -151,7 +161,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
return;
}
// Extract class and style attributes for CSS processing
// Extract class, style, and id attributes
std::string classAttr;
std::string styleAttr;
if (atts != nullptr) {
@@ -160,6 +170,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
classAttr = atts[i + 1];
} else if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
} else if (strcmp(atts[i], "id") == 0) {
// Defer recording until startNewTextBlock, after previous block is flushed to pages
self->pendingAnchorId = atts[i + 1];
}
}
}
@@ -374,6 +387,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
if (self->currentPage && !self->currentPage->elements.empty() &&
(self->currentPageNextY + displayHeight > self->viewportHeight)) {
self->completePageFn(std::move(self->currentPage));
self->completedPageCount++;
self->currentPage.reset(new Page());
if (!self->currentPage) {
LOG_ERR("EHP", "Failed to create new page");
@@ -990,7 +1004,12 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
// Process last page if there is still text
if (currentTextBlock) {
makePages();
if (!pendingAnchorId.empty()) {
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
pendingAnchorId.clear();
}
completePageFn(std::move(currentPage));
completedPageCount++;
currentPage.reset();
currentTextBlock.reset();
}
@@ -1003,6 +1022,7 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
if (currentPageNextY + lineHeight > viewportHeight) {
completePageFn(std::move(currentPage));
completedPageCount++;
currentPage.reset(new Page());
currentPageNextY = 0;
}

View File

@@ -5,6 +5,7 @@
#include <climits>
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "../FootnoteEntry.h"
@@ -69,6 +70,11 @@ class ChapterHtmlSlimParser {
int tableRowIndex = 0;
int tableColIndex = 0;
// Anchor-to-page mapping: tracks which page each HTML id attribute lands on
int completedPageCount = 0;
std::vector<std::pair<std::string, uint16_t>> anchorData;
std::string pendingAnchorId; // deferred until after previous text block is flushed
// Footnote link tracking
bool insideFootnoteLink = false;
int footnoteLinkDepth = -1;
@@ -119,4 +125,5 @@ class ChapterHtmlSlimParser {
~ChapterHtmlSlimParser() = default;
bool parseAndBuildPages();
void addLineToPage(std::shared_ptr<TextBlock> line);
// Read-only access to the anchor ids recorded so far, each paired with the page number stored for it.
const std::vector<std::pair<std::string, uint16_t>>& getAnchors() const { return anchorData; }
};

View File

@@ -595,6 +595,16 @@ void EpubReaderActivity::render(RenderLock&& lock) {
section->currentPage = nextPageNumber;
}
if (!pendingAnchor.empty()) {
if (const auto page = section->getPageForAnchor(pendingAnchor)) {
section->currentPage = *page;
LOG_DBG("ERS", "Resolved anchor '%s' to page %d", pendingAnchor.c_str(), *page);
} else {
LOG_DBG("ERS", "Anchor '%s' not found in section %d", pendingAnchor.c_str(), currentSpineIndex);
}
pendingAnchor.clear();
}
// handles changes in reader settings and reset to approximate position based on cached progress
if (cachedChapterTotalPageCount > 0) {
// only goes to relative position if spine index matches cached value
@@ -790,12 +800,18 @@ void EpubReaderActivity::navigateToHref(const std::string& hrefStr, const bool s
LOG_DBG("ERS", "Saved position [%d]: spine %d, page %d", footnoteDepth, currentSpineIndex, section->currentPage);
}
// Extract fragment anchor (e.g. "#note1" or "chapter2.xhtml#note1")
std::string anchor;
const auto hashPos = hrefStr.find('#');
if (hashPos != std::string::npos && hashPos + 1 < hrefStr.size()) {
anchor = hrefStr.substr(hashPos + 1);
}
// Check for same-file anchor reference (#anchor only)
bool sameFile = !hrefStr.empty() && hrefStr[0] == '#';
int targetSpineIndex;
if (sameFile) {
// Same file — navigate to page 0 of current spine item
targetSpineIndex = currentSpineIndex;
} else {
targetSpineIndex = epub->resolveHrefToSpineIndex(hrefStr);
@@ -809,6 +825,7 @@ void EpubReaderActivity::navigateToHref(const std::string& hrefStr, const bool s
{
RenderLock lock(*this);
pendingAnchor = std::move(anchor);
currentSpineIndex = targetSpineIndex;
nextPageNumber = 0;
section.reset();

View File

@@ -11,6 +11,9 @@ class EpubReaderActivity final : public Activity {
std::unique_ptr<Section> section = nullptr;
int currentSpineIndex = 0;
int nextPageNumber = 0;
// Set when navigating to a footnote href with a fragment (e.g. #note1).
// Cleared on the next render after the new section loads and resolves it to a page.
std::string pendingAnchor;
int pagesUntilFullRefresh = 0;
int cachedSpineIndex = 0;
int cachedChapterTotalPageCount = 0;