feat: slim footnotes support (#1031)

## Summary
**What is the goal of this PR?** Implement support for footnotes in epub
files.
It is based on #553, but simplified: the parts that complicated the code
and burdened the CPU/RAM have been removed. This version supports basic
footnotes and lets the user jump from location to location inside the
epub.

**What changes are included?**
- `FootnoteEntry` struct — A small POD struct (number[24], href[64])
shared between parser, page storage, and UI.
- Parser: `<a href>` detection (`ChapterHtmlSlimParser`) — During a
single parsing pass, internal epub links are detected and collected as
footnotes. The link text is underlined to hint navigability.
Bracket/whitespace normalization is applied to the display label (e.g.
[1] → 1).
- Footnote-to-page assignment (`ChapterHtmlSlimParser`, `Page`) —
Footnotes are attached to the exact page where their anchor word
appears, tracked via a cumulative word counter during layout, surviving
paragraph splits and the 750-word mid-paragraph safety flush.
- Page serialization (`Page`, `Section`) — Footnotes are
serialized/deserialized per page (max 16 per page). Section cache
version bumped to 14 to force a clean rebuild.
- Href → spine resolution (`Epub`) — `resolveHrefToSpineIndex()` maps an
href (e.g. `chapter2.xhtml#note1`) to its spine index by filename
matching.
- Footnotes menu + activity (`EpubReaderMenuActivity`,
`EpubReaderFootnotesActivity`) — A new "Footnotes" entry in the reader
menu lists all footnote links found on the current page. The user
scrolls and selects to navigate.
- Navigate & restore (`EpubReaderActivity`) — `navigateToHref()` saves
the current spine index and page number, then jumps to the target. The
Back button restores the saved position when the user is done reading
the footnote.

  **Additional Context**

**What was removed vs #553:** virtual spine items
(`addVirtualSpineItem`, `isVirtualSpineItem`), two-pass parsing,
`<aside>` content extraction to temp HTML files, `<p class="note">`
paragraph note extraction, `replaceHtmlEntities` (master already has
`lookupHtmlEntity`), `footnotePages` / `buildFilteredChapterList`,
`noterefCallback` / `Noteref` struct, and the stack size increase from 8
KB to 24 KB (not needed without two-pass parsing and virtual file I/O on
the render task).
 
**Performance:** Single-pass parsing. No new heap allocations in the hot
path — footnote text is collected into fixed stack buffers (char[24],
char[64]). Active runtime memory is ~2.8 KB worst-case (one page × 16
footnotes × 88 bytes, mirrored in `currentPageFootnotes`). Flash usage
is unchanged at 97.4%; RAM stays at 31%.
   
**Known limitations:** Selecting a footnote jumps to the start of the
target HTML file instead of the specific anchor, because there is no
element-id-to-page mapping yet (planned for a follow-up PR). This can be
problematic for books that do not keep each footnote in a separate file.

---

### AI Usage

Did you use AI tools to help write this code? _**< PARTIALLY>**_
Claude Opus 4.6 was used to do most of the migration. I manually checked
its work and fixed some issues, but I haven't reviewed all the changes
yet, so feedback is welcome.

---------

Co-authored-by: Arthur Tazhitdinov <lisnake@gmail.com>
This commit is contained in:
Uri Tauber
2026-02-26 16:47:34 +02:00
committed by GitHub
parent 451774ddf8
commit 30d8a8d011
15 changed files with 481 additions and 22 deletions

View File

@@ -858,3 +858,30 @@ float Epub::calculateProgress(const int currentSpineIndex, const float currentSp
const float totalProgress = static_cast<float>(prevChapterSize) + sectionProgSize;
return totalProgress / static_cast<float>(bookSize);
}
// Maps an href such as "chapter2.xhtml#note1" to the index of the spine item
// it refers to.
//
// The "#fragment" part is ignored; only the document path is matched.
// Matching preference:
//   1. a spine item whose href equals the path exactly (anywhere in the spine);
//   2. otherwise, the first spine item whose filename (text after the last '/')
//      equals the filename of the path.
//
// Returns -1 when the metadata cache is not loaded, when the href is empty or
// anchor-only (same-file reference), or when nothing matches.
int Epub::resolveHrefToSpineIndex(const std::string& href) const {
  if (!bookMetadataCache || !bookMetadataCache->isLoaded()) return -1;

  // Drop the "#anchor" suffix; substr(0, npos) keeps the whole string when there is none.
  const std::string target = href.substr(0, href.find('#'));

  // Same-file reference (anchor-only) or empty href
  if (target.empty()) return -1;

  // Offset of the filename component inside target
  const size_t targetSlash = target.find_last_of('/');
  const size_t targetNameStart = (targetSlash != std::string::npos) ? targetSlash + 1 : 0;

  // First filename-only match seen so far. It is only a fallback: an exact
  // path match later in the spine must still win, otherwise books that reuse
  // a filename in several directories would resolve to the wrong item.
  int filenameMatch = -1;

  for (int i = 0; i < getSpineItemsCount(); i++) {
    const auto& spineHref = getSpineItem(i).href;

    if (spineHref == target) return i;

    if (filenameMatch < 0) {
      const size_t spineSlash = spineHref.find_last_of('/');
      const size_t spineNameStart = (spineSlash != std::string::npos) ? spineSlash + 1 : 0;
      // Compare the two filename tails in place, without building temporaries.
      if (spineHref.compare(spineNameStart, std::string::npos, target, targetNameStart, std::string::npos) == 0) {
        filenameMatch = i;
      }
    }
  }

  return filenameMatch;
}

View File

@@ -72,4 +72,5 @@ class Epub {
size_t getBookSize() const;
float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
CssParser* getCssParser() const { return cssParser.get(); }
int resolveHrefToSpineIndex(const std::string& href) const;
};

View File

@@ -0,0 +1,13 @@
#pragma once
#include <cstring>
// Fixed-size record describing one footnote link: a short display label and
// the link target. Both fields are NUL-terminated C strings stored inline.
struct FootnoteEntry {
  char number[24];  // display label of the link (e.g. "1")
  char href[64];    // link target (e.g. "chapter2.xhtml#note1")

  // Zero-fill both buffers rather than only terminating the first byte.
  // Code that copies or writes these arrays by sizeof() then never touches
  // indeterminate bytes, even for an entry that was never filled in.
  FootnoteEntry() : number{}, href{} {}
};

View File

@@ -67,6 +67,18 @@ bool Page::serialize(FsFile& file) const {
}
}
// Serialize footnotes (clamp to MAX_FOOTNOTES_PER_PAGE to match addFootnote/deserialize limits)
const uint16_t fnCount = std::min<uint16_t>(footnotes.size(), MAX_FOOTNOTES_PER_PAGE);
serialization::writePod(file, fnCount);
for (uint16_t i = 0; i < fnCount; i++) {
const auto& fn = footnotes[i];
if (file.write(fn.number, sizeof(fn.number)) != sizeof(fn.number) ||
file.write(fn.href, sizeof(fn.href)) != sizeof(fn.href)) {
LOG_ERR("PGE", "Failed to write footnote");
return false;
}
}
return true;
}
@@ -92,5 +104,24 @@ std::unique_ptr<Page> Page::deserialize(FsFile& file) {
}
}
// Deserialize footnotes
uint16_t fnCount;
serialization::readPod(file, fnCount);
if (fnCount > MAX_FOOTNOTES_PER_PAGE) {
LOG_ERR("PGE", "Invalid footnote count %u", fnCount);
return nullptr;
}
page->footnotes.resize(fnCount);
for (uint16_t i = 0; i < fnCount; i++) {
auto& entry = page->footnotes[i];
if (file.read(entry.number, sizeof(entry.number)) != sizeof(entry.number) ||
file.read(entry.href, sizeof(entry.href)) != sizeof(entry.href)) {
LOG_ERR("PGE", "Failed to read footnote %u", i);
return nullptr;
}
entry.number[sizeof(entry.number) - 1] = '\0';
entry.href[sizeof(entry.href) - 1] = '\0';
}
return page;
}

View File

@@ -5,6 +5,7 @@
#include <utility>
#include <vector>
#include "FootnoteEntry.h"
#include "blocks/ImageBlock.h"
#include "blocks/TextBlock.h"
@@ -57,6 +58,19 @@ class Page {
public:
// the list of block index and line numbers on this page
std::vector<std::shared_ptr<PageElement>> elements;
std::vector<FootnoteEntry> footnotes;
static constexpr uint16_t MAX_FOOTNOTES_PER_PAGE = 16;
void addFootnote(const char* number, const char* href) {
if (footnotes.size() >= MAX_FOOTNOTES_PER_PAGE) return; // Cap per-page footnotes
FootnoteEntry entry;
strncpy(entry.number, number, sizeof(entry.number) - 1);
entry.number[sizeof(entry.number) - 1] = '\0';
strncpy(entry.href, href, sizeof(entry.href) - 1);
entry.href[sizeof(entry.href) - 1] = '\0';
footnotes.push_back(entry);
}
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const;
bool serialize(FsFile& file) const;
static std::unique_ptr<Page> deserialize(FsFile& file);

View File

@@ -29,6 +29,7 @@ class TextBlock final : public Block {
const BlockStyle& getBlockStyle() const { return blockStyle; }
const std::vector<std::string>& getWords() const { return words; }
bool isEmpty() override { return words.empty(); }
// Number of words stored in this block.
size_t wordCount() const { return words.size(); }
// given a renderer works out where to break the words into lines
void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
BlockType getType() override { return TEXT_BLOCK; }

View File

@@ -49,6 +49,24 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
return false;
}
const char* getAttribute(const XML_Char** atts, const char* attrName) {
if (!atts) return nullptr;
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], attrName) == 0) return atts[i + 1];
}
return nullptr;
}
// Decides whether an href should be treated as a link inside the book.
//
// Returns false for a null or empty href and for any href that starts with
// one of the known external prefixes (http://, https://, mailto:, ftp://,
// tel:, javascript:). The prefix comparison ignores ASCII case because URI
// schemes are case-insensitive, so "HTTP://..." and "Mailto:..." are external
// as well. Everything else is considered internal.
bool isInternalEpubLink(const char* href) {
  if (!href || href[0] == '\0') return false;

  // Prefixes must be written in lowercase; the href side is folded to match.
  static constexpr const char* kExternalPrefixes[] = {
      "http://", "https://", "mailto:", "ftp://", "tel:", "javascript:",
  };

  // True when text begins with lowerPrefix, ignoring ASCII case in text.
  // A text shorter than the prefix fails on its terminating NUL, so the scan
  // never reads past the end of either string.
  const auto startsWithIgnoreCase = [](const char* text, const char* lowerPrefix) {
    for (; *lowerPrefix != '\0'; ++text, ++lowerPrefix) {
      char c = *text;
      if (c >= 'A' && c <= 'Z') c = static_cast<char>(c - 'A' + 'a');
      if (c != *lowerPrefix) return false;
    }
    return true;
  };

  for (const char* prefix : kExternalPrefixes) {
    if (startsWithIgnoreCase(href, prefix)) return false;
  }
  return true;
}
// True when the tag name is found in either the header tag list or the block
// tag list. The header list is checked first.
bool isHeaderOrBlock(const char* name) {
  if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
    return true;
  }
  return matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
}
@@ -121,6 +139,7 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
makePages();
}
currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle));
wordsExtractedInBlock = 0;
}
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
@@ -430,6 +449,50 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
}
}
// Detect internal <a href="..."> links (footnotes, cross-references)
// Note: <aside epub:type="footnote"> elements are rendered as normal content
// without special handling. Links pointing to them are collected as footnotes.
if (strcmp(name, "a") == 0) {
const char* href = getAttribute(atts, "href");
bool isInternalLink = isInternalEpubLink(href);
// Special case: javascript:void(0) links with data attributes
// Example: <a href="javascript:void(0)"
// data-xyz="{&quot;name&quot;:&quot;OPS/ch2.xhtml&quot;,&quot;frag&quot;:&quot;id46&quot;}">
if (href && strncmp(href, "javascript:", 11) == 0) {
isInternalLink = false;
// TODO: Parse data-* attributes to extract actual href
}
if (isInternalLink) {
// Flush buffer before style change
if (self->partWordBufferIndex > 0) {
self->flushPartWordBuffer();
self->nextWordContinues = true;
}
self->insideFootnoteLink = true;
self->footnoteLinkDepth = self->depth;
strncpy(self->currentFootnoteLinkHref, href, sizeof(self->currentFootnoteLinkHref) - 1);
self->currentFootnoteLinkHref[sizeof(self->currentFootnoteLinkHref) - 1] = '\0';
self->currentFootnoteLinkText[0] = '\0';
self->currentFootnoteLinkTextLen = 0;
// Apply underline style to visually indicate the link
self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
StyleStackEntry entry;
entry.depth = self->depth;
entry.hasUnderline = true;
entry.underline = true;
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
// Skip CSS resolution — we already handled styling for this <a> tag
self->depth += 1;
return;
}
}
// Compute CSS style for this element
CssStyle cssStyle;
if (self->cssParser) {
@@ -582,6 +645,19 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
return;
}
// Collect footnote link display text (for the number label)
// Skip whitespace and brackets to normalize noterefs like "[1]" → "1"
if (self->insideFootnoteLink) {
for (int i = 0; i < len; i++) {
unsigned char c = static_cast<unsigned char>(s[i]);
if (isWhitespace(c) || c == '[' || c == ']') continue;
if (self->currentFootnoteLinkTextLen < static_cast<int>(sizeof(self->currentFootnoteLinkText)) - 1) {
self->currentFootnoteLinkText[self->currentFootnoteLinkTextLen++] = c;
self->currentFootnoteLinkText[self->currentFootnoteLinkTextLen] = '\0';
}
}
}
for (int i = 0; i < len; i++) {
if (isWhitespace(s[i])) {
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
@@ -743,6 +819,21 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
self->depth -= 1;
// Closing a footnote link — create entry from collected text and href
if (self->insideFootnoteLink && self->depth == self->footnoteLinkDepth) {
if (self->currentFootnoteLinkText[0] != '\0' && self->currentFootnoteLinkHref[0] != '\0') {
FootnoteEntry entry;
strncpy(entry.number, self->currentFootnoteLinkText, sizeof(entry.number) - 1);
entry.number[sizeof(entry.number) - 1] = '\0';
strncpy(entry.href, self->currentFootnoteLinkHref, sizeof(entry.href) - 1);
entry.href[sizeof(entry.href) - 1] = '\0';
int wordIndex =
self->wordsExtractedInBlock + (self->currentTextBlock ? static_cast<int>(self->currentTextBlock->size()) : 0);
self->pendingFootnotes.push_back({wordIndex, entry});
}
self->insideFootnoteLink = false;
}
// Leaving skip
if (self->skipUntilDepth == self->depth) {
self->skipUntilDepth = INT_MAX;
@@ -910,6 +1001,15 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
currentPageNextY = 0;
}
// Track cumulative words to assign footnotes to the page containing their anchor
wordsExtractedInBlock += line->wordCount();
auto footnoteIt = pendingFootnotes.begin();
while (footnoteIt != pendingFootnotes.end() && footnoteIt->first <= wordsExtractedInBlock) {
currentPage->addFootnote(footnoteIt->second.number, footnoteIt->second.href);
++footnoteIt;
}
pendingFootnotes.erase(pendingFootnotes.begin(), footnoteIt);
// Apply horizontal left inset (margin + padding) as x position offset
const int16_t xOffset = line->getBlockStyle().leftInset();
currentPage->elements.push_back(std::make_shared<PageLine>(line, xOffset, currentPageNextY));
@@ -947,6 +1047,16 @@ void ChapterHtmlSlimParser::makePages() {
renderer, fontId, effectiveWidth,
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
// Fallback: transfer any remaining pending footnotes to current page.
// Normally addLineToPage handles this via word-index tracking, but this catches
// edge cases where a footnote's word index equals the exact block size.
if (!pendingFootnotes.empty() && currentPage) {
for (const auto& [idx, fn] : pendingFootnotes) {
currentPage->addFootnote(fn.number, fn.href);
}
pendingFootnotes.clear();
}
// Apply bottom spacing after the paragraph (stored in pixels)
if (blockStyle.marginBottom > 0) {
currentPageNextY += blockStyle.marginBottom;

View File

@@ -5,7 +5,9 @@
#include <climits>
#include <functional>
#include <memory>
#include <vector>
#include "../FootnoteEntry.h"
#include "../ParsedText.h"
#include "../blocks/ImageBlock.h"
#include "../blocks/TextBlock.h"
@@ -66,6 +68,15 @@ class ChapterHtmlSlimParser {
int tableRowIndex = 0;
int tableColIndex = 0;
// Footnote link tracking
bool insideFootnoteLink = false;
int footnoteLinkDepth = -1;
char currentFootnoteLinkText[24] = {};
int currentFootnoteLinkTextLen = 0;
char currentFootnoteLinkHref[64] = {};
std::vector<std::pair<int, FootnoteEntry>> pendingFootnotes; // <wordIndex, entry>
int wordsExtractedInBlock = 0;
void updateEffectiveInlineStyle();
void startNewTextBlock(const BlockStyle& blockStyle);
void flushPartWordBuffer();