feat: slim footnotes support (#1031)
## Summary **What is the goal of this PR?** Implement support for footnotes in epub files. It is based on #553, but simplified — removed the parts which complicated the code and burdened the CPU/RAM. This version supports basic footnotes and lets the user jump from location to location inside the epub. **What changes are included?** - `FootnoteEntry` struct — A small POD struct (number[24], href[64]) shared between parser, page storage, and UI. - Parser: `<a href>` detection (`ChapterHtmlSlimParser`) — During a single parsing pass, internal epub links are detected and collected as footnotes. The link text is underlined to hint navigability. Bracket/whitespace normalization is applied to the display label (e.g. [1] → 1). - Footnote-to-page assignment (`ChapterHtmlSlimParser`, `Page`) — Footnotes are attached to the exact page where their anchor word appears, tracked via a cumulative word counter during layout, surviving paragraph splits and the 750-word mid-paragraph safety flush. - Page serialization (`Page`, `Section`) — Footnotes are serialized/deserialized per page (max 16 per page). Section cache version bumped to 14 to force a clean rebuild. - Href → spine resolution (`Epub`) — `resolveHrefToSpineIndex()` maps an href (e.g. `chapter2.xhtml#note1`) to its spine index by filename matching. - Footnotes menu + activity (`EpubReaderMenuActivity`, `EpubReaderFootnotesActivity`) — A new "Footnotes" entry in the reader menu lists all footnote links found on the current page. The user scrolls and selects to navigate. - Navigate & restore (`EpubReaderActivity`) — `navigateToHref()` saves the current spine index and page number, then jumps to the target. The Back button restores the saved position when the user is done reading the footnote. 
**Additional Context** **What was removed vs #553:** virtual spine items (`addVirtualSpineItem`, `isVirtualSpineItem`), two-pass parsing, `<aside>` content extraction to temp HTML files, `<p class="note">` paragraph note extraction, `replaceHtmlEntities` (master already has `lookupHtmlEntity`), `footnotePages` / `buildFilteredChapterList`, `noterefCallback` / `Noteref` struct, and the stack size increase from 8 KB to 24 KB (not needed without two-pass parsing and virtual file I/O on the render task). **Performance:** Single-pass parsing. No new heap allocations in the hot path — footnote text is collected into fixed stack buffers (char[24], char[64]). Active runtime memory is ~2.8 KB worst-case (one page × 16 footnotes × 88 bytes, mirrored in `currentPageFootnotes`). Flash usage is unchanged at 97.4%; RAM stays at 31%. **Known limitations:** When clicking a footnote, it jumps to the start of the HTML file instead of the specific anchor. This could be problematic for books that don't have separate files for each footnote. (no element-id-to-page mapping yet - will be another PR soon). --- ### AI Usage Did you use AI tools to help write this code? _**< PARTIALLY>**_ Claude Opus 4.6 was used to do most of the migration. I manually checked its work and fixed some things, but I haven't reviewed all the changes yet, so feedback is welcome. --------- Co-authored-by: Arthur Tazhitdinov <lisnake@gmail.com>
This commit is contained in:
@@ -858,3 +858,30 @@ float Epub::calculateProgress(const int currentSpineIndex, const float currentSp
|
||||
const float totalProgress = static_cast<float>(prevChapterSize) + sectionProgSize;
|
||||
return totalProgress / static_cast<float>(bookSize);
|
||||
}
|
||||
|
||||
int Epub::resolveHrefToSpineIndex(const std::string& href) const {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) return -1;
|
||||
|
||||
// Extract filename (remove #anchor)
|
||||
std::string target = href;
|
||||
size_t hashPos = target.find('#');
|
||||
if (hashPos != std::string::npos) target = target.substr(0, hashPos);
|
||||
|
||||
// Same-file reference (anchor-only)
|
||||
if (target.empty()) return -1;
|
||||
|
||||
// Extract just the filename for comparison
|
||||
size_t targetSlash = target.find_last_of('/');
|
||||
std::string targetFilename = (targetSlash != std::string::npos) ? target.substr(targetSlash + 1) : target;
|
||||
|
||||
for (int i = 0; i < getSpineItemsCount(); i++) {
|
||||
const auto& spineHref = getSpineItem(i).href;
|
||||
// Try exact match first
|
||||
if (spineHref == target) return i;
|
||||
// Then filename-only match
|
||||
size_t spineSlash = spineHref.find_last_of('/');
|
||||
std::string spineFilename = (spineSlash != std::string::npos) ? spineHref.substr(spineSlash + 1) : spineHref;
|
||||
if (spineFilename == targetFilename) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -72,4 +72,5 @@ class Epub {
|
||||
size_t getBookSize() const;
|
||||
float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
|
||||
CssParser* getCssParser() const { return cssParser.get(); }
|
||||
int resolveHrefToSpineIndex(const std::string& href) const;
|
||||
};
|
||||
|
||||
13
lib/Epub/Epub/FootnoteEntry.h
Normal file
13
lib/Epub/Epub/FootnoteEntry.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
|
||||
struct FootnoteEntry {
|
||||
char number[24];
|
||||
char href[64];
|
||||
|
||||
FootnoteEntry() {
|
||||
number[0] = '\0';
|
||||
href[0] = '\0';
|
||||
}
|
||||
};
|
||||
@@ -67,6 +67,18 @@ bool Page::serialize(FsFile& file) const {
|
||||
}
|
||||
}
|
||||
|
||||
// Serialize footnotes (clamp to MAX_FOOTNOTES_PER_PAGE to match addFootnote/deserialize limits)
|
||||
const uint16_t fnCount = std::min<uint16_t>(footnotes.size(), MAX_FOOTNOTES_PER_PAGE);
|
||||
serialization::writePod(file, fnCount);
|
||||
for (uint16_t i = 0; i < fnCount; i++) {
|
||||
const auto& fn = footnotes[i];
|
||||
if (file.write(fn.number, sizeof(fn.number)) != sizeof(fn.number) ||
|
||||
file.write(fn.href, sizeof(fn.href)) != sizeof(fn.href)) {
|
||||
LOG_ERR("PGE", "Failed to write footnote");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -92,5 +104,24 @@ std::unique_ptr<Page> Page::deserialize(FsFile& file) {
|
||||
}
|
||||
}
|
||||
|
||||
// Deserialize footnotes
|
||||
uint16_t fnCount;
|
||||
serialization::readPod(file, fnCount);
|
||||
if (fnCount > MAX_FOOTNOTES_PER_PAGE) {
|
||||
LOG_ERR("PGE", "Invalid footnote count %u", fnCount);
|
||||
return nullptr;
|
||||
}
|
||||
page->footnotes.resize(fnCount);
|
||||
for (uint16_t i = 0; i < fnCount; i++) {
|
||||
auto& entry = page->footnotes[i];
|
||||
if (file.read(entry.number, sizeof(entry.number)) != sizeof(entry.number) ||
|
||||
file.read(entry.href, sizeof(entry.href)) != sizeof(entry.href)) {
|
||||
LOG_ERR("PGE", "Failed to read footnote %u", i);
|
||||
return nullptr;
|
||||
}
|
||||
entry.number[sizeof(entry.number) - 1] = '\0';
|
||||
entry.href[sizeof(entry.href) - 1] = '\0';
|
||||
}
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "FootnoteEntry.h"
|
||||
#include "blocks/ImageBlock.h"
|
||||
#include "blocks/TextBlock.h"
|
||||
|
||||
@@ -57,6 +58,19 @@ class Page {
|
||||
public:
|
||||
// the list of block index and line numbers on this page
|
||||
std::vector<std::shared_ptr<PageElement>> elements;
|
||||
std::vector<FootnoteEntry> footnotes;
|
||||
static constexpr uint16_t MAX_FOOTNOTES_PER_PAGE = 16;
|
||||
|
||||
void addFootnote(const char* number, const char* href) {
|
||||
if (footnotes.size() >= MAX_FOOTNOTES_PER_PAGE) return; // Cap per-page footnotes
|
||||
FootnoteEntry entry;
|
||||
strncpy(entry.number, number, sizeof(entry.number) - 1);
|
||||
entry.number[sizeof(entry.number) - 1] = '\0';
|
||||
strncpy(entry.href, href, sizeof(entry.href) - 1);
|
||||
entry.href[sizeof(entry.href) - 1] = '\0';
|
||||
footnotes.push_back(entry);
|
||||
}
|
||||
|
||||
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const;
|
||||
bool serialize(FsFile& file) const;
|
||||
static std::unique_ptr<Page> deserialize(FsFile& file);
|
||||
|
||||
@@ -29,6 +29,7 @@ class TextBlock final : public Block {
|
||||
const BlockStyle& getBlockStyle() const { return blockStyle; }
|
||||
const std::vector<std::string>& getWords() const { return words; }
|
||||
bool isEmpty() override { return words.empty(); }
|
||||
size_t wordCount() const { return words.size(); }
|
||||
// given a renderer works out where to break the words into lines
|
||||
void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
|
||||
BlockType getType() override { return TEXT_BLOCK; }
|
||||
|
||||
@@ -49,6 +49,24 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* getAttribute(const XML_Char** atts, const char* attrName) {
|
||||
if (!atts) return nullptr;
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], attrName) == 0) return atts[i + 1];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool isInternalEpubLink(const char* href) {
|
||||
if (!href || href[0] == '\0') return false;
|
||||
if (strncmp(href, "http://", 7) == 0 || strncmp(href, "https://", 8) == 0) return false;
|
||||
if (strncmp(href, "mailto:", 7) == 0) return false;
|
||||
if (strncmp(href, "ftp://", 6) == 0) return false;
|
||||
if (strncmp(href, "tel:", 4) == 0) return false;
|
||||
if (strncmp(href, "javascript:", 11) == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isHeaderOrBlock(const char* name) {
|
||||
return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
|
||||
}
|
||||
@@ -121,6 +139,7 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
|
||||
makePages();
|
||||
}
|
||||
currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle));
|
||||
wordsExtractedInBlock = 0;
|
||||
}
|
||||
|
||||
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
|
||||
@@ -430,6 +449,50 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
|
||||
}
|
||||
}
|
||||
|
||||
// Detect internal <a href="..."> links (footnotes, cross-references)
|
||||
// Note: <aside epub:type="footnote"> elements are rendered as normal content
|
||||
// without special handling. Links pointing to them are collected as footnotes.
|
||||
if (strcmp(name, "a") == 0) {
|
||||
const char* href = getAttribute(atts, "href");
|
||||
|
||||
bool isInternalLink = isInternalEpubLink(href);
|
||||
|
||||
// Special case: javascript:void(0) links with data attributes
|
||||
// Example: <a href="javascript:void(0)"
|
||||
// data-xyz="{"name":"OPS/ch2.xhtml","frag":"id46"}">
|
||||
if (href && strncmp(href, "javascript:", 11) == 0) {
|
||||
isInternalLink = false;
|
||||
// TODO: Parse data-* attributes to extract actual href
|
||||
}
|
||||
|
||||
if (isInternalLink) {
|
||||
// Flush buffer before style change
|
||||
if (self->partWordBufferIndex > 0) {
|
||||
self->flushPartWordBuffer();
|
||||
self->nextWordContinues = true;
|
||||
}
|
||||
self->insideFootnoteLink = true;
|
||||
self->footnoteLinkDepth = self->depth;
|
||||
strncpy(self->currentFootnoteLinkHref, href, sizeof(self->currentFootnoteLinkHref) - 1);
|
||||
self->currentFootnoteLinkHref[sizeof(self->currentFootnoteLinkHref) - 1] = '\0';
|
||||
self->currentFootnoteLinkText[0] = '\0';
|
||||
self->currentFootnoteLinkTextLen = 0;
|
||||
|
||||
// Apply underline style to visually indicate the link
|
||||
self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
|
||||
StyleStackEntry entry;
|
||||
entry.depth = self->depth;
|
||||
entry.hasUnderline = true;
|
||||
entry.underline = true;
|
||||
self->inlineStyleStack.push_back(entry);
|
||||
self->updateEffectiveInlineStyle();
|
||||
|
||||
// Skip CSS resolution — we already handled styling for this <a> tag
|
||||
self->depth += 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute CSS style for this element
|
||||
CssStyle cssStyle;
|
||||
if (self->cssParser) {
|
||||
@@ -582,6 +645,19 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
return;
|
||||
}
|
||||
|
||||
// Collect footnote link display text (for the number label)
|
||||
// Skip whitespace and brackets to normalize noterefs like "[1]" → "1"
|
||||
if (self->insideFootnoteLink) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
unsigned char c = static_cast<unsigned char>(s[i]);
|
||||
if (isWhitespace(c) || c == '[' || c == ']') continue;
|
||||
if (self->currentFootnoteLinkTextLen < static_cast<int>(sizeof(self->currentFootnoteLinkText)) - 1) {
|
||||
self->currentFootnoteLinkText[self->currentFootnoteLinkTextLen++] = c;
|
||||
self->currentFootnoteLinkText[self->currentFootnoteLinkTextLen] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (isWhitespace(s[i])) {
|
||||
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
|
||||
@@ -743,6 +819,21 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
|
||||
|
||||
self->depth -= 1;
|
||||
|
||||
// Closing a footnote link — create entry from collected text and href
|
||||
if (self->insideFootnoteLink && self->depth == self->footnoteLinkDepth) {
|
||||
if (self->currentFootnoteLinkText[0] != '\0' && self->currentFootnoteLinkHref[0] != '\0') {
|
||||
FootnoteEntry entry;
|
||||
strncpy(entry.number, self->currentFootnoteLinkText, sizeof(entry.number) - 1);
|
||||
entry.number[sizeof(entry.number) - 1] = '\0';
|
||||
strncpy(entry.href, self->currentFootnoteLinkHref, sizeof(entry.href) - 1);
|
||||
entry.href[sizeof(entry.href) - 1] = '\0';
|
||||
int wordIndex =
|
||||
self->wordsExtractedInBlock + (self->currentTextBlock ? static_cast<int>(self->currentTextBlock->size()) : 0);
|
||||
self->pendingFootnotes.push_back({wordIndex, entry});
|
||||
}
|
||||
self->insideFootnoteLink = false;
|
||||
}
|
||||
|
||||
// Leaving skip
|
||||
if (self->skipUntilDepth == self->depth) {
|
||||
self->skipUntilDepth = INT_MAX;
|
||||
@@ -910,6 +1001,15 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
|
||||
currentPageNextY = 0;
|
||||
}
|
||||
|
||||
// Track cumulative words to assign footnotes to the page containing their anchor
|
||||
wordsExtractedInBlock += line->wordCount();
|
||||
auto footnoteIt = pendingFootnotes.begin();
|
||||
while (footnoteIt != pendingFootnotes.end() && footnoteIt->first <= wordsExtractedInBlock) {
|
||||
currentPage->addFootnote(footnoteIt->second.number, footnoteIt->second.href);
|
||||
++footnoteIt;
|
||||
}
|
||||
pendingFootnotes.erase(pendingFootnotes.begin(), footnoteIt);
|
||||
|
||||
// Apply horizontal left inset (margin + padding) as x position offset
|
||||
const int16_t xOffset = line->getBlockStyle().leftInset();
|
||||
currentPage->elements.push_back(std::make_shared<PageLine>(line, xOffset, currentPageNextY));
|
||||
@@ -947,6 +1047,16 @@ void ChapterHtmlSlimParser::makePages() {
|
||||
renderer, fontId, effectiveWidth,
|
||||
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
|
||||
|
||||
// Fallback: transfer any remaining pending footnotes to current page.
|
||||
// Normally addLineToPage handles this via word-index tracking, but this catches
|
||||
// edge cases where a footnote's word index equals the exact block size.
|
||||
if (!pendingFootnotes.empty() && currentPage) {
|
||||
for (const auto& [idx, fn] : pendingFootnotes) {
|
||||
currentPage->addFootnote(fn.number, fn.href);
|
||||
}
|
||||
pendingFootnotes.clear();
|
||||
}
|
||||
|
||||
// Apply bottom spacing after the paragraph (stored in pixels)
|
||||
if (blockStyle.marginBottom > 0) {
|
||||
currentPageNextY += blockStyle.marginBottom;
|
||||
|
||||
@@ -5,7 +5,9 @@
|
||||
#include <climits>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../FootnoteEntry.h"
|
||||
#include "../ParsedText.h"
|
||||
#include "../blocks/ImageBlock.h"
|
||||
#include "../blocks/TextBlock.h"
|
||||
@@ -66,6 +68,15 @@ class ChapterHtmlSlimParser {
|
||||
int tableRowIndex = 0;
|
||||
int tableColIndex = 0;
|
||||
|
||||
// Footnote link tracking
|
||||
bool insideFootnoteLink = false;
|
||||
int footnoteLinkDepth = -1;
|
||||
char currentFootnoteLinkText[24] = {};
|
||||
int currentFootnoteLinkTextLen = 0;
|
||||
char currentFootnoteLinkHref[64] = {};
|
||||
std::vector<std::pair<int, FootnoteEntry>> pendingFootnotes; // <wordIndex, entry>
|
||||
int wordsExtractedInBlock = 0;
|
||||
|
||||
void updateEffectiveInlineStyle();
|
||||
void startNewTextBlock(const BlockStyle& blockStyle);
|
||||
void flushPartWordBuffer();
|
||||
|
||||
Reference in New Issue
Block a user