feat: slim footnotes support (#1031)

## Summary **What is the goal of this PR?** Implement support for footnotes in epub files. It is based on #553, but simplified — it removes the parts which complicated the code and burdened the CPU/RAM. This version supports basic footnotes and lets the user jump from location to location inside the epub. **What changes are included?** - `FootnoteEntry` struct — A small POD struct (number[24], href[64]) shared between parser, page storage, and UI. - Parser: `<a href>` detection (`ChapterHtmlSlimParser`) — During a single parsing pass, internal epub links are detected and collected as footnotes. The link text is underlined to hint at navigability. Bracket/whitespace normalization is applied to the display label (e.g. [1] → 1). - Footnote-to-page assignment (`ChapterHtmlSlimParser`, `Page`) — Footnotes are attached to the exact page where their anchor word appears, tracked via a cumulative word counter during layout, surviving paragraph splits and the 750-word mid-paragraph safety flush. - Page serialization (`Page`, `Section`) — Footnotes are serialized/deserialized per page (max 16 per page). Section cache version bumped to 14 to force a clean rebuild. - Href → spine resolution (`Epub`) — `resolveHrefToSpineIndex()` maps an href (e.g. `chapter2.xhtml#note1`) to its spine index by filename matching. - Footnotes menu + activity (`EpubReaderMenuActivity`, `EpubReaderFootnotesActivity`) — A new "Footnotes" entry in the reader menu lists all footnote links found on the current page. The user scrolls and selects to navigate. - Navigate & restore (`EpubReaderActivity`) — `navigateToHref()` saves the current spine index and page number, then jumps to the target. The Back button restores the saved position when the user is done reading the footnote.
**Additional Context** **What was removed vs #553:** virtual spine items (`addVirtualSpineItem`, `isVirtualSpineItem`), two-pass parsing, `<aside>` content extraction to temp HTML files, `<p class="note">` paragraph note extraction, `replaceHtmlEntities` (master already has `lookupHtmlEntity`), `footnotePages` / `buildFilteredChapterList`, `noterefCallback` / `Noteref` struct, and the stack size increase from 8 KB to 24 KB (not needed without two-pass parsing and virtual file I/O on the render task). **Performance:** Single-pass parsing. No new heap allocations in the hot path — footnote text is collected into fixed stack buffers (char[24], char[64]). Active runtime memory is ~2.8 KB worst-case (one page × 16 footnotes × 88 bytes, mirrored in `currentPageFootnotes`). Flash usage is unchanged at 97.4%; RAM stays at 31%. **Known limitations:** When clicking a footnote, it jumps to the start of the HTML file instead of the specific anchor. This could be problematic for books that don't have separate files for each footnote (there is no element-id-to-page mapping yet; that will come in another PR soon). --- ### AI Usage Did you use AI tools to help write this code? _**PARTIALLY**_ Claude Opus 4.6 was used to do most of the migration. I manually checked its work and fixed some things, but I haven't reviewed all the changes yet, so feedback is welcome. --------- Co-authored-by: Arthur Tazhitdinov <lisnake@gmail.com>
2026-02-26 16:47:34 +02:00
parent 451774ddf8
commit 30d8a8d011
15 changed files with 481 additions and 22 deletions
--- a/lib/Epub/Epub.cpp
+++ b/lib/Epub/Epub.cpp
@@ -858,3 +858,30 @@ float Epub::calculateProgress(const int currentSpineIndex, const float currentSp
  const float totalProgress = static_cast<float>(prevChapterSize) + sectionProgSize;
  return totalProgress / static_cast<float>(bookSize);
 }
+
+int Epub::resolveHrefToSpineIndex(const std::string& href) const {
+  // Maps an href such as "chapter2.xhtml#note1" to the index of its spine item; -1 if it cannot be resolved.
+  if (!bookMetadataCache || !bookMetadataCache->isLoaded()) return -1;
+
+  // Strip the "#fragment": only the file part identifies a spine item (substr(0, npos) keeps the whole string).
+  const std::string target = href.substr(0, href.find('#'));
+
+  // Anchor-only reference (same file): there is no file name to look up.
+  if (target.empty()) return -1;
+
+  // Basename of the target, used as a fallback key when the full path matches no spine href.
+  const size_t targetSlash = target.find_last_of('/');
+  const std::string targetFilename = (targetSlash != std::string::npos) ? target.substr(targetSlash + 1) : target;
+
+  // An exact path match anywhere in the spine wins; the first basename-only match is just a fallback.
+  int filenameMatch = -1;
+  for (int i = 0; i < getSpineItemsCount(); i++) {
+    const auto& spineHref = getSpineItem(i).href;
+    if (spineHref == target) return i;
+    if (filenameMatch != -1) continue;  // fallback already chosen; keep scanning for an exact match only
+    const size_t spineSlash = spineHref.find_last_of('/');
+    const size_t nameStart = (spineSlash != std::string::npos) ? spineSlash + 1 : 0;
+    if (spineHref.compare(nameStart, std::string::npos, targetFilename) == 0) filenameMatch = i;
+  }
+  return filenameMatch;
+}
--- a/lib/Epub/Epub.h
+++ b/lib/Epub/Epub.h
@@ -72,4 +72,5 @@ class Epub {
  size_t getBookSize() const;
  float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
  CssParser* getCssParser() const { return cssParser.get(); }
+  int resolveHrefToSpineIndex(const std::string& href) const;  // spine index of href's target file, or -1
 };
--- a/lib/Epub/Epub/FootnoteEntry.h
+++ b/lib/Epub/Epub/FootnoteEntry.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <cstring>
+
+/// Fixed-size footnote link record: a display label plus the link target it points at.
+struct FootnoteEntry {
+  /// Display label (the link text), NUL-terminated, at most 23 characters.
+  char number[24] = {};
+  /// Link target as written in the chapter (e.g. "chapter2.xhtml#note1"), NUL-terminated, at most 63 characters.
+  char href[64] = {};
+
+  FootnoteEntry() = default;  // both buffers start fully zeroed, so raw writes of the arrays are deterministic
+};
--- a/lib/Epub/Epub/Page.cpp
+++ b/lib/Epub/Epub/Page.cpp
@@ -67,6 +67,18 @@ bool Page::serialize(FsFile& file) const {
    }
  }

+  // Serialize footnotes (clamp to MAX_FOOTNOTES_PER_PAGE to match addFootnote/deserialize limits)
+  const uint16_t fnCount = std::min<uint16_t>(footnotes.size(), MAX_FOOTNOTES_PER_PAGE);
+  serialization::writePod(file, fnCount);
+  for (uint16_t i = 0; i < fnCount; i++) {
+    const auto& fn = footnotes[i];
+    if (file.write(fn.number, sizeof(fn.number)) != sizeof(fn.number) ||
+        file.write(fn.href, sizeof(fn.href)) != sizeof(fn.href)) {
+      LOG_ERR("PGE", "Failed to write footnote");
+      return false;
+    }
+  }
+
  return true;
 }

@@ -92,5 +104,24 @@ std::unique_ptr<Page> Page::deserialize(FsFile& file) {
    }
  }

+  // Deserialize footnotes
+  uint16_t fnCount;
+  serialization::readPod(file, fnCount);
+  if (fnCount > MAX_FOOTNOTES_PER_PAGE) {
+    LOG_ERR("PGE", "Invalid footnote count %u", fnCount);
+    return nullptr;
+  }
+  page->footnotes.resize(fnCount);
+  for (uint16_t i = 0; i < fnCount; i++) {
+    auto& entry = page->footnotes[i];
+    if (file.read(entry.number, sizeof(entry.number)) != sizeof(entry.number) ||
+        file.read(entry.href, sizeof(entry.href)) != sizeof(entry.href)) {
+      LOG_ERR("PGE", "Failed to read footnote %u", i);
+      return nullptr;
+    }
+    entry.number[sizeof(entry.number) - 1] = '\0';
+    entry.href[sizeof(entry.href) - 1] = '\0';
+  }
+
  return page;
 }
--- a/lib/Epub/Epub/Page.h
+++ b/lib/Epub/Epub/Page.h
@@ -5,6 +5,7 @@
 #include <utility>
 #include <vector>

+#include "FootnoteEntry.h"
 #include "blocks/ImageBlock.h"
 #include "blocks/TextBlock.h"

@@ -57,6 +58,19 @@ class Page {
 public:
  // the list of block index and line numbers on this page
  std::vector<std::shared_ptr<PageElement>> elements;
+  std::vector<FootnoteEntry> footnotes;
+  static constexpr uint16_t MAX_FOOTNOTES_PER_PAGE = 16;
+
+  void addFootnote(const char* number, const char* href) {
+    if (footnotes.size() >= MAX_FOOTNOTES_PER_PAGE) return;  // per-page cap reached: drop the footnote
+    constexpr auto numberMax = sizeof(FootnoteEntry::number) - 1, hrefMax = sizeof(FootnoteEntry::href) - 1;
+    FootnoteEntry fn;
+    strncpy(fn.number, number, numberMax);  // truncating copy; strncpy zero-pads shorter input
+    strncpy(fn.href, href, hrefMax);
+    fn.number[numberMax] = fn.href[hrefMax] = '\0';  // terminate explicitly in case the input was truncated
+    footnotes.push_back(fn);
+  }
+
  void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const;
  bool serialize(FsFile& file) const;
  static std::unique_ptr<Page> deserialize(FsFile& file);
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@@ -29,6 +29,7 @@ class TextBlock final : public Block {
  const BlockStyle& getBlockStyle() const { return blockStyle; }
  const std::vector<std::string>& getWords() const { return words; }
  bool isEmpty() override { return words.empty(); }
+  size_t wordCount() const { return words.size(); }  // number of words stored in this block
  // given a renderer works out where to break the words into lines
  void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
  BlockType getType() override { return TEXT_BLOCK; }
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -49,6 +49,24 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
  return false;
 }

+const char* getAttribute(const XML_Char** atts, const char* attrName) {  // value of attrName, or nullptr
+  // atts: null-terminated list of alternating name/value pointers (may itself be null).
+  for (const XML_Char** pair = atts; pair && *pair; pair += 2) {
+    if (strcmp(pair[0], attrName) == 0) return pair[1];
+  }
+  return nullptr;
+}
+
+bool isInternalEpubLink(const char* href) {  // false for null/empty hrefs and hrefs with an external scheme
+  if (!href || href[0] == '\0') return false;
+  static const char* const externalPrefixes[] = {"http://", "https://", "mailto:", "ftp://", "tel:", "javascript:"};
+  for (const char* prefix : externalPrefixes) {  // URI schemes are case-insensitive (RFC 3986 §3.1)
+    for (const char* h = href; ((*h >= 'A' && *h <= 'Z') ? *h - 'A' + 'a' : *h) == *prefix; ++h)
+      if (*++prefix == '\0') return false;  // whole prefix matched with ASCII case folded: external link
+  }
+  return true;
+}
+
 bool isHeaderOrBlock(const char* name) {
  return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
 }
@@ -121,6 +139,7 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
    makePages();
  }
  currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle));
+  wordsExtractedInBlock = 0;
 }

 void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
@@ -430,6 +449,50 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
    }
  }

+  // Detect internal <a href="..."> links (footnotes, cross-references)
+  // Note: <aside epub:type="footnote"> elements are rendered as normal content
+  // without special handling. Links pointing to them are collected as footnotes.
+  if (strcmp(name, "a") == 0) {
+    const char* href = getAttribute(atts, "href");
+
+    bool isInternalLink = isInternalEpubLink(href);
+
+    // Special case: javascript:void(0) links with data attributes
+    // Example: <a href="javascript:void(0)"
+    // data-xyz="{&quot;name&quot;:&quot;OPS/ch2.xhtml&quot;,&quot;frag&quot;:&quot;id46&quot;}">
+    if (href && strncmp(href, "javascript:", 11) == 0) {
+      isInternalLink = false;
+      // TODO: Parse data-* attributes to extract actual href
+    }
+
+    if (isInternalLink) {
+      // Flush buffer before style change
+      if (self->partWordBufferIndex > 0) {
+        self->flushPartWordBuffer();
+        self->nextWordContinues = true;
+      }
+      self->insideFootnoteLink = true;
+      self->footnoteLinkDepth = self->depth;
+      strncpy(self->currentFootnoteLinkHref, href, sizeof(self->currentFootnoteLinkHref) - 1);
+      self->currentFootnoteLinkHref[sizeof(self->currentFootnoteLinkHref) - 1] = '\0';
+      self->currentFootnoteLinkText[0] = '\0';
+      self->currentFootnoteLinkTextLen = 0;
+
+      // Apply underline style to visually indicate the link
+      self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
+      StyleStackEntry entry;
+      entry.depth = self->depth;
+      entry.hasUnderline = true;
+      entry.underline = true;
+      self->inlineStyleStack.push_back(entry);
+      self->updateEffectiveInlineStyle();
+
+      // Skip CSS resolution — we already handled styling for this <a> tag
+      self->depth += 1;
+      return;
+    }
+  }
+
  // Compute CSS style for this element
  CssStyle cssStyle;
  if (self->cssParser) {
@@ -582,6 +645,19 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
    return;
  }

+  // Collect footnote link display text (for the number label)
+  // Skip whitespace and brackets to normalize noterefs like "[1]" → "1"
+  if (self->insideFootnoteLink) {
+    for (int i = 0; i < len; i++) {
+      unsigned char c = static_cast<unsigned char>(s[i]);
+      if (isWhitespace(c) || c == '[' || c == ']') continue;
+      if (self->currentFootnoteLinkTextLen < static_cast<int>(sizeof(self->currentFootnoteLinkText)) - 1) {
+        self->currentFootnoteLinkText[self->currentFootnoteLinkTextLen++] = c;
+        self->currentFootnoteLinkText[self->currentFootnoteLinkTextLen] = '\0';
+      }
+    }
+  }
+
  for (int i = 0; i < len; i++) {
    if (isWhitespace(s[i])) {
      // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
@@ -743,6 +819,21 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n

  self->depth -= 1;

+  // Closing a footnote link — create entry from collected text and href
+  if (self->insideFootnoteLink && self->depth == self->footnoteLinkDepth) {
+    if (self->currentFootnoteLinkText[0] != '\0' && self->currentFootnoteLinkHref[0] != '\0') {
+      FootnoteEntry entry;
+      strncpy(entry.number, self->currentFootnoteLinkText, sizeof(entry.number) - 1);
+      entry.number[sizeof(entry.number) - 1] = '\0';
+      strncpy(entry.href, self->currentFootnoteLinkHref, sizeof(entry.href) - 1);
+      entry.href[sizeof(entry.href) - 1] = '\0';
+      int wordIndex =
+          self->wordsExtractedInBlock + (self->currentTextBlock ? static_cast<int>(self->currentTextBlock->size()) : 0);
+      self->pendingFootnotes.push_back({wordIndex, entry});
+    }
+    self->insideFootnoteLink = false;
+  }
+
  // Leaving skip
  if (self->skipUntilDepth == self->depth) {
    self->skipUntilDepth = INT_MAX;
@@ -910,6 +1001,15 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
    currentPageNextY = 0;
  }

+  // Track cumulative words to assign footnotes to the page containing their anchor
+  wordsExtractedInBlock += line->wordCount();
+  auto footnoteIt = pendingFootnotes.begin();
+  while (footnoteIt != pendingFootnotes.end() && footnoteIt->first <= wordsExtractedInBlock) {
+    currentPage->addFootnote(footnoteIt->second.number, footnoteIt->second.href);
+    ++footnoteIt;
+  }
+  pendingFootnotes.erase(pendingFootnotes.begin(), footnoteIt);
+
  // Apply horizontal left inset (margin + padding) as x position offset
  const int16_t xOffset = line->getBlockStyle().leftInset();
  currentPage->elements.push_back(std::make_shared<PageLine>(line, xOffset, currentPageNextY));
@@ -947,6 +1047,16 @@ void ChapterHtmlSlimParser::makePages() {
      renderer, fontId, effectiveWidth,
      [this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });

+  // Fallback: transfer any remaining pending footnotes to current page.
+  // Normally addLineToPage handles this via word-index tracking, but this catches
+  // edge cases where a footnote's word index equals the exact block size.
+  if (!pendingFootnotes.empty() && currentPage) {
+    for (const auto& [idx, fn] : pendingFootnotes) {
+      currentPage->addFootnote(fn.number, fn.href);
+    }
+    pendingFootnotes.clear();
+  }
+
  // Apply bottom spacing after the paragraph (stored in pixels)
  if (blockStyle.marginBottom > 0) {
    currentPageNextY += blockStyle.marginBottom;
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
@@ -5,7 +5,9 @@
 #include <climits>
 #include <functional>
 #include <memory>
+#include <vector>

+#include "../FootnoteEntry.h"
 #include "../ParsedText.h"
 #include "../blocks/ImageBlock.h"
 #include "../blocks/TextBlock.h"
@@ -66,6 +68,15 @@ class ChapterHtmlSlimParser {
  int tableRowIndex = 0;
  int tableColIndex = 0;

+  // Footnote link tracking
+  bool insideFootnoteLink = false;
+  int footnoteLinkDepth = -1;
+  char currentFootnoteLinkText[24] = {};
+  int currentFootnoteLinkTextLen = 0;
+  char currentFootnoteLinkHref[64] = {};
+  std::vector<std::pair<int, FootnoteEntry>> pendingFootnotes;  // <wordIndex, entry>
+  int wordsExtractedInBlock = 0;
+
  void updateEffectiveInlineStyle();
  void startNewTextBlock(const BlockStyle& blockStyle);
  void flushPartWordBuffer();