diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp
index 3be770fe..876bea75 100644
--- a/lib/Epub/Epub.cpp
+++ b/lib/Epub/Epub.cpp
@@ -858,3 +858,34 @@ float Epub::calculateProgress(const int currentSpineIndex, const float currentSp
   const float totalProgress = static_cast(prevChapterSize) + sectionProgSize;
   return totalProgress / static_cast(bookSize);
 }
+
+// Resolves an href (optionally carrying a #fragment) to the index of the spine item it targets.
+// Returns -1 when the metadata cache is not loaded, when the href is anchor-only, or when no
+// spine item matches.
+int Epub::resolveHrefToSpineIndex(const std::string& href) const {
+  if (!bookMetadataCache || !bookMetadataCache->isLoaded()) return -1;
+
+  // Drop the #fragment; substr(0, npos) keeps the whole string when there is none
+  const std::string target = href.substr(0, href.find('#'));
+
+  // Same-file reference (anchor-only)
+  if (target.empty()) return -1;
+
+  // Filename part of the target, used only as a fallback
+  const size_t targetSlash = target.find_last_of('/');
+  const std::string targetFilename = (targetSlash != std::string::npos) ? target.substr(targetSlash + 1) : target;
+
+  // An exact path match anywhere in the spine beats a filename-only match, so remember the
+  // first filename-only hit and keep scanning instead of returning it immediately.
+  int filenameMatch = -1;
+  const int spineCount = getSpineItemsCount();
+  for (int i = 0; i < spineCount; i++) {
+    const auto& spineHref = getSpineItem(i).href;
+    if (spineHref == target) return i;
+    if (filenameMatch >= 0) continue;
+    const size_t spineSlash = spineHref.find_last_of('/');
+    const size_t nameStart = (spineSlash != std::string::npos) ? spineSlash + 1 : 0;
+    if (spineHref.compare(nameStart, std::string::npos, targetFilename) == 0) filenameMatch = i;
+  }
+  return filenameMatch;
+}
diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h
index cde9d210..9ffa8d37 100644
--- a/lib/Epub/Epub.h
+++ b/lib/Epub/Epub.h
@@ -72,4 +72,5 @@ class Epub {
   size_t getBookSize() const;
   float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
   CssParser* getCssParser() const { return cssParser.get(); }
+  int resolveHrefToSpineIndex(const std::string& href) const;
 };
diff --git a/lib/Epub/Epub/FootnoteEntry.h b/lib/Epub/Epub/FootnoteEntry.h
new file mode 100644
index 00000000..8023df42
--- /dev/null
+++ b/lib/Epub/Epub/FootnoteEntry.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <cstring>  // NOTE(review): header name assumed; Page::addFootnote fills these buffers via strncpy
+
+// One footnote / cross-reference link found on a page. Fixed-size buffers let
+// Page::serialize and Page::deserialize write and read them as raw bytes.
+struct FootnoteEntry {
+  char number[24];
+  char href[64];
+
+  FootnoteEntry() {
+    number[0] = '\0';
+    href[0] = '\0';
+  }
+};
diff --git a/lib/Epub/Epub/Page.cpp b/lib/Epub/Epub/Page.cpp
index 9eb68469..ba12589e 100644
--- a/lib/Epub/Epub/Page.cpp
+++ b/lib/Epub/Epub/Page.cpp
@@ -67,6 +67,18 @@ bool Page::serialize(FsFile& file) const {
     }
   }
 
+  // Serialize footnotes (clamp to MAX_FOOTNOTES_PER_PAGE to match addFootnote/deserialize limits)
+  const uint16_t fnCount = static_cast<uint16_t>(std::min<size_t>(footnotes.size(), MAX_FOOTNOTES_PER_PAGE));
+  serialization::writePod(file, fnCount);
+  for (uint16_t i = 0; i < fnCount; i++) {
+    const auto& fn = footnotes[i];
+    if (file.write(fn.number, sizeof(fn.number)) != sizeof(fn.number) ||
+        file.write(fn.href, sizeof(fn.href)) != sizeof(fn.href)) {
+      LOG_ERR("PGE", "Failed to write footnote");
+      return false;
+    }
+  }
+
   return true;
 }
 
@@ -92,5 +104,24 @@ std::unique_ptr Page::deserialize(FsFile& file) {
     }
   }
 
+  // Deserialize footnotes
+  uint16_t fnCount;
+  serialization::readPod(file, fnCount);
+  if (fnCount > MAX_FOOTNOTES_PER_PAGE) {
+    LOG_ERR("PGE", "Invalid footnote count %u", fnCount);
+    return nullptr;
+  }
+  page->footnotes.resize(fnCount);
+  for (uint16_t i = 0; i < fnCount; i++) {
+    auto& entry = page->footnotes[i];
+    if (file.read(entry.number, sizeof(entry.number)) != sizeof(entry.number) ||
+        file.read(entry.href, sizeof(entry.href)) != sizeof(entry.href)) {
+      LOG_ERR("PGE", "Failed to read footnote %u", i);
+      return nullptr;
+    }
+    entry.number[sizeof(entry.number) - 1] = '\0';
+    entry.href[sizeof(entry.href) - 1] = '\0';
+  }
+
   return page;
 }
diff --git a/lib/Epub/Epub/Page.h b/lib/Epub/Epub/Page.h
index 9970baec..7b4c18ac 100644
--- a/lib/Epub/Epub/Page.h
+++ b/lib/Epub/Epub/Page.h
@@ -5,6 +5,7 @@
 #include
 #include
 
+#include "FootnoteEntry.h"
 #include "blocks/ImageBlock.h"
 #include "blocks/TextBlock.h"
 
@@ -57,6 +58,22 @@ class Page {
  public:
   // the list of block index and line numbers on this page
   std::vector> elements;
+  std::vector<FootnoteEntry> footnotes;
+  static constexpr uint16_t MAX_FOOTNOTES_PER_PAGE = 16;
+
+  // Records a footnote link for this page. Over-long strings are truncated to fit the
+  // FootnoteEntry buffers; null arguments and entries past the cap are ignored.
+  void addFootnote(const char* number, const char* href) {
+    if (!number || !href) return;  // strncpy from a null source is undefined behaviour
+    if (footnotes.size() >= MAX_FOOTNOTES_PER_PAGE) return;  // Cap per-page footnotes
+    FootnoteEntry entry;
+    strncpy(entry.number, number, sizeof(entry.number) - 1);
+    entry.number[sizeof(entry.number) - 1] = '\0';
+    strncpy(entry.href, href, sizeof(entry.href) - 1);
+    entry.href[sizeof(entry.href) - 1] = '\0';
+    footnotes.push_back(entry);
+  }
+
   void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const;
   bool serialize(FsFile& file) const;
   static std::unique_ptr deserialize(FsFile& file);
diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h
index 9a1dad15..b654dcf1 100644
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@@ -29,6 +29,7 @@ class TextBlock final : public Block {
   const BlockStyle& getBlockStyle() const { return blockStyle; }
   const std::vector& getWords() const { return words; }
   bool isEmpty() override { return words.empty(); }
+  size_t wordCount() const { return words.size(); }
   // given a renderer works out where to break the words into lines
   void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
   BlockType getType() override { return TEXT_BLOCK; }
diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
index d502933d..8afe3ff9 100644
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -49,6 +49,39 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
   return false;
 }
 
+// Returns the value paired with attrName in an attribute array laid out as alternating
+// name/value pointers ended by a null name, or nullptr when atts is null or has no such name.
+const char* getAttribute(const XML_Char** atts, const char* attrName) {
+  if (!atts) return nullptr;
+  for (int i = 0; atts[i]; i += 2) {
+    if (strcmp(atts[i], attrName) == 0) return atts[i + 1];
+  }
+  return nullptr;
+}
+
+// True when href starts with prefix, comparing ASCII letters case-insensitively.
+// prefix must already be lower-case.
+bool startsWithIgnoreCase(const char* href, const char* prefix) {
+  for (; *prefix; ++href, ++prefix) {
+    char c = *href;
+    if (c >= 'A' && c <= 'Z') c = static_cast<char>(c - 'A' + 'a');
+    if (c != *prefix) return false;  // also ends the scan when href is shorter than prefix
+  }
+  return true;
+}
+
+// True for any non-empty href that does not start with one of the external schemes listed
+// below. Schemes are matched case-insensitively (RFC 3986 section 3.1), so "HTTP://..." is
+// rejected just like "http://...".
+bool isInternalEpubLink(const char* href) {
+  if (!href || href[0] == '\0') return false;
+  static const char* const EXTERNAL_SCHEMES[] = {"http://", "https://", "mailto:", "ftp://", "tel:", "javascript:"};
+  for (const char* scheme : EXTERNAL_SCHEMES) {
+    if (startsWithIgnoreCase(href, scheme)) return false;
+  }
+  return true;
+}
+
 bool isHeaderOrBlock(const char* name) {
   return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
 }
@@ -121,6 +154,7 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
     makePages();
   }
   currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle));
+  wordsExtractedInBlock = 0;
 }
 
 void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
@@ -430,6 +464,50 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
     }
   }
 
+  // Detect internal links (footnotes, cross-references)
+  // Note: