diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 9365df20..00fa29dd 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -4,13 +4,16 @@ #include #include +#include +#include + #include "Epub/css/CssParser.h" #include "Page.h" #include "hyphenation/Hyphenator.h" #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 18; +constexpr uint8_t SECTION_FILE_VERSION = 19; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t); @@ -113,6 +116,7 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con serialization::readPod(file, pageCount); file.close(); LOG_DBG("SCT", "Deserialization succeeded: %d pages", pageCount); + buildTocBoundaries(readAnchorMap(filePath)); return true; } @@ -203,11 +207,24 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c } } + // Collect TOC anchors for this spine so the parser can insert page breaks at chapter boundaries + std::set tocAnchors; + const int startTocIndex = epub->getTocIndexForSpineIndex(spineIndex); + if (startTocIndex >= 0) { + for (int i = startTocIndex; i < epub->getTocItemsCount(); i++) { + auto entry = epub->getTocItem(i); + if (entry.spineIndex != spineIndex) break; + if (!entry.anchor.empty()) { + tocAnchors.insert(std::move(entry.anchor)); + } + } + } + ChapterHtmlSlimParser visitor( epub, tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - embeddedStyle, contentBase, imageBasePath, imageRendering, popupFn, cssParser); + embeddedStyle, contentBase, imageBasePath, imageRendering, std::move(tocAnchors), popupFn, cssParser); Hyphenator::setPreferredLanguage(epub->getLanguage()); success = visitor.parseAndBuildPages(); @@ -240,7 +257,7 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c return false; } - // Write anchor-to-page map for fragment navigation (e.g. footnote targets) + // Write anchor-to-page map for fragment navigation (footnotes + TOC) const uint32_t anchorMapOffset = file.position(); const auto& anchors = visitor.getAnchors(); serialization::writePod(file, static_cast(anchors.size())); @@ -258,6 +275,13 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c if (cssParser) { cssParser->clear(); } + + // Convert anchor vector to map for buildTocBoundaries + std::map anchorMap; + for (const auto& [a, p] : anchors) { + anchorMap.emplace(a, p); + } + buildTocBoundaries(anchorMap); return true; } @@ -311,3 +335,137 @@ std::optional Section::getPageForAnchor(const std::string& anchor) con f.close(); return std::nullopt; } + +std::map Section::readAnchorMap(const std::string& sectionPath) { + FsFile f; + if (!Storage.openFileForRead("SCT", sectionPath, f)) { + return {}; + } + + f.seek(HEADER_SIZE - sizeof(uint32_t)); + uint32_t anchorMapOffset; + serialization::readPod(f, anchorMapOffset); + if (anchorMapOffset == 0) { + f.close(); + return {}; + } + + f.seek(anchorMapOffset); + uint16_t count; + serialization::readPod(f, count); + std::map result; + for (uint16_t i = 0; i < count; i++) { + std::string key; + uint16_t page; + serialization::readString(f, key); + serialization::readPod(f, page); + result.emplace(std::move(key), page); + } + + f.close(); + return result; +} + +void Section::buildTocBoundaries(const std::map& anchorMap) { + tocBoundaries.clear(); + const int startTocIndex = epub->getTocIndexForSpineIndex(spineIndex); + if (startTocIndex < 0) return; + + const int tocCount = epub->getTocItemsCount(); + for (int i = startTocIndex; i < tocCount; i++) { + const auto entry = epub->getTocItem(i); + if (entry.spineIndex != spineIndex) break; + uint16_t page = 0; + if (!entry.anchor.empty()) { + auto it = anchorMap.find(entry.anchor); + if (it != anchorMap.end()) page = it->second; + } + tocBoundaries.push_back({i, page}); + } + std::sort(tocBoundaries.begin(), tocBoundaries.end(), + [](const TocBoundary& a, const TocBoundary& b) { return a.startPage < b.startPage; }); +} + +int Section::getTocIndexForPage(const int page) const { + if (tocBoundaries.empty()) { + return epub->getTocIndexForSpineIndex(spineIndex); + } + + auto it = std::upper_bound(tocBoundaries.begin(), tocBoundaries.end(), static_cast(page), + [](uint16_t p, const TocBoundary& boundary) { return p < boundary.startPage; }); + if (it == tocBoundaries.begin()) { + return tocBoundaries[0].tocIndex; + } + return std::prev(it)->tocIndex; +} + +std::optional Section::getPageForTocIndex(const int tocIndex) const { + for (const auto& boundary : tocBoundaries) { + if (boundary.tocIndex == tocIndex) { + return boundary.startPage; + } + } + return std::nullopt; +} + +std::optional Section::getPageRangeForTocIndex(const int tocIndex) const { + for (size_t i = 0; i < tocBoundaries.size(); i++) { + if (tocBoundaries[i].tocIndex == tocIndex) { + const int startPage = tocBoundaries[i].startPage; + const int endPage = (i + 1 < tocBoundaries.size()) ? static_cast(tocBoundaries[i + 1].startPage) : pageCount; + return TocPageRange{startPage, endPage}; + } + } + return std::nullopt; +} + +std::optional Section::readCachedPageCount(const std::string& cachePath, const int spineIndex, + const int fontId, const float lineCompression, + const bool extraParagraphSpacing, const uint8_t paragraphAlignment, + const uint16_t viewportWidth, const uint16_t viewportHeight, + const bool hyphenationEnabled, const bool embeddedStyle, + const uint8_t imageRendering) { + const std::string path = cachePath + "/sections/" + std::to_string(spineIndex) + ".bin"; + FsFile f; + if (!Storage.openFileForRead("SCT", path, f)) { + return std::nullopt; + } + + uint8_t version; + serialization::readPod(f, version); + if (version != SECTION_FILE_VERSION) { + f.close(); + return std::nullopt; + } + + int fileFontId; + float fileLineCompression; + bool fileExtraParagraphSpacing; + uint8_t fileParagraphAlignment; + uint16_t fileViewportWidth, fileViewportHeight; + bool fileHyphenationEnabled, fileEmbeddedStyle; + uint8_t fileImageRendering; + serialization::readPod(f, fileFontId); + serialization::readPod(f, fileLineCompression); + serialization::readPod(f, fileExtraParagraphSpacing); + serialization::readPod(f, fileParagraphAlignment); + serialization::readPod(f, fileViewportWidth); + serialization::readPod(f, fileViewportHeight); + serialization::readPod(f, fileHyphenationEnabled); + serialization::readPod(f, fileEmbeddedStyle); + serialization::readPod(f, fileImageRendering); + + if (fontId != fileFontId || lineCompression != fileLineCompression || + extraParagraphSpacing != fileExtraParagraphSpacing || paragraphAlignment != fileParagraphAlignment || + viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight || + hyphenationEnabled != fileHyphenationEnabled || embeddedStyle != fileEmbeddedStyle || + imageRendering != fileImageRendering) { + f.close(); + return std::nullopt; + } + + uint16_t count; + serialization::readPod(f, count); + f.close(); + return count; +} diff --git a/lib/Epub/Epub/Section.h b/lib/Epub/Epub/Section.h index 6f002c44..602fef00 100644 --- a/lib/Epub/Epub/Section.h +++ b/lib/Epub/Epub/Section.h @@ -1,8 +1,10 @@ #pragma once #include +#include #include #include #include +#include #include "Epub.h" @@ -21,6 +23,15 @@ class Section { bool embeddedStyle, uint8_t imageRendering); uint32_t onPageComplete(std::unique_ptr page); + struct TocBoundary { + int tocIndex = 0; + uint16_t startPage = 0; + }; + std::vector tocBoundaries; + + static std::map readAnchorMap(const std::string& sectionPath); + void buildTocBoundaries(const std::map& anchorMap); + public: uint16_t pageCount = 0; int currentPage = 0; @@ -40,6 +51,23 @@ class Section { uint8_t imageRendering, const std::function& popupFn = nullptr); std::unique_ptr loadPageFromSectionFile(); - // Look up the page number for an anchor id from the section cache file. + // Look up the page number for an anchor id from the section cache file (used for footnotes). std::optional getPageForAnchor(const std::string& anchor) const; + + // TOC boundary navigation: maps TOC entries to page ranges within this section. + int getTocIndexForPage(int page) const; + std::optional getPageForTocIndex(int tocIndex) const; + + struct TocPageRange { + int startPage; + int endPage; + }; + std::optional getPageRangeForTocIndex(int tocIndex) const; + + // Reads just the pageCount from an existing section cache file without loading the full section. + static std::optional readCachedPageCount(const std::string& cachePath, int spineIndex, int fontId, + float lineCompression, bool extraParagraphSpacing, + uint8_t paragraphAlignment, uint16_t viewportWidth, + uint16_t viewportHeight, bool hyphenationEnabled, + bool embeddedStyle, uint8_t imageRendering); }; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 1df3ceab..a9027606 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -135,6 +135,12 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle)); if (!pendingAnchorId.empty()) { + if (tocAnchors.count(pendingAnchorId) && currentPage && !currentPage->elements.empty()) { + completePageFn(std::move(currentPage)); + completedPageCount++; + currentPage.reset(new Page()); + currentPageNextY = 0; + } anchorData.push_back({std::move(pendingAnchorId), static_cast(completedPageCount)}); pendingAnchorId.clear(); } @@ -144,7 +150,14 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { makePages(); } // Record deferred anchor after previous block is flushed + // Force page break at TOC chapter boundaries so chapters start on a fresh page if (!pendingAnchorId.empty()) { + if (tocAnchors.count(pendingAnchorId) && currentPage && !currentPage->elements.empty()) { + completePageFn(std::move(currentPage)); + completedPageCount++; + currentPage.reset(new Page()); + currentPageNextY = 0; + } anchorData.push_back({std::move(pendingAnchorId), static_cast(completedPageCount)}); pendingAnchorId.clear(); } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 1cc0ea39..a53d54df 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -4,7 +4,9 @@ #include #include +#include #include +#include #include #include @@ -75,6 +77,10 @@ class ChapterHtmlSlimParser { std::vector> anchorData; std::string pendingAnchorId; // deferred until after previous text block is flushed + // TOC anchors: when a TOC anchor is encountered, force a page break so chapters start on a fresh page + std::set tocAnchors; + std::map tocAnchorPageMap; + // Footnote link tracking bool insideFootnoteLink = false; int footnoteLinkDepth = -1; @@ -102,6 +108,7 @@ class ChapterHtmlSlimParser { const std::function)>& completePageFn, const bool embeddedStyle, const std::string& contentBase, const std::string& imageBasePath, const uint8_t imageRendering = 0, + std::set tocAnchors = {}, const std::function& popupFn = nullptr, const CssParser* cssParser = nullptr) : epub(epub), @@ -120,7 +127,8 @@ class ChapterHtmlSlimParser { embeddedStyle(embeddedStyle), imageRendering(imageRendering), contentBase(contentBase), - imageBasePath(imageBasePath) {} + imageBasePath(imageBasePath), + tocAnchors(std::move(tocAnchors)) {} ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages();