feat: add TOC boundary API and anchor page breaks to Section
Extend Section with TOC boundary tracking: buildTocBoundaries(), getTocIndexForPage(), getPageForTocIndex(), getPageRangeForTocIndex(), and readAnchorMap(), plus readCachedPageCount() for lightweight cache queries. ChapterHtmlSlimParser now accepts a tocAnchors set and forces a page break at each TOC anchor boundary so that every chapter starts on a fresh page. Increment SECTION_FILE_VERSION to 19 for the new TOC boundary data. Ported from upstream PRs #1143 and #1172 and adapted to the mod architecture. Made-with: Cursor
This commit is contained in:
@@ -4,13 +4,16 @@
|
|||||||
#include <Logging.h>
|
#include <Logging.h>
|
||||||
#include <Serialization.h>
|
#include <Serialization.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
#include "Epub/css/CssParser.h"
|
#include "Epub/css/CssParser.h"
|
||||||
#include "Page.h"
|
#include "Page.h"
|
||||||
#include "hyphenation/Hyphenator.h"
|
#include "hyphenation/Hyphenator.h"
|
||||||
#include "parsers/ChapterHtmlSlimParser.h"
|
#include "parsers/ChapterHtmlSlimParser.h"
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
constexpr uint8_t SECTION_FILE_VERSION = 18;
|
constexpr uint8_t SECTION_FILE_VERSION = 19;
|
||||||
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
|
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
|
||||||
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) +
|
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) +
|
||||||
sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t);
|
sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t);
|
||||||
@@ -113,6 +116,7 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con
|
|||||||
serialization::readPod(file, pageCount);
|
serialization::readPod(file, pageCount);
|
||||||
file.close();
|
file.close();
|
||||||
LOG_DBG("SCT", "Deserialization succeeded: %d pages", pageCount);
|
LOG_DBG("SCT", "Deserialization succeeded: %d pages", pageCount);
|
||||||
|
buildTocBoundaries(readAnchorMap(filePath));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -203,11 +207,24 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collect TOC anchors for this spine so the parser can insert page breaks at chapter boundaries
|
||||||
|
std::set<std::string> tocAnchors;
|
||||||
|
const int startTocIndex = epub->getTocIndexForSpineIndex(spineIndex);
|
||||||
|
if (startTocIndex >= 0) {
|
||||||
|
for (int i = startTocIndex; i < epub->getTocItemsCount(); i++) {
|
||||||
|
auto entry = epub->getTocItem(i);
|
||||||
|
if (entry.spineIndex != spineIndex) break;
|
||||||
|
if (!entry.anchor.empty()) {
|
||||||
|
tocAnchors.insert(std::move(entry.anchor));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ChapterHtmlSlimParser visitor(
|
ChapterHtmlSlimParser visitor(
|
||||||
epub, tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
epub, tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
||||||
viewportHeight, hyphenationEnabled,
|
viewportHeight, hyphenationEnabled,
|
||||||
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
|
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
|
||||||
embeddedStyle, contentBase, imageBasePath, imageRendering, popupFn, cssParser);
|
embeddedStyle, contentBase, imageBasePath, imageRendering, std::move(tocAnchors), popupFn, cssParser);
|
||||||
Hyphenator::setPreferredLanguage(epub->getLanguage());
|
Hyphenator::setPreferredLanguage(epub->getLanguage());
|
||||||
success = visitor.parseAndBuildPages();
|
success = visitor.parseAndBuildPages();
|
||||||
|
|
||||||
@@ -240,7 +257,7 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write anchor-to-page map for fragment navigation (e.g. footnote targets)
|
// Write anchor-to-page map for fragment navigation (footnotes + TOC)
|
||||||
const uint32_t anchorMapOffset = file.position();
|
const uint32_t anchorMapOffset = file.position();
|
||||||
const auto& anchors = visitor.getAnchors();
|
const auto& anchors = visitor.getAnchors();
|
||||||
serialization::writePod(file, static_cast<uint16_t>(anchors.size()));
|
serialization::writePod(file, static_cast<uint16_t>(anchors.size()));
|
||||||
@@ -258,6 +275,13 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
|||||||
if (cssParser) {
|
if (cssParser) {
|
||||||
cssParser->clear();
|
cssParser->clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert anchor vector to map for buildTocBoundaries
|
||||||
|
std::map<std::string, uint16_t> anchorMap;
|
||||||
|
for (const auto& [a, p] : anchors) {
|
||||||
|
anchorMap.emplace(a, p);
|
||||||
|
}
|
||||||
|
buildTocBoundaries(anchorMap);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -311,3 +335,137 @@ std::optional<uint16_t> Section::getPageForAnchor(const std::string& anchor) con
|
|||||||
f.close();
|
f.close();
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::map<std::string, uint16_t> Section::readAnchorMap(const std::string& sectionPath) {
|
||||||
|
FsFile f;
|
||||||
|
if (!Storage.openFileForRead("SCT", sectionPath, f)) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
f.seek(HEADER_SIZE - sizeof(uint32_t));
|
||||||
|
uint32_t anchorMapOffset;
|
||||||
|
serialization::readPod(f, anchorMapOffset);
|
||||||
|
if (anchorMapOffset == 0) {
|
||||||
|
f.close();
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
f.seek(anchorMapOffset);
|
||||||
|
uint16_t count;
|
||||||
|
serialization::readPod(f, count);
|
||||||
|
std::map<std::string, uint16_t> result;
|
||||||
|
for (uint16_t i = 0; i < count; i++) {
|
||||||
|
std::string key;
|
||||||
|
uint16_t page;
|
||||||
|
serialization::readString(f, key);
|
||||||
|
serialization::readPod(f, page);
|
||||||
|
result.emplace(std::move(key), page);
|
||||||
|
}
|
||||||
|
|
||||||
|
f.close();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Section::buildTocBoundaries(const std::map<std::string, uint16_t>& anchorMap) {
|
||||||
|
tocBoundaries.clear();
|
||||||
|
const int startTocIndex = epub->getTocIndexForSpineIndex(spineIndex);
|
||||||
|
if (startTocIndex < 0) return;
|
||||||
|
|
||||||
|
const int tocCount = epub->getTocItemsCount();
|
||||||
|
for (int i = startTocIndex; i < tocCount; i++) {
|
||||||
|
const auto entry = epub->getTocItem(i);
|
||||||
|
if (entry.spineIndex != spineIndex) break;
|
||||||
|
uint16_t page = 0;
|
||||||
|
if (!entry.anchor.empty()) {
|
||||||
|
auto it = anchorMap.find(entry.anchor);
|
||||||
|
if (it != anchorMap.end()) page = it->second;
|
||||||
|
}
|
||||||
|
tocBoundaries.push_back({i, page});
|
||||||
|
}
|
||||||
|
std::sort(tocBoundaries.begin(), tocBoundaries.end(),
|
||||||
|
[](const TocBoundary& a, const TocBoundary& b) { return a.startPage < b.startPage; });
|
||||||
|
}
|
||||||
|
|
||||||
|
int Section::getTocIndexForPage(const int page) const {
|
||||||
|
if (tocBoundaries.empty()) {
|
||||||
|
return epub->getTocIndexForSpineIndex(spineIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto it = std::upper_bound(tocBoundaries.begin(), tocBoundaries.end(), static_cast<uint16_t>(page),
|
||||||
|
[](uint16_t p, const TocBoundary& boundary) { return p < boundary.startPage; });
|
||||||
|
if (it == tocBoundaries.begin()) {
|
||||||
|
return tocBoundaries[0].tocIndex;
|
||||||
|
}
|
||||||
|
return std::prev(it)->tocIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<int> Section::getPageForTocIndex(const int tocIndex) const {
|
||||||
|
for (const auto& boundary : tocBoundaries) {
|
||||||
|
if (boundary.tocIndex == tocIndex) {
|
||||||
|
return boundary.startPage;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<Section::TocPageRange> Section::getPageRangeForTocIndex(const int tocIndex) const {
|
||||||
|
for (size_t i = 0; i < tocBoundaries.size(); i++) {
|
||||||
|
if (tocBoundaries[i].tocIndex == tocIndex) {
|
||||||
|
const int startPage = tocBoundaries[i].startPage;
|
||||||
|
const int endPage = (i + 1 < tocBoundaries.size()) ? static_cast<int>(tocBoundaries[i + 1].startPage) : pageCount;
|
||||||
|
return TocPageRange{startPage, endPage};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<uint16_t> Section::readCachedPageCount(const std::string& cachePath, const int spineIndex,
|
||||||
|
const int fontId, const float lineCompression,
|
||||||
|
const bool extraParagraphSpacing, const uint8_t paragraphAlignment,
|
||||||
|
const uint16_t viewportWidth, const uint16_t viewportHeight,
|
||||||
|
const bool hyphenationEnabled, const bool embeddedStyle,
|
||||||
|
const uint8_t imageRendering) {
|
||||||
|
const std::string path = cachePath + "/sections/" + std::to_string(spineIndex) + ".bin";
|
||||||
|
FsFile f;
|
||||||
|
if (!Storage.openFileForRead("SCT", path, f)) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t version;
|
||||||
|
serialization::readPod(f, version);
|
||||||
|
if (version != SECTION_FILE_VERSION) {
|
||||||
|
f.close();
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fileFontId;
|
||||||
|
float fileLineCompression;
|
||||||
|
bool fileExtraParagraphSpacing;
|
||||||
|
uint8_t fileParagraphAlignment;
|
||||||
|
uint16_t fileViewportWidth, fileViewportHeight;
|
||||||
|
bool fileHyphenationEnabled, fileEmbeddedStyle;
|
||||||
|
uint8_t fileImageRendering;
|
||||||
|
serialization::readPod(f, fileFontId);
|
||||||
|
serialization::readPod(f, fileLineCompression);
|
||||||
|
serialization::readPod(f, fileExtraParagraphSpacing);
|
||||||
|
serialization::readPod(f, fileParagraphAlignment);
|
||||||
|
serialization::readPod(f, fileViewportWidth);
|
||||||
|
serialization::readPod(f, fileViewportHeight);
|
||||||
|
serialization::readPod(f, fileHyphenationEnabled);
|
||||||
|
serialization::readPod(f, fileEmbeddedStyle);
|
||||||
|
serialization::readPod(f, fileImageRendering);
|
||||||
|
|
||||||
|
if (fontId != fileFontId || lineCompression != fileLineCompression ||
|
||||||
|
extraParagraphSpacing != fileExtraParagraphSpacing || paragraphAlignment != fileParagraphAlignment ||
|
||||||
|
viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight ||
|
||||||
|
hyphenationEnabled != fileHyphenationEnabled || embeddedStyle != fileEmbeddedStyle ||
|
||||||
|
imageRendering != fileImageRendering) {
|
||||||
|
f.close();
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t count;
|
||||||
|
serialization::readPod(f, count);
|
||||||
|
f.close();
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "Epub.h"
|
#include "Epub.h"
|
||||||
|
|
||||||
@@ -21,6 +23,15 @@ class Section {
|
|||||||
bool embeddedStyle, uint8_t imageRendering);
|
bool embeddedStyle, uint8_t imageRendering);
|
||||||
uint32_t onPageComplete(std::unique_ptr<Page> page);
|
uint32_t onPageComplete(std::unique_ptr<Page> page);
|
||||||
|
|
||||||
|
// Pairs one TOC entry with the page it starts on inside this section.
struct TocBoundary {
|
||||||
|
// Index of the TOC entry, as passed to epub->getTocItem().
int tocIndex = 0;
|
||||||
|
// Page taken from the anchor map; 0 when the entry has no anchor or the anchor is not in the map.
uint16_t startPage = 0;
|
||||||
|
};
|
||||||
|
std::vector<TocBoundary> tocBoundaries;
|
||||||
|
|
||||||
|
static std::map<std::string, uint16_t> readAnchorMap(const std::string& sectionPath);
|
||||||
|
void buildTocBoundaries(const std::map<std::string, uint16_t>& anchorMap);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
uint16_t pageCount = 0;
|
uint16_t pageCount = 0;
|
||||||
int currentPage = 0;
|
int currentPage = 0;
|
||||||
@@ -40,6 +51,23 @@ class Section {
|
|||||||
uint8_t imageRendering, const std::function<void()>& popupFn = nullptr);
|
uint8_t imageRendering, const std::function<void()>& popupFn = nullptr);
|
||||||
std::unique_ptr<Page> loadPageFromSectionFile();
|
std::unique_ptr<Page> loadPageFromSectionFile();
|
||||||
|
|
||||||
// Look up the page number for an anchor id from the section cache file.
|
// Look up the page number for an anchor id from the section cache file (used for footnotes).
|
||||||
std::optional<uint16_t> getPageForAnchor(const std::string& anchor) const;
|
std::optional<uint16_t> getPageForAnchor(const std::string& anchor) const;
|
||||||
|
|
||||||
|
// TOC boundary navigation: maps TOC entries to page ranges within this section.
|
||||||
|
int getTocIndexForPage(int page) const;
|
||||||
|
std::optional<int> getPageForTocIndex(int tocIndex) const;
|
||||||
|
|
||||||
|
// Page span of one TOC entry within a section, as returned by getPageRangeForTocIndex().
// Members are zero-initialised so a default-constructed range never carries indeterminate values;
// brace initialisation with two values (TocPageRange{start, end}) keeps working.
struct TocPageRange {
  // Start page of the TOC entry.
  int startPage = 0;
  // Start page of the next TOC boundary in the section, or the section's page count for the last one.
  int endPage = 0;
};
|
||||||
|
std::optional<TocPageRange> getPageRangeForTocIndex(int tocIndex) const;
|
||||||
|
|
||||||
|
// Reads just the pageCount from an existing section cache file without loading the full section.
|
||||||
|
static std::optional<uint16_t> readCachedPageCount(const std::string& cachePath, int spineIndex, int fontId,
|
||||||
|
float lineCompression, bool extraParagraphSpacing,
|
||||||
|
uint8_t paragraphAlignment, uint16_t viewportWidth,
|
||||||
|
uint16_t viewportHeight, bool hyphenationEnabled,
|
||||||
|
bool embeddedStyle, uint8_t imageRendering);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -135,6 +135,12 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
|
|||||||
currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle));
|
currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle));
|
||||||
|
|
||||||
if (!pendingAnchorId.empty()) {
|
if (!pendingAnchorId.empty()) {
|
||||||
|
if (tocAnchors.count(pendingAnchorId) && currentPage && !currentPage->elements.empty()) {
|
||||||
|
completePageFn(std::move(currentPage));
|
||||||
|
completedPageCount++;
|
||||||
|
currentPage.reset(new Page());
|
||||||
|
currentPageNextY = 0;
|
||||||
|
}
|
||||||
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
|
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
|
||||||
pendingAnchorId.clear();
|
pendingAnchorId.clear();
|
||||||
}
|
}
|
||||||
@@ -144,7 +150,14 @@ void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) {
|
|||||||
makePages();
|
makePages();
|
||||||
}
|
}
|
||||||
// Record deferred anchor after previous block is flushed
|
// Record deferred anchor after previous block is flushed
|
||||||
|
// Force page break at TOC chapter boundaries so chapters start on a fresh page
|
||||||
if (!pendingAnchorId.empty()) {
|
if (!pendingAnchorId.empty()) {
|
||||||
|
if (tocAnchors.count(pendingAnchorId) && currentPage && !currentPage->elements.empty()) {
|
||||||
|
completePageFn(std::move(currentPage));
|
||||||
|
completedPageCount++;
|
||||||
|
currentPage.reset(new Page());
|
||||||
|
currentPageNextY = 0;
|
||||||
|
}
|
||||||
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
|
anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)});
|
||||||
pendingAnchorId.clear();
|
pendingAnchorId.clear();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,9 @@
|
|||||||
|
|
||||||
#include <climits>
|
#include <climits>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -75,6 +77,10 @@ class ChapterHtmlSlimParser {
|
|||||||
std::vector<std::pair<std::string, uint16_t>> anchorData;
|
std::vector<std::pair<std::string, uint16_t>> anchorData;
|
||||||
std::string pendingAnchorId; // deferred until after previous text block is flushed
|
std::string pendingAnchorId; // deferred until after previous text block is flushed
|
||||||
|
|
||||||
|
// TOC anchors: when a TOC anchor is encountered, force a page break so chapters start on a fresh page
|
||||||
|
std::set<std::string> tocAnchors;
|
||||||
|
std::map<std::string, uint16_t> tocAnchorPageMap;
|
||||||
|
|
||||||
// Footnote link tracking
|
// Footnote link tracking
|
||||||
bool insideFootnoteLink = false;
|
bool insideFootnoteLink = false;
|
||||||
int footnoteLinkDepth = -1;
|
int footnoteLinkDepth = -1;
|
||||||
@@ -102,6 +108,7 @@ class ChapterHtmlSlimParser {
|
|||||||
const std::function<void(std::unique_ptr<Page>)>& completePageFn,
|
const std::function<void(std::unique_ptr<Page>)>& completePageFn,
|
||||||
const bool embeddedStyle, const std::string& contentBase,
|
const bool embeddedStyle, const std::string& contentBase,
|
||||||
const std::string& imageBasePath, const uint8_t imageRendering = 0,
|
const std::string& imageBasePath, const uint8_t imageRendering = 0,
|
||||||
|
std::set<std::string> tocAnchors = {},
|
||||||
const std::function<void()>& popupFn = nullptr, const CssParser* cssParser = nullptr)
|
const std::function<void()>& popupFn = nullptr, const CssParser* cssParser = nullptr)
|
||||||
|
|
||||||
: epub(epub),
|
: epub(epub),
|
||||||
@@ -120,7 +127,8 @@ class ChapterHtmlSlimParser {
|
|||||||
embeddedStyle(embeddedStyle),
|
embeddedStyle(embeddedStyle),
|
||||||
imageRendering(imageRendering),
|
imageRendering(imageRendering),
|
||||||
contentBase(contentBase),
|
contentBase(contentBase),
|
||||||
imageBasePath(imageBasePath) {}
|
imageBasePath(imageBasePath),
|
||||||
|
tocAnchors(std::move(tocAnchors)) {}
|
||||||
|
|
||||||
~ChapterHtmlSlimParser() = default;
|
~ChapterHtmlSlimParser() = default;
|
||||||
bool parseAndBuildPages();
|
bool parseAndBuildPages();
|
||||||
|
|||||||
Reference in New Issue
Block a user