granular position tracking
This commit is contained in:
@@ -40,6 +40,12 @@ class Page {
|
||||
public:
|
||||
// the list of block index and line numbers on this page
|
||||
std::vector<std::shared_ptr<PageElement>> elements;
|
||||
|
||||
// Byte offset in source HTML where this page's content begins
|
||||
// Used for restoring reading position after re-indexing due to font/setting changes
|
||||
// This is stored in the Section file's LUT, not in Page serialization
|
||||
uint32_t firstContentOffset = 0;
|
||||
|
||||
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const;
|
||||
bool serialize(FsFile& file) const;
|
||||
static std::unique_ptr<Page> deserialize(FsFile& file);
|
||||
|
||||
@@ -8,10 +8,15 @@
|
||||
#include "parsers/ChapterHtmlSlimParser.h"
|
||||
|
||||
// File-local constants describing the on-disk layout of a section file.
namespace {
|
||||
// NOTE(review): this view shows both the version-11 and the version-12 definition
// (old and new lines of the change); presumably only the version-12 line is live - confirm.
constexpr uint8_t SECTION_FILE_VERSION = 11;
|
||||
// Version 12: Added content offsets to LUT for position restoration after re-indexing
|
||||
constexpr uint8_t SECTION_FILE_VERSION = 12;
// Size in bytes of the fixed header at the start of a section file.
// The trailing uint32_t is the LUT offset: readers seek to
// HEADER_SIZE - sizeof(uint32_t) and read the LUT offset from there.
// NOTE(review): the remaining fields presumably mirror the values written by
// writeSectionFileHeader (version, font, spacing, viewport, ...) - confirm against that function.
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
|
||||
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) +
|
||||
sizeof(uint32_t);
|
||||
|
||||
// LUT entry structure: { filePosition, contentOffset }
|
||||
// Each entry is 8 bytes (2 x uint32_t)
|
||||
// Entry i starts at lutOffset + LUT_ENTRY_SIZE * i; its contentOffset sits
// sizeof(uint32_t) bytes into the entry, after filePosition.
constexpr size_t LUT_ENTRY_SIZE = sizeof(uint32_t) * 2;
|
||||
} // namespace
|
||||
|
||||
uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
|
||||
@@ -181,12 +186,23 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
||||
}
|
||||
writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
||||
viewportHeight, hyphenationEnabled);
|
||||
std::vector<uint32_t> lut = {};
|
||||
|
||||
// LUT entries: { filePosition, contentOffset } pairs
|
||||
struct LutEntry {
|
||||
uint32_t filePos;
|
||||
uint32_t contentOffset;
|
||||
};
|
||||
std::vector<LutEntry> lut = {};
|
||||
|
||||
ChapterHtmlSlimParser visitor(
|
||||
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
||||
viewportHeight, hyphenationEnabled,
|
||||
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn,
|
||||
[this, &lut](std::unique_ptr<Page> page) {
|
||||
// Capture content offset before processing
|
||||
const uint32_t contentOffset = page->firstContentOffset;
|
||||
const uint32_t filePos = this->onPageComplete(std::move(page));
|
||||
lut.push_back({filePos, contentOffset});
|
||||
}, progressFn,
|
||||
epub->getCssParser());
|
||||
Hyphenator::setPreferredLanguage(epub->getLanguage());
|
||||
success = visitor.parseAndBuildPages();
|
||||
@@ -197,8 +213,10 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
||||
// Create a placeholder page for malformed chapters instead of failing entirely
|
||||
// This allows the book to continue loading with chapters that do parse successfully
|
||||
auto placeholderPage = std::unique_ptr<Page>(new Page());
|
||||
placeholderPage->firstContentOffset = 0;
|
||||
// Add placeholder to LUT
|
||||
lut.emplace_back(this->onPageComplete(std::move(placeholderPage)));
|
||||
const uint32_t filePos = this->onPageComplete(std::move(placeholderPage));
|
||||
lut.push_back({filePos, 0});
|
||||
|
||||
// If we still have no pages, the placeholder creation failed
|
||||
if (pageCount == 0) {
|
||||
@@ -211,13 +229,14 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
||||
|
||||
const uint32_t lutOffset = file.position();
|
||||
bool hasFailedLutRecords = false;
|
||||
// Write LUT
|
||||
for (const uint32_t& pos : lut) {
|
||||
if (pos == 0) {
|
||||
// Write LUT with both file position and content offset
|
||||
for (const auto& entry : lut) {
|
||||
if (entry.filePos == 0) {
|
||||
hasFailedLutRecords = true;
|
||||
break;
|
||||
}
|
||||
serialization::writePod(file, pos);
|
||||
serialization::writePod(file, entry.filePos);
|
||||
serialization::writePod(file, entry.contentOffset);
|
||||
}
|
||||
|
||||
if (hasFailedLutRecords) {
|
||||
@@ -243,12 +262,106 @@ std::unique_ptr<Page> Section::loadPageFromSectionFile() {
|
||||
file.seek(HEADER_SIZE - sizeof(uint32_t));
|
||||
uint32_t lutOffset;
|
||||
serialization::readPod(file, lutOffset);
|
||||
file.seek(lutOffset + sizeof(uint32_t) * currentPage);
|
||||
|
||||
// LUT entries are now 8 bytes each: { filePos (4), contentOffset (4) }
|
||||
file.seek(lutOffset + LUT_ENTRY_SIZE * currentPage);
|
||||
uint32_t pagePos;
|
||||
serialization::readPod(file, pagePos);
|
||||
// Skip contentOffset for now - we don't need it when just loading the page
|
||||
|
||||
file.seek(pagePos);
|
||||
|
||||
auto page = Page::deserialize(file);
|
||||
file.close();
|
||||
return page;
|
||||
}
|
||||
|
||||
int Section::findPageForContentOffset(uint32_t targetOffset) const {
|
||||
if (pageCount == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
FsFile f;
|
||||
if (!SdMan.openFileForRead("SCT", filePath, f)) {
|
||||
Serial.printf("[%lu] [SCT] findPageForContentOffset: Failed to open file\n", millis());
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read LUT offset from header
|
||||
f.seek(HEADER_SIZE - sizeof(uint32_t));
|
||||
uint32_t lutOffset;
|
||||
serialization::readPod(f, lutOffset);
|
||||
|
||||
// Binary search through the LUT to find the page containing targetOffset
|
||||
// We want the largest contentOffset that is <= targetOffset
|
||||
int left = 0;
|
||||
int right = pageCount - 1;
|
||||
int result = 0;
|
||||
|
||||
while (left <= right) {
|
||||
const int mid = left + (right - left) / 2;
|
||||
|
||||
// Read content offset for page 'mid'
|
||||
// LUT entry format: { filePos (4), contentOffset (4) }
|
||||
f.seek(lutOffset + LUT_ENTRY_SIZE * mid + sizeof(uint32_t)); // Skip filePos
|
||||
uint32_t midOffset;
|
||||
serialization::readPod(f, midOffset);
|
||||
|
||||
if (midOffset <= targetOffset) {
|
||||
result = mid; // This page could be the answer
|
||||
left = mid + 1; // Look for a later page that might also qualify
|
||||
} else {
|
||||
right = mid - 1; // Look for an earlier page
|
||||
}
|
||||
}
|
||||
|
||||
// When multiple pages share the same content offset (e.g., a large text
|
||||
// block spanning multiple pages), scan backward to find the FIRST page
|
||||
// with that offset, not the last
|
||||
if (result > 0) {
|
||||
f.seek(lutOffset + LUT_ENTRY_SIZE * result + sizeof(uint32_t));
|
||||
uint32_t resultOffset;
|
||||
serialization::readPod(f, resultOffset);
|
||||
|
||||
while (result > 0) {
|
||||
f.seek(lutOffset + LUT_ENTRY_SIZE * (result - 1) + sizeof(uint32_t));
|
||||
uint32_t prevOffset;
|
||||
serialization::readPod(f, prevOffset);
|
||||
if (prevOffset == resultOffset) {
|
||||
result--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f.close();
|
||||
Serial.printf("[%lu] [SCT] findPageForContentOffset: offset %u -> page %d\n", millis(), targetOffset, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
uint32_t Section::getContentOffsetForPage(int pageIndex) const {
|
||||
if (pageCount == 0 || pageIndex < 0 || pageIndex >= pageCount) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
FsFile f;
|
||||
if (!SdMan.openFileForRead("SCT", filePath, f)) {
|
||||
Serial.printf("[%lu] [SCT] getContentOffsetForPage: Failed to open file\n", millis());
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read LUT offset from header
|
||||
f.seek(HEADER_SIZE - sizeof(uint32_t));
|
||||
uint32_t lutOffset;
|
||||
serialization::readPod(f, lutOffset);
|
||||
|
||||
// Read content offset for the specified page
|
||||
// LUT entry format: { filePos (4), contentOffset (4) }
|
||||
f.seek(lutOffset + LUT_ENTRY_SIZE * pageIndex + sizeof(uint32_t)); // Skip filePos
|
||||
uint32_t contentOffset;
|
||||
serialization::readPod(f, contentOffset);
|
||||
|
||||
f.close();
|
||||
return contentOffset;
|
||||
}
|
||||
|
||||
@@ -36,4 +36,9 @@ class Section {
|
||||
const std::function<void()>& progressSetupFn = nullptr,
|
||||
const std::function<void(int)>& progressFn = nullptr);
|
||||
std::unique_ptr<Page> loadPageFromSectionFile();
|
||||
|
||||
// Methods for content offset-based position tracking
|
||||
// Used to restore reading position after re-indexing due to font/setting changes
|
||||
int findPageForContentOffset(uint32_t targetOffset) const;
|
||||
uint32_t getContentOffsetForPage(int pageIndex) const;
|
||||
};
|
||||
|
||||
@@ -332,6 +332,11 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
if (self->skipUntilDepth < self->depth) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Capture byte offset of this character data for page position tracking
|
||||
if (self->xmlParser) {
|
||||
self->lastCharDataOffset = XML_GetCurrentByteIndex(self->xmlParser);
|
||||
}
|
||||
|
||||
// Determine font style from depth-based tracking and CSS effective style
|
||||
const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold;
|
||||
@@ -477,17 +482,18 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
|
||||
bool ChapterHtmlSlimParser::parseAndBuildPages() {
|
||||
startNewTextBlock((TextBlock::Style)this->paragraphAlignment);
|
||||
|
||||
const XML_Parser parser = XML_ParserCreate(nullptr);
|
||||
xmlParser = XML_ParserCreate(nullptr);
|
||||
int done;
|
||||
|
||||
if (!parser) {
|
||||
if (!xmlParser) {
|
||||
Serial.printf("[%lu] [EHP] Couldn't allocate memory for parser\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
FsFile file;
|
||||
if (!SdMan.openFileForRead("EHP", filepath, file)) {
|
||||
XML_ParserFree(parser);
|
||||
XML_ParserFree(xmlParser);
|
||||
xmlParser = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -495,19 +501,24 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
|
||||
const size_t totalSize = file.size();
|
||||
size_t bytesRead = 0;
|
||||
int lastProgress = -1;
|
||||
|
||||
// Initialize offset tracking - first page starts at offset 0
|
||||
currentPageStartOffset = 0;
|
||||
lastCharDataOffset = 0;
|
||||
|
||||
XML_SetUserData(parser, this);
|
||||
XML_SetElementHandler(parser, startElement, endElement);
|
||||
XML_SetCharacterDataHandler(parser, characterData);
|
||||
XML_SetUserData(xmlParser, this);
|
||||
XML_SetElementHandler(xmlParser, startElement, endElement);
|
||||
XML_SetCharacterDataHandler(xmlParser, characterData);
|
||||
|
||||
do {
|
||||
void* const buf = XML_GetBuffer(parser, 1024);
|
||||
void* const buf = XML_GetBuffer(xmlParser, 1024);
|
||||
if (!buf) {
|
||||
Serial.printf("[%lu] [EHP] Couldn't allocate memory for buffer\n", millis());
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(xmlParser, nullptr);
|
||||
XML_ParserFree(xmlParser);
|
||||
xmlParser = nullptr;
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
@@ -516,10 +527,11 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
|
||||
|
||||
if (len == 0 && file.available() > 0) {
|
||||
Serial.printf("[%lu] [EHP] File read error\n", millis());
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(xmlParser, nullptr);
|
||||
XML_ParserFree(xmlParser);
|
||||
xmlParser = nullptr;
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
@@ -537,27 +549,33 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
|
||||
|
||||
done = file.available() == 0;
|
||||
|
||||
if (XML_ParseBuffer(parser, static_cast<int>(len), done) == XML_STATUS_ERROR) {
|
||||
Serial.printf("[%lu] [EHP] Parse error at line %lu:\n%s\n", millis(), XML_GetCurrentLineNumber(parser),
|
||||
XML_ErrorString(XML_GetErrorCode(parser)));
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
if (XML_ParseBuffer(xmlParser, static_cast<int>(len), done) == XML_STATUS_ERROR) {
|
||||
Serial.printf("[%lu] [EHP] Parse error at line %lu:\n%s\n", millis(), XML_GetCurrentLineNumber(xmlParser),
|
||||
XML_ErrorString(XML_GetErrorCode(xmlParser)));
|
||||
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(xmlParser, nullptr);
|
||||
XML_ParserFree(xmlParser);
|
||||
xmlParser = nullptr;
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(parser, nullptr);
|
||||
XML_ParserFree(parser);
|
||||
XML_StopParser(xmlParser, XML_FALSE); // Stop any pending processing
|
||||
XML_SetElementHandler(xmlParser, nullptr, nullptr); // Clear callbacks
|
||||
XML_SetCharacterDataHandler(xmlParser, nullptr);
|
||||
XML_ParserFree(xmlParser);
|
||||
xmlParser = nullptr;
|
||||
file.close();
|
||||
|
||||
// Process last page if there is still text
|
||||
if (currentTextBlock) {
|
||||
makePages();
|
||||
// Set the content offset for the final page
|
||||
if (currentPage) {
|
||||
currentPage->firstContentOffset = static_cast<uint32_t>(currentPageStartOffset);
|
||||
}
|
||||
completePageFn(std::move(currentPage));
|
||||
currentPage.reset();
|
||||
currentTextBlock.reset();
|
||||
@@ -570,8 +588,15 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
|
||||
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
|
||||
|
||||
if (currentPageNextY + lineHeight > viewportHeight) {
|
||||
// Set the content offset for the page being completed
|
||||
if (currentPage) {
|
||||
currentPage->firstContentOffset = static_cast<uint32_t>(currentPageStartOffset);
|
||||
}
|
||||
completePageFn(std::move(currentPage));
|
||||
|
||||
// Start new page, anchored at the byte offset of the most recently parsed character data
|
||||
currentPage.reset(new Page());
|
||||
currentPageStartOffset = lastCharDataOffset; // Use offset from when content was parsed
|
||||
currentPageNextY = 0;
|
||||
}
|
||||
|
||||
@@ -587,6 +612,8 @@ void ChapterHtmlSlimParser::makePages() {
|
||||
|
||||
if (!currentPage) {
|
||||
currentPage.reset(new Page());
|
||||
// Use offset captured during character data parsing
|
||||
currentPageStartOffset = lastCharDataOffset;
|
||||
currentPageNextY = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -54,6 +54,11 @@ class ChapterHtmlSlimParser {
|
||||
bool effectiveBold = false;
|
||||
bool effectiveItalic = false;
|
||||
bool effectiveUnderline = false;
|
||||
|
||||
// Byte offset tracking for position restoration after re-indexing
|
||||
XML_Parser xmlParser = nullptr; // Store parser for getting current byte index
|
||||
size_t currentPageStartOffset = 0; // Byte offset when current page was started
|
||||
size_t lastCharDataOffset = 0; // Byte offset of last character data (captured during parsing)
|
||||
|
||||
void updateEffectiveInlineStyle();
|
||||
void startNewTextBlock(TextBlock::Style style);
|
||||
|
||||
Reference in New Issue
Block a user