Consolidate chapter page data into single file (#144)

## Summary

* Consolidate chapter page data into single file
* The header structure of the file stays the same, except that a LUT offset now follows the page count
  * All of the page data is then appended to this file
* Finally, the LUT is appended to the end of the file, and the page count and LUT offset in the header are backfilled (see the layout sketch after this list)
* This also significantly shortens cache cleanup, which previously had to scan the cache directory and remove each cached page file; now it only removes a single file
* Removed the page file version, since page data is now embedded in (and versioned by) the section file
* Bumped section file version to 7
* Moved section content into a subdirectory
* Updated docs
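
As a reference for the layout described above, here is a minimal sketch of the version-7 section file and of the LUT-based page lookup, reconstructed from the diff in this PR. The field order and types mirror `writeSectionFileHeader` and the `HEADER_SIZE` constant; `readPod`, `locatePage`, and the use of `std::ifstream` are illustrative stand-ins for the firmware's `serialization::readPod` and its SD `File` API, and the sketch assumes the reader agrees with the writer on `sizeof(int)`, `sizeof(size_t)`, and endianness, since fields are stored as raw PODs.

```cpp
#include <cstddef>
#include <cstdint>
#include <fstream>

// Version-7 section file layout (one file per chapter):
//
//   [header]
//     uint8_t version               // SECTION_FILE_VERSION = 7
//     int     fontId
//     float   lineCompression
//     bool    extraParagraphSpacing
//     int     viewportWidth
//     int     viewportHeight
//     int     pageCount             // written as 0, backfilled after parsing
//     size_t  lutOffset             // written as 0, backfilled after parsing
//   [page 0][page 1]...             // serialized Page blobs, back to back
//   [LUT]                           // pageCount absolute page offsets, size_t each

constexpr size_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) +
                               sizeof(bool) + sizeof(int) + sizeof(int) +
                               sizeof(int) + sizeof(size_t);

// Stand-in for the firmware's raw-POD deserialization helper.
template <typename T>
void readPod(std::ifstream& in, T& out) {
  in.read(reinterpret_cast<char*>(&out), sizeof(T));
}

// Illustrative lookup: return the absolute offset of page `pageIndex`,
// which is where Page::deserialize would then be called.
size_t locatePage(std::ifstream& in, size_t pageIndex) {
  // The LUT offset is the last field of the fixed-size header.
  in.seekg(HEADER_SIZE - sizeof(size_t));
  size_t lutOffset = 0;
  readPod(in, lutOffset);

  // The LUT is a flat array of absolute page start offsets.
  in.seekg(lutOffset + sizeof(size_t) * pageIndex);
  size_t pagePos = 0;
  readPod(in, pagePos);
  return pagePos;
}
```

When the file is created, the header goes in first with `pageCount` and `lutOffset` zeroed, pages are appended while their start positions are collected, the LUT is appended, and the two placeholders are then backfilled with a single seek to `HEADER_SIZE - sizeof(size_t) - sizeof(pageCount)` (see `createSectionFile` in the diff below).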

## Additional Context

* Benchmarks (the speedup percentages below are reductions in wall-clock time; worked figures follow the list):
  * Generating 74 pages of content from a chapter in Jade Legacy took:
    * master: 6,229ms
    * this PR: 1,305ms
    * Speedup of 79%
  * Generating 207 pages of content from the Livesuit book took:
    * With progress bar UI updates:
      * master: 24,250ms
      * this PR: 8,063ms
      * Speedup of 67%
    * Without progress bar UI updates:
      * master: 13,055ms
      * this PR: 3,600ms
      * Speedup of 72%
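
The percentages above are reductions in wall-clock time. For example, for the Jade Legacy chapter: 1 - 1,305 / 6,229 ≈ 0.79, i.e. roughly 6,229 / 1,305 ≈ 4.8× faster end to end; the Livesuit runs work out to about 3.0× (with progress bar updates) and 3.6× (without).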

Dave Allie authored 2025-12-29 12:19:54 +10:00, committed by GitHub
Parent: b1763821b5
Commit: 534504cf7a
11 changed files with 162 additions and 143 deletions


@@ -8,54 +8,60 @@
#include "parsers/ChapterHtmlSlimParser.h"
namespace {
constexpr uint8_t SECTION_FILE_VERSION = 6;
constexpr uint8_t SECTION_FILE_VERSION = 7;
constexpr size_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(int) +
sizeof(int) + sizeof(int) + sizeof(size_t);
} // namespace
void Section::onPageComplete(std::unique_ptr<Page> page) {
const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin";
File outputFile;
if (!FsHelpers::openFileForWrite("SCT", filePath, outputFile)) {
return;
size_t Section::onPageComplete(std::unique_ptr<Page> page) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount);
return 0;
}
page->serialize(outputFile);
outputFile.close();
const auto position = file.position();
if (!page->serialize(file)) {
Serial.printf("[%lu] [SCT] Failed to serialize page %d\n", millis(), pageCount);
return 0;
}
Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount);
pageCount++;
return position;
}
void Section::writeCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) const {
File outputFile;
if (!FsHelpers::openFileForWrite("SCT", cachePath + "/section.bin", outputFile)) {
void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing header\n", millis());
return;
}
serialization::writePod(outputFile, SECTION_FILE_VERSION);
serialization::writePod(outputFile, fontId);
serialization::writePod(outputFile, lineCompression);
serialization::writePod(outputFile, extraParagraphSpacing);
serialization::writePod(outputFile, viewportWidth);
serialization::writePod(outputFile, viewportHeight);
serialization::writePod(outputFile, pageCount);
outputFile.close();
static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) +
sizeof(pageCount) + sizeof(size_t),
"Header size mismatch");
serialization::writePod(file, SECTION_FILE_VERSION);
serialization::writePod(file, fontId);
serialization::writePod(file, lineCompression);
serialization::writePod(file, extraParagraphSpacing);
serialization::writePod(file, viewportWidth);
serialization::writePod(file, viewportHeight);
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written)
serialization::writePod(file, static_cast<size_t>(0)); // Placeholder for LUT offset
}
bool Section::loadCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
const auto sectionFilePath = cachePath + "/section.bin";
File inputFile;
if (!FsHelpers::openFileForRead("SCT", sectionFilePath, inputFile)) {
bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!FsHelpers::openFileForRead("SCT", filePath, file)) {
return false;
}
// Match parameters
{
uint8_t version;
serialization::readPod(inputFile, version);
serialization::readPod(file, version);
if (version != SECTION_FILE_VERSION) {
inputFile.close();
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Unknown version %u\n", millis(), version);
clearCache();
return false;
@@ -64,41 +70,36 @@ bool Section::loadCacheMetadata(const int fontId, const float lineCompression, c
int fileFontId, fileViewportWidth, fileViewportHeight;
float fileLineCompression;
bool fileExtraParagraphSpacing;
serialization::readPod(inputFile, fileFontId);
serialization::readPod(inputFile, fileLineCompression);
serialization::readPod(inputFile, fileExtraParagraphSpacing);
serialization::readPod(inputFile, fileViewportWidth);
serialization::readPod(inputFile, fileViewportHeight);
serialization::readPod(file, fileFontId);
serialization::readPod(file, fileLineCompression);
serialization::readPod(file, fileExtraParagraphSpacing);
serialization::readPod(file, fileViewportWidth);
serialization::readPod(file, fileViewportHeight);
if (fontId != fileFontId || lineCompression != fileLineCompression ||
extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth ||
viewportHeight != fileViewportHeight) {
inputFile.close();
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis());
clearCache();
return false;
}
}
serialization::readPod(inputFile, pageCount);
inputFile.close();
serialization::readPod(file, pageCount);
file.close();
Serial.printf("[%lu] [SCT] Deserialization succeeded: %d pages\n", millis(), pageCount);
return true;
}
void Section::setupCacheDir() const {
epub->setupCacheDir();
SD.mkdir(cachePath.c_str());
}
// Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem)
bool Section::clearCache() const {
if (!SD.exists(cachePath.c_str())) {
if (!SD.exists(filePath.c_str())) {
Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis());
return true;
}
if (!FsHelpers::removeDir(cachePath.c_str())) {
if (!SD.remove(filePath.c_str())) {
Serial.printf("[%lu] [SCT] Failed to clear cache\n", millis());
return false;
}
@@ -107,10 +108,10 @@ bool Section::clearCache() const {
return true;
}
bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
const auto localPath = epub->getSpineItem(spineIndex).href;
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
@@ -156,30 +157,66 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
progressSetupFn();
}
if (!FsHelpers::openFileForWrite("SCT", filePath, file)) {
return false;
}
writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
std::vector<size_t> lut = {};
ChapterHtmlSlimParser visitor(
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight,
[this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); }, progressFn);
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
progressFn);
success = visitor.parseAndBuildPages();
SD.remove(tmpHtmlPath.c_str());
if (!success) {
Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis());
file.close();
SD.remove(filePath.c_str());
return false;
}
writeCacheMetadata(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
const auto lutOffset = file.position();
bool hasFailedLutRecords = false;
// Write LUT
for (const auto& pos : lut) {
if (pos == 0) {
hasFailedLutRecords = true;
break;
}
serialization::writePod(file, pos);
}
if (hasFailedLutRecords) {
Serial.printf("[%lu] [SCT] Failed to write LUT due to invalid page positions\n", millis());
file.close();
SD.remove(filePath.c_str());
return false;
}
// Go back and write LUT offset
file.seek(HEADER_SIZE - sizeof(size_t) - sizeof(pageCount));
serialization::writePod(file, pageCount);
serialization::writePod(file, lutOffset);
file.close();
return true;
}
std::unique_ptr<Page> Section::loadPageFromSD() const {
const auto filePath = cachePath + "/page_" + std::to_string(currentPage) + ".bin";
File inputFile;
if (!FsHelpers::openFileForRead("SCT", filePath, inputFile)) {
std::unique_ptr<Page> Section::loadPageFromSectionFile() {
if (!FsHelpers::openFileForRead("SCT", filePath, file)) {
return nullptr;
}
auto page = Page::deserialize(inputFile);
inputFile.close();
file.seek(HEADER_SIZE - sizeof(size_t));
size_t lutOffset;
serialization::readPod(file, lutOffset);
file.seek(lutOffset + sizeof(size_t) * currentPage);
size_t pagePos;
serialization::readPod(file, pagePos);
file.seek(pagePos);
auto page = Page::deserialize(file);
file.close();
return page;
}