Dave Allie 534504cf7a
Consolidate chapter page data into single file (#144)
## Summary

* Consolidate chapter page data into single file
* Header structure of the file stays the same, following the page count,
we now put a LUT offset
   * The page data is all then appended to this file
* Finally the LUT is appended to the end of the file, and the page count
is updated
* This will also significantly improve the duration of cache cleanup
which takes a while to scan the directory and cleanup content
* Remove page file version as it's all tied up into the section file now
* Bumped section file version to 7
* Moved section content into sub directory
* Updated docs

## Additional Context

* Benchmarks:
  * Generating 74 pages of content from a chapter in Jade Legacy took:
    * master: 6,229ms
    * this PR: 1,305ms
    * Speedup of 79%
  * Generating 207 pages of content from Livesuit book:
    * With progress bar UI updates:
      * master: 24,250ms
      * this PR: 8,063ms
      * Speedup of 67%
    * Without progress bar UI updates:
      * master: 13,055ms
      * this PR: 3,600ms
      * Speedup of 72%
2025-12-29 13:19:54 +11:00

223 lines
7.7 KiB
C++

#include "Section.h"
#include <FsHelpers.h>
#include <SD.h>
#include <Serialization.h>
#include "Page.h"
#include "parsers/ChapterHtmlSlimParser.h"
namespace {
constexpr uint8_t SECTION_FILE_VERSION = 7;
constexpr size_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(int) +
sizeof(int) + sizeof(int) + sizeof(size_t);
} // namespace
size_t Section::onPageComplete(std::unique_ptr<Page> page) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount);
return 0;
}
const auto position = file.position();
if (!page->serialize(file)) {
Serial.printf("[%lu] [SCT] Failed to serialize page %d\n", millis(), pageCount);
return 0;
}
Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount);
pageCount++;
return position;
}
void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing header\n", millis());
return;
}
static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) +
sizeof(pageCount) + sizeof(size_t),
"Header size mismatch");
serialization::writePod(file, SECTION_FILE_VERSION);
serialization::writePod(file, fontId);
serialization::writePod(file, lineCompression);
serialization::writePod(file, extraParagraphSpacing);
serialization::writePod(file, viewportWidth);
serialization::writePod(file, viewportHeight);
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written)
serialization::writePod(file, static_cast<size_t>(0)); // Placeholder for LUT offset
}
bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!FsHelpers::openFileForRead("SCT", filePath, file)) {
return false;
}
// Match parameters
{
uint8_t version;
serialization::readPod(file, version);
if (version != SECTION_FILE_VERSION) {
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Unknown version %u\n", millis(), version);
clearCache();
return false;
}
int fileFontId, fileViewportWidth, fileViewportHeight;
float fileLineCompression;
bool fileExtraParagraphSpacing;
serialization::readPod(file, fileFontId);
serialization::readPod(file, fileLineCompression);
serialization::readPod(file, fileExtraParagraphSpacing);
serialization::readPod(file, fileViewportWidth);
serialization::readPod(file, fileViewportHeight);
if (fontId != fileFontId || lineCompression != fileLineCompression ||
extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth ||
viewportHeight != fileViewportHeight) {
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis());
clearCache();
return false;
}
}
serialization::readPod(file, pageCount);
file.close();
Serial.printf("[%lu] [SCT] Deserialization succeeded: %d pages\n", millis(), pageCount);
return true;
}
// Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem)
bool Section::clearCache() const {
if (!SD.exists(filePath.c_str())) {
Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis());
return true;
}
if (!SD.remove(filePath.c_str())) {
Serial.printf("[%lu] [SCT] Failed to clear cache\n", millis());
return false;
}
Serial.printf("[%lu] [SCT] Cache cleared successfully\n", millis());
return true;
}
bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
const auto localPath = epub->getSpineItem(spineIndex).href;
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
// Retry logic for SD card timing issues
bool success = false;
size_t fileSize = 0;
for (int attempt = 0; attempt < 3 && !success; attempt++) {
if (attempt > 0) {
Serial.printf("[%lu] [SCT] Retrying stream (attempt %d)...\n", millis(), attempt + 1);
delay(50); // Brief delay before retry
}
// Remove any incomplete file from previous attempt before retrying
if (SD.exists(tmpHtmlPath.c_str())) {
SD.remove(tmpHtmlPath.c_str());
}
File tmpHtml;
if (!FsHelpers::openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) {
continue;
}
success = epub->readItemContentsToStream(localPath, tmpHtml, 1024);
fileSize = tmpHtml.size();
tmpHtml.close();
// If streaming failed, remove the incomplete file immediately
if (!success && SD.exists(tmpHtmlPath.c_str())) {
SD.remove(tmpHtmlPath.c_str());
Serial.printf("[%lu] [SCT] Removed incomplete temp file after failed attempt\n", millis());
}
}
if (!success) {
Serial.printf("[%lu] [SCT] Failed to stream item contents to temp file after retries\n", millis());
return false;
}
Serial.printf("[%lu] [SCT] Streamed temp HTML to %s (%d bytes)\n", millis(), tmpHtmlPath.c_str(), fileSize);
// Only show progress bar for larger chapters where rendering overhead is worth it
if (progressSetupFn && fileSize >= MIN_SIZE_FOR_PROGRESS) {
progressSetupFn();
}
if (!FsHelpers::openFileForWrite("SCT", filePath, file)) {
return false;
}
writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
std::vector<size_t> lut = {};
ChapterHtmlSlimParser visitor(
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight,
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
progressFn);
success = visitor.parseAndBuildPages();
SD.remove(tmpHtmlPath.c_str());
if (!success) {
Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis());
file.close();
SD.remove(filePath.c_str());
return false;
}
const auto lutOffset = file.position();
bool hasFailedLutRecords = false;
// Write LUT
for (const auto& pos : lut) {
if (pos == 0) {
hasFailedLutRecords = true;
break;
}
serialization::writePod(file, pos);
}
if (hasFailedLutRecords) {
Serial.printf("[%lu] [SCT] Failed to write LUT due to invalid page positions\n", millis());
file.close();
SD.remove(filePath.c_str());
return false;
}
// Go back and write LUT offset
file.seek(HEADER_SIZE - sizeof(size_t) - sizeof(pageCount));
serialization::writePod(file, pageCount);
serialization::writePod(file, lutOffset);
file.close();
return true;
}
std::unique_ptr<Page> Section::loadPageFromSectionFile() {
if (!FsHelpers::openFileForRead("SCT", filePath, file)) {
return nullptr;
}
file.seek(HEADER_SIZE - sizeof(size_t));
size_t lutOffset;
serialization::readPod(file, lutOffset);
file.seek(lutOffset + sizeof(size_t) * currentPage);
size_t pagePos;
serialization::readPod(file, pagePos);
file.seek(pagePos);
auto page = Page::deserialize(file);
file.close();
return page;
}