crosspoint-reader/lib/Epub/Epub/BookMetadataCache.h
Daniel 06ced8f2d1 perf: optimize large EPUB indexing from O(n²) to O(n log n)
Three optimizations for EPUBs with many chapters (e.g. 2768 chapters):

1. OPF idref→href lookup: Build sorted hash index during manifest parsing,
   use binary search during spine resolution. Reduces ~4min to ~30-60s.

2. TOC href→spineIndex lookup: Build sorted hash index in beginTocPass(),
   use binary search in createTocEntry(). Reduces ~4min to ~30-60s.

3. ZIP central-dir cursor: Resume scanning from last position instead of
   restarting from beginning. Reduces ~8min to ~1-3min.

All optimizations only activate for large EPUBs (≥400 spine items).
Small books use unchanged code paths.

Memory impact: ~33KB + ~39KB temporary during indexing, freed after.
Expected total: ~17min → ~3-5min for Shadow Slave (2768 chapters).

Also adds phase timing logs for performance measurement.
2026-01-20 23:35:54 -08:00

113 lines
3.2 KiB
C++

#pragma once
#include <SDCardManager.h>
#include <algorithm>
#include <string>
#include <vector>
/// Metadata cache for a single book, backed by the file at `cachePath`.
///
/// The public API is split into a building phase (begin*/create*/end* calls,
/// followed by buildBookBin) and a reading phase (load, then the get* accessors).
/// All method bodies except the small inline helpers live in the implementation
/// file.
class BookMetadataCache {
 public:
  /// Book-level descriptive fields.
  struct BookMetadata {
    std::string title;
    std::string author;
    std::string language;
    std::string coverItemHref;
    std::string textReferenceHref;
  };

  /// One spine item. A default-constructed entry has an empty href,
  /// cumulativeSize 0 and tocIndex -1.
  struct SpineEntry {
    std::string href;
    size_t cumulativeSize = 0;
    int16_t tocIndex = -1;

    SpineEntry() = default;
    SpineEntry(std::string href, const size_t cumulativeSize, const int16_t tocIndex)
        : href{std::move(href)}, cumulativeSize{cumulativeSize}, tocIndex{tocIndex} {}
  };

  /// One table-of-contents item. A default-constructed entry has empty strings,
  /// level 0 and spineIndex -1.
  struct TocEntry {
    std::string title;
    std::string href;
    std::string anchor;
    uint8_t level = 0;
    int16_t spineIndex = -1;

    TocEntry() = default;
    TocEntry(std::string title, std::string href, std::string anchor, const uint8_t level, const int16_t spineIndex)
        : title{std::move(title)},
          href{std::move(href)},
          anchor{std::move(anchor)},
          level{level},
          spineIndex{spineIndex} {}
  };

 private:
  std::string cachePath;
  size_t lutOffset = 0;  // NOTE(review): presumably the lookup-table offset inside the cache file - confirm in the .cpp
  uint16_t spineCount = 0;
  uint16_t tocCount = 0;
  bool loaded = false;
  bool buildMode = false;
  FsFile bookFile;

  // Temporary file handles, used only while the cache is being built.
  FsFile spineFile;
  FsFile tocFile;

  // Index for fast href -> spineIndex lookup (used only for large EPUBs).
  struct SpineHrefIndexEntry {
    uint64_t hrefHash;   // FNV-1a 64-bit hash of the href
    uint16_t hrefLen;    // href length, kept alongside the hash to reduce collisions
    int16_t spineIndex;  // spine position the href maps to
  };
  std::vector<SpineHrefIndexEntry> spineHrefIndex;
  bool useSpineHrefIndex = false;
  // Spine-size threshold associated with the large-EPUB path; the comparison
  // that uses it is in the implementation file.
  static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400;

  /// Computes the FNV-1a 64-bit hash of `s`: starting from the offset basis,
  /// each byte is XORed into the state, which is then multiplied by the prime.
  /// An empty string hashes to the offset basis.
  static uint64_t fnvHash64(const std::string& s) {
    constexpr uint64_t kOffsetBasis = 14695981039346656037ull;
    constexpr uint64_t kPrime = 1099511628211ull;

    uint64_t digest = kOffsetBasis;
    for (size_t i = 0; i < s.size(); ++i) {
      // Go through uint8_t so a negative char does not sign-extend into the upper bits.
      digest = (digest ^ static_cast<uint8_t>(s[i])) * kPrime;
    }
    return digest;
  }

  // Record (de)serialisation helpers operating on an already-open file.
  uint32_t writeSpineEntry(FsFile& file, const SpineEntry& entry) const;
  uint32_t writeTocEntry(FsFile& file, const TocEntry& entry) const;
  SpineEntry readSpineEntry(FsFile& file) const;
  TocEntry readTocEntry(FsFile& file) const;

 public:
  BookMetadata coreMetadata;

  /// Creates an unloaded cache bound to `cachePath`; counts and offsets start
  /// at zero and both `loaded` and `buildMode` start false.
  explicit BookMetadataCache(std::string cachePath) : cachePath{std::move(cachePath)} {}
  ~BookMetadataCache() = default;

  // ---- Building phase (stream to disk immediately) ----
  bool beginWrite();
  bool beginContentOpfPass();
  void createSpineEntry(const std::string& href);
  bool endContentOpfPass();
  bool beginTocPass();
  void createTocEntry(const std::string& title, const std::string& href, const std::string& anchor, uint8_t level);
  bool endTocPass();
  bool endWrite();
  bool cleanupTmpFiles() const;

  // ---- Post-processing to update mappings and sizes ----
  bool buildBookBin(const std::string& epubPath, const BookMetadata& metadata);

  // ---- Reading phase (read mode) ----
  bool load();
  SpineEntry getSpineEntry(int index);
  TocEntry getTocEntry(int index);

  /// Current value of the spine counter, widened to int.
  int getSpineCount() const { return spineCount; }
  /// Current value of the TOC counter, widened to int.
  int getTocCount() const { return tocCount; }
  /// Current value of the `loaded` flag.
  bool isLoaded() const { return loaded; }
};