perf: Improve large CSS files handling (#779)
## Summary

Closes #766. Thank you for the help @bramschulting!

**What is the goal of this PR?**

- First and foremost, fix issue #766.
- While working on that, I realized the current CSS parsing/loading code could be improved dramatically for large files, and that there were additional performance improvements to be made even for EPUBs with small CSS.

**What changes are included?**

- Stream CSS parsing and reuse normalization buffers to cut allocations
- Add rule limits and selector validation to release rules and free up memory when needed
- Skip CSS parsing/loading entirely when "Book's Embedded Style" is off

## Additional Context

- My test EPUB has been updated [here](https://github.com/jdk2pq/css-test-epub) to include a very large CSS file to test this out

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers.

Did you use AI tools to help write this code? _**YES**_, Codex
This commit is contained in:
@@ -208,30 +208,14 @@ bool Epub::parseTocNavFile() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the path of the CSS rules cache file by appending
// "/css_rules.cache" to cachePath.
std::string Epub::getCssRulesCache() const {
  std::string rulesCacheLocation = cachePath + "/css_rules.cache";
  return rulesCacheLocation;
}
|
||||
|
||||
bool Epub::loadCssRulesFromCache() const {
|
||||
FsFile cssCacheFile;
|
||||
if (Storage.openFileForRead("EBP", getCssRulesCache(), cssCacheFile)) {
|
||||
if (cssParser->loadFromCache(cssCacheFile)) {
|
||||
cssCacheFile.close();
|
||||
LOG_DBG("EBP", "Loaded CSS rules from cache");
|
||||
return true;
|
||||
}
|
||||
cssCacheFile.close();
|
||||
LOG_DBG("EBP", "CSS cache invalid, reparsing");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Epub::parseCssFiles() const {
|
||||
if (cssFiles.empty()) {
|
||||
LOG_DBG("EBP", "No CSS files to parse, but CssParser created for inline styles");
|
||||
}
|
||||
|
||||
// Try to load from CSS cache first
|
||||
if (!loadCssRulesFromCache()) {
|
||||
// Cache miss - parse CSS files
|
||||
// See if we have a cached version of the CSS rules
|
||||
if (!cssParser->hasCache()) {
|
||||
// No cache yet - parse CSS files
|
||||
for (const auto& cssPath : cssFiles) {
|
||||
LOG_DBG("EBP", "Parsing CSS file: %s", cssPath.c_str());
|
||||
|
||||
@@ -262,11 +246,10 @@ void Epub::parseCssFiles() const {
|
||||
}
|
||||
|
||||
// Save to cache for next time
|
||||
FsFile cssCacheFile;
|
||||
if (Storage.openFileForWrite("EBP", getCssRulesCache(), cssCacheFile)) {
|
||||
cssParser->saveToCache(cssCacheFile);
|
||||
cssCacheFile.close();
|
||||
if (!cssParser->saveToCache()) {
|
||||
LOG_ERR("EBP", "Failed to save CSS rules to cache");
|
||||
}
|
||||
cssParser->clear();
|
||||
|
||||
LOG_DBG("EBP", "Loaded %zu CSS style rules from %zu files", cssParser->ruleCount(), cssFiles.size());
|
||||
}
|
||||
@@ -279,11 +262,11 @@ bool Epub::load(const bool buildIfMissing, const bool skipLoadingCss) {
|
||||
// Initialize spine/TOC cache
|
||||
bookMetadataCache.reset(new BookMetadataCache(cachePath));
|
||||
// Always create CssParser - needed for inline style parsing even without CSS files
|
||||
cssParser.reset(new CssParser());
|
||||
cssParser.reset(new CssParser(cachePath));
|
||||
|
||||
// Try to load existing cache first
|
||||
if (bookMetadataCache->load()) {
|
||||
if (!skipLoadingCss && !loadCssRulesFromCache()) {
|
||||
if (!skipLoadingCss && !cssParser->hasCache()) {
|
||||
LOG_DBG("EBP", "Warning: CSS rules cache not found, attempting to parse CSS files");
|
||||
// to get CSS file list
|
||||
if (!parseContentOpf(bookMetadataCache->coreMetadata)) {
|
||||
|
||||
Reference in New Issue
Block a user