## Summary Closes #766. Thank you for the help @bramschulting! **What is the goal of this PR?** - First and foremost, fix issue #766. - Through working on that, I realized the current CSS parsing/loading code can be improved dramatically for large files and still had additional performance improvements to be made, even with EPUBs with small CSS. **What changes are included?** - Stream CSS parsing and reuse normalization buffers to cut allocations - Add rule limits and selector validation to release rules and free up memory when needed - Skip CSS parsing/loading entirely when "Book's Embedded Style" is off ## Additional Context - My test EPUB has been updated [here](https://github.com/jdk2pq/css-test-epub) to include a very large CSS file to test this out --- ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? _**YES**_, Codex
76 lines
2.7 KiB
C++
76 lines
2.7 KiB
C++
#pragma once
|
|
|
|
#include <Print.h>
|
|
|
|
#include <memory>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "Epub/BookMetadataCache.h"
|
|
#include "Epub/css/CssParser.h"
|
|
|
|
class ZipFile;
|
|
|
|
class Epub {
|
|
// the ncx file (EPUB 2)
|
|
std::string tocNcxItem;
|
|
// the nav file (EPUB 3)
|
|
std::string tocNavItem;
|
|
// where is the EPUBfile?
|
|
std::string filepath;
|
|
// the base path for items in the EPUB file
|
|
std::string contentBasePath;
|
|
// Uniq cache key based on filepath
|
|
std::string cachePath;
|
|
// Spine and TOC cache
|
|
std::unique_ptr<BookMetadataCache> bookMetadataCache;
|
|
// CSS parser for styling
|
|
std::unique_ptr<CssParser> cssParser;
|
|
// CSS files
|
|
std::vector<std::string> cssFiles;
|
|
|
|
bool findContentOpfFile(std::string* contentOpfFile) const;
|
|
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
|
|
bool parseTocNcxFile() const;
|
|
bool parseTocNavFile() const;
|
|
void parseCssFiles() const;
|
|
|
|
public:
|
|
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
|
|
// create a cache key based on the filepath
|
|
cachePath = cacheDir + "/epub_" + std::to_string(std::hash<std::string>{}(this->filepath));
|
|
}
|
|
~Epub() = default;
|
|
std::string& getBasePath() { return contentBasePath; }
|
|
bool load(bool buildIfMissing = true, bool skipLoadingCss = false);
|
|
bool clearCache() const;
|
|
void setupCacheDir() const;
|
|
const std::string& getCachePath() const;
|
|
const std::string& getPath() const;
|
|
const std::string& getTitle() const;
|
|
const std::string& getAuthor() const;
|
|
const std::string& getLanguage() const;
|
|
std::string getCoverBmpPath(bool cropped = false) const;
|
|
bool generateCoverBmp(bool cropped = false) const;
|
|
std::string getThumbBmpPath() const;
|
|
std::string getThumbBmpPath(int height) const;
|
|
bool generateThumbBmp(int height) const;
|
|
uint8_t* readItemContentsToBytes(const std::string& itemHref, size_t* size = nullptr,
|
|
bool trailingNullByte = false) const;
|
|
bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const;
|
|
bool getItemSize(const std::string& itemHref, size_t* size) const;
|
|
BookMetadataCache::SpineEntry getSpineItem(int spineIndex) const;
|
|
BookMetadataCache::TocEntry getTocItem(int tocIndex) const;
|
|
int getSpineItemsCount() const;
|
|
int getTocItemsCount() const;
|
|
int getSpineIndexForTocIndex(int tocIndex) const;
|
|
int getTocIndexForSpineIndex(int spineIndex) const;
|
|
size_t getCumulativeSpineItemSize(int spineIndex) const;
|
|
int getSpineIndexForTextReference() const;
|
|
|
|
size_t getBookSize() const;
|
|
float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
|
|
CssParser* getCssParser() const { return cssParser.get(); }
|
|
};
|