feat: Add CSS parsing and CSS support in EPUBs
This commit is contained in:
parent
21277e03eb
commit
94ce987f2c
@ -85,6 +85,9 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
|
||||
tocNavItem = opfParser.tocNavPath;
|
||||
}
|
||||
|
||||
// Copy CSS files to metadata
|
||||
bookMetadata.cssFiles = opfParser.cssFiles;
|
||||
|
||||
Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
|
||||
return true;
|
||||
}
|
||||
@ -203,6 +206,55 @@ bool Epub::parseTocNavFile() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Epub::parseCssFiles() {
|
||||
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
|
||||
Serial.printf("[%lu] [EBP] Cannot parse CSS, cache not loaded\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Always create CssParser - needed for inline style parsing even without CSS files
|
||||
cssParser.reset(new CssParser());
|
||||
|
||||
const auto& cssFiles = bookMetadataCache->coreMetadata.cssFiles;
|
||||
if (cssFiles.empty()) {
|
||||
Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis());
|
||||
return true;
|
||||
}
|
||||
|
||||
for (const auto& cssPath : cssFiles) {
|
||||
Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str());
|
||||
|
||||
// Extract CSS file to temp location
|
||||
const auto tmpCssPath = getCachePath() + "/.tmp.css";
|
||||
FsFile tempCssFile;
|
||||
if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) {
|
||||
Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis());
|
||||
continue;
|
||||
}
|
||||
if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) {
|
||||
Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str());
|
||||
tempCssFile.close();
|
||||
SdMan.remove(tmpCssPath.c_str());
|
||||
continue;
|
||||
}
|
||||
tempCssFile.close();
|
||||
|
||||
// Parse the CSS file
|
||||
if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) {
|
||||
Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis());
|
||||
SdMan.remove(tmpCssPath.c_str());
|
||||
continue;
|
||||
}
|
||||
cssParser->loadFromStream(tempCssFile);
|
||||
tempCssFile.close();
|
||||
SdMan.remove(tmpCssPath.c_str());
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(),
|
||||
cssFiles.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
// load in the meta data for the epub file
|
||||
bool Epub::load(const bool buildIfMissing) {
|
||||
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
|
||||
@ -212,6 +264,8 @@ bool Epub::load(const bool buildIfMissing) {
|
||||
|
||||
// Try to load existing cache first
|
||||
if (bookMetadataCache->load()) {
|
||||
// Parse CSS files from loaded cache
|
||||
parseCssFiles();
|
||||
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
|
||||
return true;
|
||||
}
|
||||
@ -299,6 +353,9 @@ bool Epub::load(const bool buildIfMissing) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse CSS files after cache reload
|
||||
parseCssFiles();
|
||||
|
||||
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "Epub/BookMetadataCache.h"
|
||||
#include "Epub/css/CssParser.h"
|
||||
|
||||
class ZipFile;
|
||||
|
||||
@ -24,11 +25,14 @@ class Epub {
|
||||
std::string cachePath;
|
||||
// Spine and TOC cache
|
||||
std::unique_ptr<BookMetadataCache> bookMetadataCache;
|
||||
// CSS parser for styling
|
||||
std::unique_ptr<CssParser> cssParser;
|
||||
|
||||
bool findContentOpfFile(std::string* contentOpfFile) const;
|
||||
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
|
||||
bool parseTocNcxFile() const;
|
||||
bool parseTocNavFile() const;
|
||||
bool parseCssFiles();
|
||||
|
||||
public:
|
||||
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
|
||||
@ -63,4 +67,5 @@ class Epub {
|
||||
|
||||
size_t getBookSize() const;
|
||||
uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const;
|
||||
const CssParser* getCssParser() const { return cssParser.get(); }
|
||||
};
|
||||
|
||||
@ -9,7 +9,7 @@
|
||||
#include "FsHelpers.h"
|
||||
|
||||
namespace {
|
||||
constexpr uint8_t BOOK_CACHE_VERSION = 4;
|
||||
constexpr uint8_t BOOK_CACHE_VERSION = 5;
|
||||
constexpr char bookBinFile[] = "/book.bin";
|
||||
constexpr char tmpSpineBinFile[] = "/spine.bin.tmp";
|
||||
constexpr char tmpTocBinFile[] = "/toc.bin.tmp";
|
||||
@ -87,8 +87,13 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
|
||||
|
||||
constexpr uint32_t headerASize =
|
||||
sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount);
|
||||
// Calculate CSS files size: count + each string (length + data)
|
||||
uint32_t cssFilesSize = sizeof(uint16_t); // count
|
||||
for (const auto& css : metadata.cssFiles) {
|
||||
cssFilesSize += sizeof(uint32_t) + css.size();
|
||||
}
|
||||
const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() +
|
||||
metadata.textReferenceHref.size() + sizeof(uint32_t) * 4;
|
||||
metadata.textReferenceHref.size() + sizeof(uint32_t) * 4 + cssFilesSize;
|
||||
const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount;
|
||||
const uint32_t lutOffset = headerASize + metadataSize;
|
||||
|
||||
@ -102,6 +107,11 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
|
||||
serialization::writeString(bookFile, metadata.author);
|
||||
serialization::writeString(bookFile, metadata.coverItemHref);
|
||||
serialization::writeString(bookFile, metadata.textReferenceHref);
|
||||
// CSS files
|
||||
serialization::writePod(bookFile, static_cast<uint16_t>(metadata.cssFiles.size()));
|
||||
for (const auto& css : metadata.cssFiles) {
|
||||
serialization::writeString(bookFile, css);
|
||||
}
|
||||
|
||||
// Loop through spine entries, writing LUT positions
|
||||
spineFile.seek(0);
|
||||
@ -291,6 +301,16 @@ bool BookMetadataCache::load() {
|
||||
serialization::readString(bookFile, coreMetadata.author);
|
||||
serialization::readString(bookFile, coreMetadata.coverItemHref);
|
||||
serialization::readString(bookFile, coreMetadata.textReferenceHref);
|
||||
// CSS files
|
||||
uint16_t cssCount;
|
||||
serialization::readPod(bookFile, cssCount);
|
||||
coreMetadata.cssFiles.clear();
|
||||
coreMetadata.cssFiles.reserve(cssCount);
|
||||
for (uint16_t i = 0; i < cssCount; i++) {
|
||||
std::string cssPath;
|
||||
serialization::readString(bookFile, cssPath);
|
||||
coreMetadata.cssFiles.push_back(std::move(cssPath));
|
||||
}
|
||||
|
||||
loaded = true;
|
||||
Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#include <SDCardManager.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class BookMetadataCache {
|
||||
public:
|
||||
@ -11,6 +12,7 @@ class BookMetadataCache {
|
||||
std::string author;
|
||||
std::string coverItemHref;
|
||||
std::string textReferenceHref;
|
||||
std::vector<std::string> cssFiles;
|
||||
};
|
||||
|
||||
struct SpineEntry {
|
||||
|
||||
@ -10,11 +10,12 @@
|
||||
|
||||
constexpr int MAX_COST = std::numeric_limits<int>::max();
|
||||
|
||||
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) {
|
||||
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) {
|
||||
if (word.empty()) return;
|
||||
|
||||
words.push_back(std::move(word));
|
||||
wordStyles.push_back(fontStyle);
|
||||
wordUnderlines.push_back(underline);
|
||||
}
|
||||
|
||||
// Consumes data to minimize memory usage
|
||||
@ -42,17 +43,33 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
|
||||
std::vector<uint16_t> wordWidths;
|
||||
wordWidths.reserve(totalWordCount);
|
||||
|
||||
// add em-space at the beginning of first word in paragraph to indent
|
||||
if ((style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && !extraParagraphSpacing) {
|
||||
// Apply text indent: either from CSS blockStyle or default em-space for justified/left-aligned
|
||||
const bool shouldIndent = (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && !extraParagraphSpacing;
|
||||
if (blockStyle.textIndent > 0) {
|
||||
// CSS text-indent is handled via first word width adjustment
|
||||
// We'll add the indent value directly to the first word's width
|
||||
} else if (shouldIndent) {
|
||||
// Default: add em-space at the beginning of first word in paragraph to indent
|
||||
std::string& first_word = words.front();
|
||||
first_word.insert(0, "\xe2\x80\x83");
|
||||
}
|
||||
|
||||
auto wordsIt = words.begin();
|
||||
auto wordStylesIt = wordStyles.begin();
|
||||
bool isFirst = true;
|
||||
|
||||
while (wordsIt != words.end()) {
|
||||
wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt));
|
||||
uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt);
|
||||
|
||||
// Add CSS text-indent to first word width
|
||||
if (isFirst && blockStyle.textIndent > 0 && shouldIndent) {
|
||||
width += static_cast<uint16_t>(blockStyle.textIndent);
|
||||
isFirst = false;
|
||||
} else {
|
||||
isFirst = false;
|
||||
}
|
||||
|
||||
wordWidths.push_back(width);
|
||||
|
||||
std::advance(wordsIt, 1);
|
||||
std::advance(wordStylesIt, 1);
|
||||
@ -182,14 +199,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
||||
// Iterators always start at the beginning as we are moving content with splice below
|
||||
auto wordEndIt = words.begin();
|
||||
auto wordStyleEndIt = wordStyles.begin();
|
||||
auto wordUnderlineEndIt = wordUnderlines.begin();
|
||||
std::advance(wordEndIt, lineWordCount);
|
||||
std::advance(wordStyleEndIt, lineWordCount);
|
||||
std::advance(wordUnderlineEndIt, lineWordCount);
|
||||
|
||||
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
|
||||
std::list<std::string> lineWords;
|
||||
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
|
||||
std::list<EpdFontFamily::Style> lineWordStyles;
|
||||
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
|
||||
std::list<bool> lineWordUnderlines;
|
||||
lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt);
|
||||
|
||||
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
|
||||
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style,
|
||||
blockStyle, std::move(lineWordUnderlines)));
|
||||
}
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "blocks/BlockStyle.h"
|
||||
#include "blocks/TextBlock.h"
|
||||
|
||||
class GfxRenderer;
|
||||
@ -15,7 +16,9 @@ class GfxRenderer;
|
||||
class ParsedText {
|
||||
std::list<std::string> words;
|
||||
std::list<EpdFontFamily::Style> wordStyles;
|
||||
std::list<bool> wordUnderlines; // Track underline per word
|
||||
TextBlock::Style style;
|
||||
BlockStyle blockStyle;
|
||||
bool extraParagraphSpacing;
|
||||
|
||||
std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths) const;
|
||||
@ -25,13 +28,16 @@ class ParsedText {
|
||||
std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId);
|
||||
|
||||
public:
|
||||
explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing)
|
||||
: style(style), extraParagraphSpacing(extraParagraphSpacing) {}
|
||||
explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing,
|
||||
const BlockStyle& blockStyle = BlockStyle())
|
||||
: style(style), blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing) {}
|
||||
~ParsedText() = default;
|
||||
|
||||
void addWord(std::string word, EpdFontFamily::Style fontStyle);
|
||||
void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false);
|
||||
void setStyle(const TextBlock::Style style) { this->style = style; }
|
||||
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
|
||||
TextBlock::Style getStyle() const { return style; }
|
||||
const BlockStyle& getBlockStyle() const { return blockStyle; }
|
||||
size_t size() const { return words.size(); }
|
||||
bool isEmpty() const { return words.empty(); }
|
||||
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth,
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
#include "parsers/ChapterHtmlSlimParser.h"
|
||||
|
||||
namespace {
|
||||
constexpr uint8_t SECTION_FILE_VERSION = 9;
|
||||
constexpr uint8_t SECTION_FILE_VERSION = 10;
|
||||
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
|
||||
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t);
|
||||
} // namespace
|
||||
@ -179,7 +179,7 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
||||
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
||||
viewportHeight,
|
||||
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
|
||||
progressFn);
|
||||
progressFn, epub->getCssParser());
|
||||
success = visitor.parseAndBuildPages();
|
||||
|
||||
SdMan.remove(tmpHtmlPath.c_str());
|
||||
|
||||
17
lib/Epub/Epub/blocks/BlockStyle.h
Normal file
17
lib/Epub/Epub/blocks/BlockStyle.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
/**
 * BlockStyle - block-level CSS properties attached to a paragraph.
 *
 * Holds the vertical spacing (margin and padding) and the first-line text
 * indentation of a block element. For rendering purposes padding is handled
 * the same way as margin.
 */
struct BlockStyle {
  // Vertical spacing, expressed in whole lines (0-2).
  int8_t marginTop{0};
  int8_t marginBottom{0};
  // Padding is treated the same as margin.
  int8_t paddingTop{0};
  int8_t paddingBottom{0};

  // First-line indentation in pixels.
  int16_t textIndent{0};
};
|
||||
@ -14,13 +14,40 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
|
||||
auto wordIt = words.begin();
|
||||
auto wordStylesIt = wordStyles.begin();
|
||||
auto wordXposIt = wordXpos.begin();
|
||||
|
||||
auto wordUnderlineIt = wordUnderlines.begin();
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt);
|
||||
const int wordX = *wordXposIt + x;
|
||||
renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt);
|
||||
|
||||
// Draw underline if word is underlined
|
||||
if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) {
|
||||
const std::string& w = *wordIt;
|
||||
const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt);
|
||||
// y is the top of the text line; add ascender to reach baseline, then offset 2px below
|
||||
const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2;
|
||||
|
||||
int startX = wordX;
|
||||
int underlineWidth = fullWordWidth;
|
||||
|
||||
// if word starts with em-space ("\xe2\x80\x83"), account for the additional indent before drawing the line
|
||||
if (w.size() >= 3 && static_cast<uint8_t>(w[0]) == 0xE2 && static_cast<uint8_t>(w[1]) == 0x80 &&
|
||||
static_cast<uint8_t>(w[2]) == 0x83) {
|
||||
const char* visiblePtr = w.c_str() + 3;
|
||||
const int prefixWidth = renderer.getIndentWidth(fontId, std::string("\xe2\x80\x83").c_str());
|
||||
const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt);
|
||||
startX = wordX + prefixWidth;
|
||||
underlineWidth = visibleWidth;
|
||||
}
|
||||
|
||||
renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true);
|
||||
}
|
||||
|
||||
std::advance(wordIt, 1);
|
||||
std::advance(wordStylesIt, 1);
|
||||
std::advance(wordXposIt, 1);
|
||||
if (wordUnderlineIt != wordUnderlines.end()) {
|
||||
std::advance(wordUnderlineIt, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -37,9 +64,35 @@ bool TextBlock::serialize(FsFile& file) const {
|
||||
for (auto x : wordXpos) serialization::writePod(file, x);
|
||||
for (auto s : wordStyles) serialization::writePod(file, s);
|
||||
|
||||
// Block style
|
||||
// Underline flags (packed as bytes, 8 words per byte)
|
||||
uint8_t underlineByte = 0;
|
||||
int bitIndex = 0;
|
||||
auto underlineIt = wordUnderlines.begin();
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
if (underlineIt != wordUnderlines.end() && *underlineIt) {
|
||||
underlineByte |= 1 << bitIndex;
|
||||
}
|
||||
bitIndex++;
|
||||
if (bitIndex == 8 || i == words.size() - 1) {
|
||||
serialization::writePod(file, underlineByte);
|
||||
underlineByte = 0;
|
||||
bitIndex = 0;
|
||||
}
|
||||
if (underlineIt != wordUnderlines.end()) {
|
||||
++underlineIt;
|
||||
}
|
||||
}
|
||||
|
||||
// Block style (alignment)
|
||||
serialization::writePod(file, style);
|
||||
|
||||
// Block style (margins/padding/indent)
|
||||
serialization::writePod(file, blockStyle.marginTop);
|
||||
serialization::writePod(file, blockStyle.marginBottom);
|
||||
serialization::writePod(file, blockStyle.paddingTop);
|
||||
serialization::writePod(file, blockStyle.paddingBottom);
|
||||
serialization::writePod(file, blockStyle.textIndent);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -48,7 +101,9 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
|
||||
std::list<std::string> words;
|
||||
std::list<uint16_t> wordXpos;
|
||||
std::list<EpdFontFamily::Style> wordStyles;
|
||||
std::list<bool> wordUnderlines;
|
||||
Style style;
|
||||
BlockStyle blockStyle;
|
||||
|
||||
// Word count
|
||||
serialization::readPod(file, wc);
|
||||
@ -67,8 +122,29 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
|
||||
for (auto& x : wordXpos) serialization::readPod(file, x);
|
||||
for (auto& s : wordStyles) serialization::readPod(file, s);
|
||||
|
||||
// Block style
|
||||
// Underline flags (packed as bytes, 8 words per byte)
|
||||
wordUnderlines.resize(wc, false);
|
||||
auto underlineIt = wordUnderlines.begin();
|
||||
const int bytesNeeded = (wc + 7) / 8;
|
||||
for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) {
|
||||
uint8_t underlineByte;
|
||||
serialization::readPod(file, underlineByte);
|
||||
for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) {
|
||||
*underlineIt = (underlineByte & 1 << bit) != 0;
|
||||
++underlineIt;
|
||||
}
|
||||
}
|
||||
|
||||
// Block style (alignment)
|
||||
serialization::readPod(file, style);
|
||||
|
||||
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style));
|
||||
// Block style (margins/padding/indent)
|
||||
serialization::readPod(file, blockStyle.marginTop);
|
||||
serialization::readPod(file, blockStyle.marginBottom);
|
||||
serialization::readPod(file, blockStyle.paddingTop);
|
||||
serialization::readPod(file, blockStyle.paddingBottom);
|
||||
serialization::readPod(file, blockStyle.textIndent);
|
||||
|
||||
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style,
|
||||
blockStyle, std::move(wordUnderlines)));
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
#include <string>
|
||||
|
||||
#include "Block.h"
|
||||
#include "BlockStyle.h"
|
||||
|
||||
// Represents a line of text on a page
|
||||
class TextBlock final : public Block {
|
||||
@ -22,15 +23,31 @@ class TextBlock final : public Block {
|
||||
std::list<std::string> words;
|
||||
std::list<uint16_t> wordXpos;
|
||||
std::list<EpdFontFamily::Style> wordStyles;
|
||||
std::list<bool> wordUnderlines; // Track underline per word
|
||||
Style style;
|
||||
BlockStyle blockStyle;
|
||||
|
||||
public:
|
||||
explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos,
|
||||
std::list<EpdFontFamily::Style> word_styles, const Style style)
|
||||
: words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {}
|
||||
std::list<EpdFontFamily::Style> word_styles, const Style style,
|
||||
const BlockStyle& blockStyle = BlockStyle(),
|
||||
std::list<bool> word_underlines = std::list<bool>())
|
||||
: words(std::move(words)),
|
||||
wordXpos(std::move(word_xpos)),
|
||||
wordStyles(std::move(word_styles)),
|
||||
wordUnderlines(std::move(word_underlines)),
|
||||
style(style),
|
||||
blockStyle(blockStyle) {
|
||||
// Ensure underlines list matches words list size
|
||||
while (this->wordUnderlines.size() < this->words.size()) {
|
||||
this->wordUnderlines.push_back(false);
|
||||
}
|
||||
}
|
||||
~TextBlock() override = default;
|
||||
void setStyle(const Style style) { this->style = style; }
|
||||
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
|
||||
Style getStyle() const { return style; }
|
||||
const BlockStyle& getBlockStyle() const { return blockStyle; }
|
||||
bool isEmpty() override { return words.empty(); }
|
||||
void layout(GfxRenderer& renderer) override {};
|
||||
// given a renderer works out where to break the words into lines
|
||||
|
||||
503
lib/Epub/Epub/css/CssParser.cpp
Normal file
503
lib/Epub/Epub/css/CssParser.cpp
Normal file
@ -0,0 +1,503 @@
|
||||
#include "CssParser.h"
|
||||
|
||||
#include <HardwareSerial.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
|
||||
namespace {
|
||||
|
||||
// Buffer size for reading CSS files
|
||||
constexpr size_t READ_BUFFER_SIZE = 512;
|
||||
|
||||
// Maximum CSS file size we'll process (prevent memory issues)
|
||||
constexpr size_t MAX_CSS_SIZE = 64 * 1024;
|
||||
|
||||
// True for the five characters this parser treats as CSS whitespace:
// space, tab, line feed, carriage return and form feed.
bool isCssWhitespace(const char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Read entire file into string (with size limit)
|
||||
std::string readFileContent(FsFile& file) {
|
||||
std::string content;
|
||||
content.reserve(std::min(static_cast<size_t>(file.size()), MAX_CSS_SIZE));
|
||||
|
||||
char buffer[READ_BUFFER_SIZE];
|
||||
while (file.available() && content.size() < MAX_CSS_SIZE) {
|
||||
const int bytesRead = file.read(buffer, sizeof(buffer));
|
||||
if (bytesRead <= 0) break;
|
||||
content.append(buffer, bytesRead);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
// Returns a copy of `css` with every /* ... */ comment removed.
// An unterminated comment discards everything from its opening "/*" onward.
std::string stripComments(const std::string& css) {
  std::string cleaned;
  cleaned.reserve(css.size());

  size_t cursor = 0;
  for (;;) {
    const size_t open = css.find("/*", cursor);
    if (open == std::string::npos) {
      // No further comments: keep the remainder verbatim.
      cleaned.append(css, cursor, std::string::npos);
      break;
    }

    // Keep the text in front of the comment.
    cleaned.append(css, cursor, open - cursor);

    const size_t close = css.find("*/", open + 2);
    if (close == std::string::npos) {
      // Unterminated comment - the rest of the input is dropped.
      break;
    }
    cursor = close + 2;
  }
  return cleaned;
}
|
||||
|
||||
// Skips an @-rule (like @media, @import, @font-face) that begins at `start`,
// where css[start] is the '@'.
//
// A statement @-rule ends at the first ';' outside any braces; a block @-rule
// ends at the '}' that closes its outermost '{'. Returns the position just
// after the rule, or css.size() when the rule is not terminated.
//
// A stray '}' seen before any '{' is ignored. Without that guard the depth
// counter would go negative and neither terminator could match again, so the
// rest of the stylesheet would be skipped.
size_t skipAtRule(const std::string& css, const size_t start) {
  size_t pos = start + 1;  // Skip the '@'

  // Skip identifier. The cast keeps std::isalnum defined for bytes >= 0x80.
  while (pos < css.size() && (std::isalnum(static_cast<unsigned char>(css[pos])) || css[pos] == '-')) {
    ++pos;
  }

  // Look for the terminating ';' or the matching '}'
  int braceDepth = 0;
  for (; pos < css.size(); ++pos) {
    const char c = css[pos];
    if (c == '{') {
      ++braceDepth;
    } else if (c == '}') {
      // Only close a brace that was actually opened inside this rule.
      if (braceDepth > 0 && --braceDepth == 0) {
        return pos + 1;
      }
    } else if (c == ';' && braceDepth == 0) {
      return pos + 1;
    }
  }
  return css.size();
}
|
||||
|
||||
// Extract next rule from CSS content
|
||||
// Returns true if a rule was found, with selector and body filled
|
||||
bool extractNextRule(const std::string& css, size_t& pos,
|
||||
std::string& selector, std::string& body) {
|
||||
selector.clear();
|
||||
body.clear();
|
||||
|
||||
// Skip whitespace and @-rules until we find a regular rule
|
||||
while (pos < css.size()) {
|
||||
// Skip whitespace
|
||||
while (pos < css.size() && isCssWhitespace(css[pos])) {
|
||||
++pos;
|
||||
}
|
||||
|
||||
if (pos >= css.size()) return false;
|
||||
|
||||
// Handle @-rules iteratively (avoids recursion/stack overflow)
|
||||
if (css[pos] == '@') {
|
||||
pos = skipAtRule(css, pos);
|
||||
continue; // Try again after skipping the @-rule
|
||||
}
|
||||
|
||||
break; // Found start of a regular rule
|
||||
}
|
||||
|
||||
if (pos >= css.size()) return false;
|
||||
|
||||
// Find opening brace
|
||||
const size_t bracePos = css.find('{', pos);
|
||||
if (bracePos == std::string::npos) return false;
|
||||
|
||||
// Extract selector (everything before the brace)
|
||||
selector = css.substr(pos, bracePos - pos);
|
||||
|
||||
// Find matching closing brace
|
||||
int depth = 1;
|
||||
const size_t bodyStart = bracePos + 1;
|
||||
size_t bodyEnd = bodyStart;
|
||||
|
||||
while (bodyEnd < css.size() && depth > 0) {
|
||||
if (css[bodyEnd] == '{') ++depth;
|
||||
else if (css[bodyEnd] == '}') --depth;
|
||||
++bodyEnd;
|
||||
}
|
||||
|
||||
// Extract body (between braces)
|
||||
if (bodyEnd > bodyStart) {
|
||||
body = css.substr(bodyStart, bodyEnd - bodyStart - 1);
|
||||
}
|
||||
|
||||
pos = bodyEnd;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// String utilities implementation
|
||||
|
||||
std::string CssParser::normalized(const std::string& s) {
|
||||
std::string result;
|
||||
result.reserve(s.size());
|
||||
|
||||
bool inSpace = true; // Start true to skip leading space
|
||||
for (const char c : s) {
|
||||
if (isCssWhitespace(c)) {
|
||||
if (!inSpace) {
|
||||
result.push_back(' ');
|
||||
inSpace = true;
|
||||
}
|
||||
} else {
|
||||
result.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
|
||||
inSpace = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove trailing space
|
||||
if (!result.empty() && result.back() == ' ') {
|
||||
result.pop_back();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> CssParser::splitOnChar(const std::string& s, const char delimiter) {
|
||||
std::vector<std::string> parts;
|
||||
size_t start = 0;
|
||||
|
||||
for (size_t i = 0; i <= s.size(); ++i) {
|
||||
if (i == s.size() || s[i] == delimiter) {
|
||||
std::string part = s.substr(start, i - start);
|
||||
std::string trimmed = normalized(part);
|
||||
if (!trimmed.empty()) {
|
||||
parts.push_back(trimmed);
|
||||
}
|
||||
start = i + 1;
|
||||
}
|
||||
}
|
||||
return parts;
|
||||
}
|
||||
|
||||
std::vector<std::string> CssParser::splitWhitespace(const std::string& s) {
|
||||
std::vector<std::string> parts;
|
||||
size_t start = 0;
|
||||
bool inWord = false;
|
||||
|
||||
for (size_t i = 0; i <= s.size(); ++i) {
|
||||
const bool isSpace = i == s.size() || isCssWhitespace(s[i]);
|
||||
if (isSpace && inWord) {
|
||||
parts.push_back(s.substr(start, i - start));
|
||||
inWord = false;
|
||||
} else if (!isSpace && !inWord) {
|
||||
start = i;
|
||||
inWord = true;
|
||||
}
|
||||
}
|
||||
return parts;
|
||||
}
|
||||
|
||||
// Property value interpreters
|
||||
|
||||
// Maps a CSS text-align value to a TextAlign. "start" is treated as left and
// "end" as right; any unrecognised value yields TextAlign::None.
TextAlign CssParser::interpretAlignment(const std::string& val) {
  struct Keyword {
    const char* name;
    TextAlign align;
  };
  static const Keyword kKeywords[] = {
      {"left", TextAlign::Left},     {"start", TextAlign::Left},      {"right", TextAlign::Right},
      {"end", TextAlign::Right},     {"center", TextAlign::Center},   {"justify", TextAlign::Justify},
  };

  const std::string key = normalized(val);
  for (const auto& keyword : kKeywords) {
    if (key == keyword.name) {
      return keyword.align;
    }
  }
  return TextAlign::None;
}
|
||||
|
||||
// Maps a CSS font-style value to a CssFontStyle. "italic" and "oblique" are
// both reported as Italic; everything else is Normal.
CssFontStyle CssParser::interpretFontStyle(const std::string& val) {
  const std::string keyword = normalized(val);
  const bool slanted = (keyword == "italic") || (keyword == "oblique");
  return slanted ? CssFontStyle::Italic : CssFontStyle::Normal;
}
|
||||
|
||||
// Maps a CSS font-weight value to a CssFontWeight.
//
// Keywords: "bold"/"bolder" -> Bold, "normal"/"lighter" -> Normal.
// Numbers:  a purely numeric value of 700 or more -> Bold, anything lower ->
//           Normal (500-600 are conservatively treated as normal).
// Any other value falls back to Normal.
CssFontWeight CssParser::interpretFontWeight(const std::string& val) {
  const std::string keyword = normalized(val);

  if (keyword == "bold" || keyword == "bolder") {
    return CssFontWeight::Bold;
  }
  if (keyword == "normal" || keyword == "lighter") {
    return CssFontWeight::Normal;
  }

  // Accept the value as numeric only if the whole string was consumed.
  const char* const text = keyword.c_str();
  char* parseEnd = nullptr;
  const long weight = std::strtol(text, &parseEnd, 10);
  const bool fullyNumeric = (parseEnd != text) && (*parseEnd == '\0');

  if (fullyNumeric && weight >= 700) {
    return CssFontWeight::Bold;
  }
  return CssFontWeight::Normal;
}
|
||||
|
||||
// Maps a CSS text-decoration value to a CssTextDecoration. The value may list
// several space-separated keywords, so it is reported as Underline whenever
// "underline" appears anywhere in it; otherwise None.
CssTextDecoration CssParser::interpretDecoration(const std::string& val) {
  const bool hasUnderline = normalized(val).find("underline") != std::string::npos;
  return hasUnderline ? CssTextDecoration::Underline : CssTextDecoration::None;
}
|
||||
|
||||
float CssParser::interpretLength(const std::string& val, const float emSize) {
|
||||
const std::string v = normalized(val);
|
||||
if (v.empty()) return 0.0f;
|
||||
|
||||
// Determine unit and multiplier
|
||||
float multiplier = 1.0f;
|
||||
size_t unitStart = v.size();
|
||||
|
||||
// Find where the number ends
|
||||
for (size_t i = 0; i < v.size(); ++i) {
|
||||
const char c = v[i];
|
||||
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') {
|
||||
unitStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string numPart = v.substr(0, unitStart);
|
||||
const std::string unitPart = v.substr(unitStart);
|
||||
|
||||
// Handle units
|
||||
if (unitPart == "em" || unitPart == "rem") {
|
||||
multiplier = emSize;
|
||||
} else if (unitPart == "pt") {
|
||||
multiplier = 1.33f; // Approximate pt to px conversion
|
||||
}
|
||||
// px is default (multiplier = 1.0)
|
||||
|
||||
char* endPtr = nullptr;
|
||||
const float numericValue = std::strtof(numPart.c_str(), &endPtr);
|
||||
|
||||
if (endPtr == numPart.c_str()) return 0.0f; // No number parsed
|
||||
|
||||
return numericValue * multiplier;
|
||||
}
|
||||
|
||||
int8_t CssParser::interpretSpacing(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
if (v.empty()) return 0;
|
||||
|
||||
// For spacing, we convert to "lines" (discrete units for e-ink)
|
||||
// 1em ≈ 1 line, percentages based on ~30 lines per page
|
||||
|
||||
float multiplier = 0.0f;
|
||||
size_t unitStart = v.size();
|
||||
|
||||
for (size_t i = 0; i < v.size(); ++i) {
|
||||
const char c = v[i];
|
||||
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') {
|
||||
unitStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string numPart = v.substr(0, unitStart);
|
||||
const std::string unitPart = v.substr(unitStart);
|
||||
|
||||
if (unitPart == "em" || unitPart == "rem") {
|
||||
multiplier = 1.0f; // 1em = 1 line
|
||||
} else if (unitPart == "%") {
|
||||
multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines
|
||||
} else {
|
||||
return 0; // Unsupported unit for spacing
|
||||
}
|
||||
|
||||
char* endPtr = nullptr;
|
||||
const float numericValue = std::strtof(numPart.c_str(), &endPtr);
|
||||
|
||||
if (endPtr == numPart.c_str()) return 0;
|
||||
|
||||
int lines = static_cast<int>(numericValue * multiplier);
|
||||
|
||||
// Clamp to reasonable range (0-2 lines)
|
||||
if (lines < 0) lines = 0;
|
||||
if (lines > 2) lines = 2;
|
||||
|
||||
return static_cast<int8_t>(lines);
|
||||
}
|
||||
|
||||
// Declaration parsing
|
||||
|
||||
/**
 * Parse the contents of a declaration block ("prop: value; prop: value") into a CssStyle.
 *
 * Declarations are separated by ';' and split at the first ':'. Entries with a
 * missing/leading colon, or with an empty name or value after normalisation,
 * are ignored, as are properties this parser does not know about.
 *
 * @param declBlock Text between the braces of a rule, or the value of a style="" attribute
 * @return Style holding every recognised property, with its `defined` flag set
 */
CssStyle CssParser::parseDeclarations(const std::string& declBlock) {
  CssStyle result;

  // Shared handling for the four vertical spacing properties: the target field
  // is only written (and reported as set) when the value maps to at least one line.
  const auto assignSpacing = [](const std::string& rawValue, int8_t& target) -> bool {
    const int8_t lineCount = interpretSpacing(rawValue);
    if (lineCount <= 0) return false;
    target = lineCount;
    return true;
  };

  const auto entries = splitOnChar(declBlock, ';');
  for (const auto& entry : entries) {
    // A usable declaration needs a ':' that is not the very first character.
    const size_t separator = entry.find(':');
    if (separator == std::string::npos || separator == 0) continue;

    const std::string name = normalized(entry.substr(0, separator));
    const std::string value = normalized(entry.substr(separator + 1));
    if (name.empty() || value.empty()) continue;

    if (name == "text-align") {
      // Unrecognised alignments come back as None and leave the style untouched.
      const TextAlign parsedAlign = interpretAlignment(value);
      if (parsedAlign != TextAlign::None) {
        result.alignment = parsedAlign;
        result.defined.alignment = 1;
      }
      continue;
    }

    if (name == "font-style") {
      result.fontStyle = interpretFontStyle(value);
      result.defined.fontStyle = 1;
      continue;
    }

    if (name == "font-weight") {
      result.fontWeight = interpretFontWeight(value);
      result.defined.fontWeight = 1;
      continue;
    }

    if (name == "text-decoration" || name == "text-decoration-line") {
      result.decoration = interpretDecoration(value);
      result.defined.decoration = 1;
      continue;
    }

    if (name == "text-indent") {
      result.indentPixels = interpretLength(value);
      result.defined.indent = 1;
      continue;
    }

    if (name == "margin-top") {
      if (assignSpacing(value, result.marginTop)) result.defined.marginTop = 1;
    } else if (name == "margin-bottom") {
      if (assignSpacing(value, result.marginBottom)) result.defined.marginBottom = 1;
    } else if (name == "padding-top") {
      if (assignSpacing(value, result.paddingTop)) result.defined.paddingTop = 1;
    } else if (name == "padding-bottom") {
      if (assignSpacing(value, result.paddingBottom)) result.defined.paddingBottom = 1;
    }
  }

  return result;
}
|
||||
|
||||
// Rule processing
|
||||
|
||||
void CssParser::processRuleBlock(const std::string& selectorGroup,
|
||||
const std::string& declarations) {
|
||||
const CssStyle style = parseDeclarations(declarations);
|
||||
|
||||
// Only store if any properties were set
|
||||
if (!style.defined.anySet()) return;
|
||||
|
||||
// Handle comma-separated selectors
|
||||
const auto selectors = splitOnChar(selectorGroup, ',');
|
||||
|
||||
for (const auto& sel : selectors) {
|
||||
// Normalize the selector
|
||||
std::string key = normalized(sel);
|
||||
if (key.empty()) continue;
|
||||
|
||||
// Store or merge with existing
|
||||
auto it = rulesBySelector_.find(key);
|
||||
if (it != rulesBySelector_.end()) {
|
||||
it->second.applyOver(style);
|
||||
} else {
|
||||
rulesBySelector_[key] = style;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Main parsing entry point
|
||||
|
||||
/**
 * Read a whole stylesheet from an open file and add its rules to the rule table.
 *
 * @param source Open file handle to read from
 * @return false when the handle is invalid; true otherwise, including for an empty file
 */
bool CssParser::loadFromStream(FsFile& source) {
  if (!source) {
    Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis());
    return false;
  }

  const std::string rawCss = readFileContent(source);

  // An empty stylesheet is valid; there is simply nothing to add.
  if (rawCss.empty()) return true;

  // Comments are removed up front so rule extraction never sees them.
  const std::string css = stripComments(rawCss);

  // Feed each extracted selector/declaration pair into the rule table until
  // extractNextRule reports that no further rule is available.
  std::string selectorText;
  std::string declarationText;
  for (size_t cursor = 0; extractNextRule(css, cursor, selectorText, declarationText);) {
    processRuleBlock(selectorText, declarationText);
  }

  Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size());
  return true;
}
|
||||
|
||||
// Style resolution
|
||||
|
||||
/**
 * Compute the style for an element from the loaded rules.
 *
 * Matching rules are layered in increasing priority:
 *   1. element selector         ("p")
 *   2. class selectors          (".note"), in class-attribute order
 *   3. element.class selectors  ("p.note"), in class-attribute order
 *
 * @param tagName   Element name, e.g. "p" or "div"
 * @param classAttr Value of the class attribute; may list several classes
 * @return Merged style of all matching rules (default style when nothing matches)
 */
CssStyle CssParser::resolveStyle(const std::string& tagName,
                                 const std::string& classAttr) const {
  CssStyle merged;
  const std::string element = normalized(tagName);

  // Layers a stored rule onto the result when the selector is known.
  const auto applyIfPresent = [this, &merged](const std::string& selectorKey) {
    const auto found = rulesBySelector_.find(selectorKey);
    if (found != rulesBySelector_.end()) {
      merged.applyOver(found->second);
    }
  };

  // 1. Element-level style (lowest priority)
  applyIfPresent(element);

  if (classAttr.empty()) return merged;

  // Normalise each class name once; both passes below use the same names.
  std::vector<std::string> classNames;
  for (const auto& rawClass : splitWhitespace(classAttr)) {
    classNames.push_back(normalized(rawClass));
  }

  // 2. Class styles (medium priority)
  for (const auto& className : classNames) {
    applyIfPresent("." + className);
  }

  // 3. element.class styles (higher priority) - a separate pass so they win
  //    over every plain class rule regardless of class order.
  for (const auto& className : classNames) {
    applyIfPresent(element + "." + className);
  }

  return merged;
}
|
||||
|
||||
// Inline style parsing (static - doesn't need rule database)
|
||||
|
||||
/**
 * Parse the value of a style="" attribute.
 *
 * An inline style is a bare declaration list, so it is handled by the same
 * declaration parser that processes the body of a stylesheet rule.
 */
CssStyle CssParser::parseInlineStyle(const std::string& styleValue) {
  CssStyle inlineStyle = parseDeclarations(styleValue);
  return inlineStyle;
}
|
||||
100
lib/Epub/Epub/css/CssParser.h
Normal file
100
lib/Epub/Epub/css/CssParser.h
Normal file
@ -0,0 +1,100 @@
|
||||
#pragma once
|
||||
|
||||
#include <SdFat.h>
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "CssStyle.h"
|
||||
|
||||
/**
|
||||
* Lightweight CSS parser for EPUB stylesheets
|
||||
*
|
||||
* Parses CSS files and extracts styling information relevant for e-ink display.
|
||||
* Uses a two-phase approach: first tokenizes the CSS content, then builds
|
||||
* a rule database that can be queried during HTML parsing.
|
||||
*
|
||||
* Supported selectors:
|
||||
* - Element selectors: p, div, h1, etc.
|
||||
* - Class selectors: .classname
|
||||
* - Combined: element.classname
|
||||
* - Grouped: selector1, selector2 { }
|
||||
*
|
||||
* Not supported (silently ignored):
|
||||
* - Descendant/child selectors
|
||||
* - Pseudo-classes and pseudo-elements
|
||||
* - Media queries (content is skipped)
|
||||
* - @import, @font-face, etc.
|
||||
*/
|
||||
class CssParser {
|
||||
public:
|
||||
CssParser() = default;
|
||||
~CssParser() = default;
|
||||
|
||||
// Non-copyable
|
||||
CssParser(const CssParser&) = delete;
|
||||
CssParser& operator=(const CssParser&) = delete;
|
||||
|
||||
/**
|
||||
* Load and parse CSS from a file stream.
|
||||
* Can be called multiple times to accumulate rules from multiple stylesheets.
|
||||
* @param source Open file handle to read from
|
||||
* @return true if parsing completed (even if no rules found)
|
||||
*/
|
||||
bool loadFromStream(FsFile& source);
|
||||
|
||||
/**
|
||||
* Look up the style for an HTML element, considering tag name and class attributes.
|
||||
* Applies CSS cascade: element style < class style < element.class style
|
||||
*
|
||||
* @param tagName The HTML element name (e.g., "p", "div")
|
||||
* @param classAttr The class attribute value (may contain multiple space-separated classes)
|
||||
* @return Combined style with all applicable rules merged
|
||||
*/
|
||||
[[nodiscard]] CssStyle resolveStyle(const std::string& tagName,
|
||||
const std::string& classAttr) const;
|
||||
|
||||
/**
|
||||
* Parse an inline style attribute string.
|
||||
* @param styleValue The value of a style="" attribute
|
||||
* @return Parsed style properties
|
||||
*/
|
||||
[[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue);
|
||||
|
||||
/**
|
||||
* Check if any rules have been loaded
|
||||
*/
|
||||
[[nodiscard]] bool empty() const { return rulesBySelector_.empty(); }
|
||||
|
||||
/**
|
||||
* Get count of loaded rule sets
|
||||
*/
|
||||
[[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); }
|
||||
|
||||
/**
|
||||
* Clear all loaded rules
|
||||
*/
|
||||
void clear() { rulesBySelector_.clear(); }
|
||||
|
||||
private:
|
||||
// Storage: maps normalized selector -> style properties
|
||||
std::unordered_map<std::string, CssStyle> rulesBySelector_;
|
||||
|
||||
// Internal parsing helpers
|
||||
void processRuleBlock(const std::string& selectorGroup, const std::string& declarations);
|
||||
static CssStyle parseDeclarations(const std::string& declBlock);
|
||||
|
||||
// Individual property value parsers
|
||||
static TextAlign interpretAlignment(const std::string& val);
|
||||
static CssFontStyle interpretFontStyle(const std::string& val);
|
||||
static CssFontWeight interpretFontWeight(const std::string& val);
|
||||
static CssTextDecoration interpretDecoration(const std::string& val);
|
||||
static float interpretLength(const std::string& val, float emSize = 16.0f);
|
||||
static int8_t interpretSpacing(const std::string& val);
|
||||
|
||||
// String utilities
|
||||
static std::string normalized(const std::string& s);
|
||||
static std::vector<std::string> splitOnChar(const std::string& s, char delimiter);
|
||||
static std::vector<std::string> splitWhitespace(const std::string& s);
|
||||
};
|
||||
140
lib/Epub/Epub/css/CssStyle.h
Normal file
140
lib/Epub/Epub/css/CssStyle.h
Normal file
@ -0,0 +1,140 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Horizontal alignment values of the CSS text-align property.
// None means "no alignment specified".
enum class TextAlign : uint8_t {
  None = 0,     // not specified
  Left = 1,     // text-align: left
  Right = 2,    // text-align: right
  Center = 3,   // text-align: center
  Justify = 4,  // text-align: justify
};
|
||||
|
||||
// Values of the CSS font-style property that the renderer distinguishes.
enum class CssFontStyle : uint8_t {
  Normal = 0,  // upright text
  Italic = 1,  // italic text
};
|
||||
|
||||
// Font weight. CSS allows 100-900; this is simplified to normal/bold.
enum class CssFontWeight : uint8_t {
  Normal = 0,  // regular weight
  Bold = 1,    // bold weight
};
|
||||
|
||||
// Text decoration values that the renderer distinguishes.
enum class CssTextDecoration : uint8_t {
  None = 0,       // no decoration
  Underline = 1,  // underlined text
};
|
||||
|
||||
// One-bit flags recording which CSS properties were explicitly set on a style.
// A set flag means the matching value in CssStyle is meaningful.
struct CssPropertyFlags {
  uint16_t alignment : 1;
  uint16_t fontStyle : 1;
  uint16_t fontWeight : 1;
  uint16_t decoration : 1;
  uint16_t indent : 1;
  uint16_t marginTop : 1;
  uint16_t marginBottom : 1;
  uint16_t paddingTop : 1;
  uint16_t paddingBottom : 1;
  uint16_t reserved : 7;  // pads the flags to a full 16 bits

  // Every flag (and the reserved bits) starts out cleared.
  CssPropertyFlags()
      : alignment(0),
        fontStyle(0),
        fontWeight(0),
        decoration(0),
        indent(0),
        marginTop(0),
        marginBottom(0),
        paddingTop(0),
        paddingBottom(0),
        reserved(0) {}

  // True when at least one property flag is set (reserved bits are not considered).
  [[nodiscard]] bool anySet() const {
    const bool textFlagSet = alignment || fontStyle || fontWeight || decoration || indent;
    const bool spacingFlagSet = marginTop || marginBottom || paddingTop || paddingBottom;
    return textFlagSet || spacingFlagSet;
  }

  // Clears every property flag; the reserved bits are left untouched.
  void clearAll() {
    alignment = 0;
    fontStyle = 0;
    fontWeight = 0;
    decoration = 0;
    indent = 0;
    marginTop = 0;
    marginBottom = 0;
    paddingTop = 0;
    paddingBottom = 0;
  }
};
|
||||
|
||||
// Represents a collection of CSS style properties
|
||||
// Only stores properties relevant to e-ink text rendering
|
||||
struct CssStyle {
|
||||
TextAlign alignment = TextAlign::None;
|
||||
CssFontStyle fontStyle = CssFontStyle::Normal;
|
||||
CssFontWeight fontWeight = CssFontWeight::Normal;
|
||||
CssTextDecoration decoration = CssTextDecoration::None;
|
||||
|
||||
float indentPixels = 0.0f; // First-line indent in pixels
|
||||
int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2)
|
||||
int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2)
|
||||
int8_t paddingTop = 0; // Padding before (in lines, 0-2)
|
||||
int8_t paddingBottom = 0; // Padding after (in lines, 0-2)
|
||||
|
||||
CssPropertyFlags defined; // Tracks which properties were explicitly set
|
||||
|
||||
// Apply properties from another style, only overwriting if the other style
|
||||
// has that property explicitly defined
|
||||
void applyOver(const CssStyle& base) {
|
||||
if (base.defined.alignment) {
|
||||
alignment = base.alignment;
|
||||
defined.alignment = 1;
|
||||
}
|
||||
if (base.defined.fontStyle) {
|
||||
fontStyle = base.fontStyle;
|
||||
defined.fontStyle = 1;
|
||||
}
|
||||
if (base.defined.fontWeight) {
|
||||
fontWeight = base.fontWeight;
|
||||
defined.fontWeight = 1;
|
||||
}
|
||||
if (base.defined.decoration) {
|
||||
decoration = base.decoration;
|
||||
defined.decoration = 1;
|
||||
}
|
||||
if (base.defined.indent) {
|
||||
indentPixels = base.indentPixels;
|
||||
defined.indent = 1;
|
||||
}
|
||||
if (base.defined.marginTop) {
|
||||
marginTop = base.marginTop;
|
||||
defined.marginTop = 1;
|
||||
}
|
||||
if (base.defined.marginBottom) {
|
||||
marginBottom = base.marginBottom;
|
||||
defined.marginBottom = 1;
|
||||
}
|
||||
if (base.defined.paddingTop) {
|
||||
paddingTop = base.paddingTop;
|
||||
defined.paddingTop = 1;
|
||||
}
|
||||
if (base.defined.paddingBottom) {
|
||||
paddingBottom = base.paddingBottom;
|
||||
defined.paddingBottom = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compatibility accessors for existing code that uses hasX pattern
|
||||
[[nodiscard]] bool hasTextAlign() const { return defined.alignment; }
|
||||
[[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; }
|
||||
[[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; }
|
||||
[[nodiscard]] bool hasTextDecoration() const { return defined.decoration; }
|
||||
[[nodiscard]] bool hasTextIndent() const { return defined.indent; }
|
||||
[[nodiscard]] bool hasMarginTop() const { return defined.marginTop; }
|
||||
[[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; }
|
||||
[[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; }
|
||||
[[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; }
|
||||
|
||||
// Merge another style (alias for applyOver for compatibility)
|
||||
void merge(const CssStyle& other) { applyOver(other); }
|
||||
|
||||
void reset() {
|
||||
alignment = TextAlign::None;
|
||||
fontStyle = CssFontStyle::Normal;
|
||||
fontWeight = CssFontWeight::Normal;
|
||||
decoration = CssTextDecoration::None;
|
||||
indentPixels = 0.0f;
|
||||
marginTop = marginBottom = paddingTop = paddingBottom = 0;
|
||||
defined.clearAll();
|
||||
}
|
||||
};
|
||||
@ -22,6 +22,9 @@ constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
|
||||
const char* ITALIC_TAGS[] = {"i", "em"};
|
||||
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
|
||||
|
||||
const char* UNDERLINE_TAGS[] = {"u", "ins"};
|
||||
constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]);
|
||||
|
||||
const char* IMAGE_TAGS[] = {"img"};
|
||||
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
|
||||
|
||||
@ -40,18 +43,55 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create a BlockStyle from CSS style properties
|
||||
BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) {
|
||||
BlockStyle blockStyle;
|
||||
blockStyle.marginTop = static_cast<int8_t>(cssStyle.marginTop + cssStyle.paddingTop);
|
||||
blockStyle.marginBottom = static_cast<int8_t>(cssStyle.marginBottom + cssStyle.paddingBottom);
|
||||
blockStyle.paddingTop = cssStyle.paddingTop;
|
||||
blockStyle.paddingBottom = cssStyle.paddingBottom;
|
||||
blockStyle.textIndent = static_cast<int16_t>(cssStyle.indentPixels);
|
||||
return blockStyle;
|
||||
}
|
||||
|
||||
// Update effective bold/italic/underline based on block style and inline style stack
|
||||
void ChapterHtmlSlimParser::updateEffectiveInlineStyle() {
|
||||
// Start with block-level styles
|
||||
effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold;
|
||||
effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic;
|
||||
effectiveUnderline = currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline;
|
||||
|
||||
// Apply inline style stack in order
|
||||
for (const auto& entry : inlineStyleStack) {
|
||||
if (entry.hasBold) {
|
||||
effectiveBold = entry.bold;
|
||||
}
|
||||
if (entry.hasItalic) {
|
||||
effectiveItalic = entry.italic;
|
||||
}
|
||||
if (entry.hasUnderline) {
|
||||
effectiveUnderline = entry.underline;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// start a new text block if needed
|
||||
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) {
|
||||
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
|
||||
if (currentTextBlock) {
|
||||
// already have a text block running and it is empty - just reuse it
|
||||
if (currentTextBlock->isEmpty()) {
|
||||
currentTextBlock->setStyle(style);
|
||||
currentTextBlock->setBlockStyle(blockStyle);
|
||||
return;
|
||||
}
|
||||
|
||||
makePages();
|
||||
}
|
||||
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing));
|
||||
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, blockStyle));
|
||||
}
|
||||
|
||||
// Convenience overload: start a new text block with a default-constructed
// BlockStyle, i.e. without any CSS-derived block styling.
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) {
  const BlockStyle defaultBlockStyle{};
  startNewTextBlock(style, defaultBlockStyle);
}
|
||||
|
||||
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
|
||||
@ -63,6 +103,19 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract class and style attributes for CSS processing
|
||||
std::string classAttr;
|
||||
std::string styleAttr;
|
||||
if (atts != nullptr) {
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], "class") == 0) {
|
||||
classAttr = atts[i + 1];
|
||||
} else if (strcmp(atts[i], "style") == 0) {
|
||||
styleAttr = atts[i + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Special handling for tables - show placeholder text instead of dropping silently
|
||||
if (strcmp(name, "table") == 0) {
|
||||
// Add placeholder text
|
||||
@ -120,22 +173,152 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if this is a block element
|
||||
bool isBlockElement =
|
||||
matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
|
||||
|
||||
// Compute CSS style for this element
|
||||
CssStyle cssStyle;
|
||||
if (self->cssParser) {
|
||||
// Get combined tag + class styles
|
||||
cssStyle = self->cssParser->resolveStyle(name, classAttr);
|
||||
// Merge inline style (highest priority)
|
||||
if (!styleAttr.empty()) {
|
||||
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
|
||||
cssStyle.merge(inlineStyle);
|
||||
}
|
||||
}
|
||||
|
||||
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
|
||||
self->startNewTextBlock(TextBlock::CENTER_ALIGN);
|
||||
// Headers: center aligned, bold, apply CSS overrides
|
||||
TextBlock::Style alignment = TextBlock::CENTER_ALIGN;
|
||||
if (cssStyle.hasTextAlign()) {
|
||||
switch (cssStyle.alignment) {
|
||||
case TextAlign::Left:
|
||||
alignment = TextBlock::LEFT_ALIGN;
|
||||
break;
|
||||
case TextAlign::Right:
|
||||
alignment = TextBlock::RIGHT_ALIGN;
|
||||
break;
|
||||
case TextAlign::Center:
|
||||
alignment = TextBlock::CENTER_ALIGN;
|
||||
break;
|
||||
case TextAlign::Justify:
|
||||
alignment = TextBlock::JUSTIFIED;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
self->currentBlockStyle = cssStyle;
|
||||
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle));
|
||||
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
|
||||
self->updateEffectiveInlineStyle();
|
||||
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
|
||||
if (strcmp(name, "br") == 0) {
|
||||
self->startNewTextBlock(self->currentTextBlock->getStyle());
|
||||
} else {
|
||||
self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment);
|
||||
// Determine alignment from CSS or default
|
||||
auto alignment = static_cast<TextBlock::Style>(self->paragraphAlignment);
|
||||
if (cssStyle.hasTextAlign()) {
|
||||
switch (cssStyle.alignment) {
|
||||
case TextAlign::Left:
|
||||
alignment = TextBlock::LEFT_ALIGN;
|
||||
break;
|
||||
case TextAlign::Right:
|
||||
alignment = TextBlock::RIGHT_ALIGN;
|
||||
break;
|
||||
case TextAlign::Center:
|
||||
alignment = TextBlock::CENTER_ALIGN;
|
||||
break;
|
||||
case TextAlign::Justify:
|
||||
alignment = TextBlock::JUSTIFIED;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
self->currentBlockStyle = cssStyle;
|
||||
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle));
|
||||
self->updateEffectiveInlineStyle();
|
||||
|
||||
if (strcmp(name, "li") == 0) {
|
||||
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR);
|
||||
}
|
||||
}
|
||||
} else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) {
|
||||
self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
|
||||
// Push inline style entry for underline tag
|
||||
StyleStackEntry entry;
|
||||
entry.depth = self->depth; // Track depth for matching pop
|
||||
entry.hasUnderline = true;
|
||||
entry.underline = true;
|
||||
if (cssStyle.hasFontWeight()) {
|
||||
entry.hasBold = true;
|
||||
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
|
||||
}
|
||||
if (cssStyle.hasFontStyle()) {
|
||||
entry.hasItalic = true;
|
||||
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
|
||||
}
|
||||
self->inlineStyleStack.push_back(entry);
|
||||
self->updateEffectiveInlineStyle();
|
||||
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
|
||||
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
|
||||
// Push inline style entry for bold tag
|
||||
StyleStackEntry entry;
|
||||
entry.depth = self->depth; // Track depth for matching pop
|
||||
entry.hasBold = true;
|
||||
entry.bold = true;
|
||||
if (cssStyle.hasFontStyle()) {
|
||||
entry.hasItalic = true;
|
||||
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
|
||||
}
|
||||
if (cssStyle.hasTextDecoration()) {
|
||||
entry.hasUnderline = true;
|
||||
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
|
||||
}
|
||||
self->inlineStyleStack.push_back(entry);
|
||||
self->updateEffectiveInlineStyle();
|
||||
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
|
||||
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
|
||||
// Push inline style entry for italic tag
|
||||
StyleStackEntry entry;
|
||||
entry.depth = self->depth; // Track depth for matching pop
|
||||
entry.hasItalic = true;
|
||||
entry.italic = true;
|
||||
if (cssStyle.hasFontWeight()) {
|
||||
entry.hasBold = true;
|
||||
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
|
||||
}
|
||||
if (cssStyle.hasTextDecoration()) {
|
||||
entry.hasUnderline = true;
|
||||
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
|
||||
}
|
||||
self->inlineStyleStack.push_back(entry);
|
||||
self->updateEffectiveInlineStyle();
|
||||
} else if (strcmp(name, "span") == 0 || !isBlockElement) {
|
||||
// Handle span and other inline elements for CSS styling
|
||||
if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) {
|
||||
StyleStackEntry entry;
|
||||
entry.depth = self->depth; // Track depth for matching pop
|
||||
if (cssStyle.hasFontWeight()) {
|
||||
entry.hasBold = true;
|
||||
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
|
||||
}
|
||||
if (cssStyle.hasFontStyle()) {
|
||||
entry.hasItalic = true;
|
||||
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
|
||||
}
|
||||
if (cssStyle.hasTextDecoration()) {
|
||||
entry.hasUnderline = true;
|
||||
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
|
||||
}
|
||||
self->inlineStyleStack.push_back(entry);
|
||||
self->updateEffectiveInlineStyle();
|
||||
}
|
||||
}
|
||||
|
||||
self->depth += 1;
|
||||
@ -149,12 +332,17 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
return;
|
||||
}
|
||||
|
||||
// Determine font style from depth-based tracking and CSS effective style
|
||||
const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold;
|
||||
const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic;
|
||||
const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline;
|
||||
|
||||
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
|
||||
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
|
||||
if (isBold && isItalic) {
|
||||
fontStyle = EpdFontFamily::BOLD_ITALIC;
|
||||
} else if (self->boldUntilDepth < self->depth) {
|
||||
} else if (isBold) {
|
||||
fontStyle = EpdFontFamily::BOLD;
|
||||
} else if (self->italicUntilDepth < self->depth) {
|
||||
} else if (isItalic) {
|
||||
fontStyle = EpdFontFamily::ITALIC;
|
||||
}
|
||||
|
||||
@ -163,7 +351,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
|
||||
if (self->partWordBufferIndex > 0) {
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
// Skip the whitespace char
|
||||
@ -202,7 +390,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
// If we're about to run out of space, then cut the word off and start a new one
|
||||
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
|
||||
@ -224,27 +412,42 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
|
||||
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
|
||||
|
||||
if (self->partWordBufferIndex > 0) {
|
||||
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file.
|
||||
// We don't want to flush out content when closing inline tags like <span>.
|
||||
// Currently this also flushes out on closing <b> and <i> tags, but they are line tags so that shouldn't happen,
|
||||
// text styling needs to be overhauled to fix it.
|
||||
const bool shouldBreakText =
|
||||
matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) ||
|
||||
matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1;
|
||||
// Check if any style state will change after we decrement depth
|
||||
// If so, we MUST flush the partWordBuffer with the CURRENT style first
|
||||
// Note: depth hasn't been decremented yet, so we check against (depth - 1)
|
||||
const bool willPopStyleStack = !self->inlineStyleStack.empty() &&
|
||||
self->inlineStyleStack.back().depth == self->depth - 1;
|
||||
const bool willClearBold = self->boldUntilDepth == self->depth - 1;
|
||||
const bool willClearItalic = self->italicUntilDepth == self->depth - 1;
|
||||
const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1;
|
||||
|
||||
const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
|
||||
|
||||
// Flush buffer with current style BEFORE any style changes
|
||||
if (self->partWordBufferIndex > 0) {
|
||||
// Flush if style will change OR if we're closing a block/structural element
|
||||
const bool shouldFlush = styleWillChange ||
|
||||
matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) ||
|
||||
matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
|
||||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1;
|
||||
|
||||
if (shouldFlush) {
|
||||
// Use combined depth-based and CSS-based style
|
||||
const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold;
|
||||
const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic;
|
||||
const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline;
|
||||
|
||||
if (shouldBreakText) {
|
||||
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
|
||||
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
|
||||
if (isBold && isItalic) {
|
||||
fontStyle = EpdFontFamily::BOLD_ITALIC;
|
||||
} else if (self->boldUntilDepth < self->depth) {
|
||||
} else if (isBold) {
|
||||
fontStyle = EpdFontFamily::BOLD;
|
||||
} else if (self->italicUntilDepth < self->depth) {
|
||||
} else if (isItalic) {
|
||||
fontStyle = EpdFontFamily::ITALIC;
|
||||
}
|
||||
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
}
|
||||
@ -256,15 +459,33 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
|
||||
self->skipUntilDepth = INT_MAX;
|
||||
}
|
||||
|
||||
// Leaving bold
|
||||
// Leaving bold tag
|
||||
if (self->boldUntilDepth == self->depth) {
|
||||
self->boldUntilDepth = INT_MAX;
|
||||
}
|
||||
|
||||
// Leaving italic
|
||||
// Leaving italic tag
|
||||
if (self->italicUntilDepth == self->depth) {
|
||||
self->italicUntilDepth = INT_MAX;
|
||||
}
|
||||
|
||||
// Leaving underline tag
|
||||
if (self->underlineUntilDepth == self->depth) {
|
||||
self->underlineUntilDepth = INT_MAX;
|
||||
}
|
||||
|
||||
// Pop from inline style stack if we pushed an entry at this depth
|
||||
// This handles all inline elements: b, i, u, span, etc.
|
||||
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
|
||||
self->inlineStyleStack.pop_back();
|
||||
self->updateEffectiveInlineStyle();
|
||||
}
|
||||
|
||||
// Clear block style when leaving block elements
|
||||
if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
|
||||
self->currentBlockStyle.reset();
|
||||
self->updateEffectiveInlineStyle();
|
||||
}
|
||||
}
|
||||
|
||||
bool ChapterHtmlSlimParser::parseAndBuildPages() {
|
||||
@ -384,10 +605,23 @@ void ChapterHtmlSlimParser::makePages() {
|
||||
}
|
||||
|
||||
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
|
||||
|
||||
// Apply marginTop before the paragraph
|
||||
const BlockStyle& blockStyle = currentTextBlock->getBlockStyle();
|
||||
if (blockStyle.marginTop > 0) {
|
||||
currentPageNextY += lineHeight * blockStyle.marginTop;
|
||||
}
|
||||
|
||||
currentTextBlock->layoutAndExtractLines(
|
||||
renderer, fontId, viewportWidth,
|
||||
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
|
||||
// Extra paragraph spacing if enabled
|
||||
|
||||
// Apply marginBottom after the paragraph
|
||||
if (blockStyle.marginBottom > 0) {
|
||||
currentPageNextY += lineHeight * blockStyle.marginBottom;
|
||||
}
|
||||
|
||||
// Extra paragraph spacing if enabled (default behavior)
|
||||
if (extraParagraphSpacing) {
|
||||
currentPageNextY += lineHeight / 2;
|
||||
}
|
||||
|
||||
@ -8,6 +8,8 @@
|
||||
|
||||
#include "../ParsedText.h"
|
||||
#include "../blocks/TextBlock.h"
|
||||
#include "../css/CssParser.h"
|
||||
#include "../css/CssStyle.h"
|
||||
|
||||
class Page;
|
||||
class GfxRenderer;
|
||||
@ -23,6 +25,7 @@ class ChapterHtmlSlimParser {
|
||||
int skipUntilDepth = INT_MAX;
|
||||
int boldUntilDepth = INT_MAX;
|
||||
int italicUntilDepth = INT_MAX;
|
||||
int underlineUntilDepth = INT_MAX;
|
||||
// buffer for building up words from characters, will auto break if longer than this
|
||||
// leave one char at end for the null terminator
|
||||
char partWordBuffer[MAX_WORD_SIZE + 1] = {};
|
||||
@ -36,8 +39,24 @@ class ChapterHtmlSlimParser {
|
||||
uint8_t paragraphAlignment;
|
||||
uint16_t viewportWidth;
|
||||
uint16_t viewportHeight;
|
||||
const CssParser* cssParser;
|
||||
|
||||
// Style tracking (replaces depth-based approach)
|
||||
// One record on the inline style stack. `depth` is the element depth at which the entry was
// pushed; the entry is popped again when the element at that depth is closed.
// NOTE(review): each has* flag presumably says whether this entry sets the property at all,
// with the paired field carrying the value - confirm against updateEffectiveInlineStyle().
struct StyleStackEntry {
  int depth = 0;

  // Bold
  bool hasBold = false;
  bool bold = false;

  // Italic
  bool hasItalic = false;
  bool italic = false;

  // Underline
  bool hasUnderline = false;
  bool underline = false;
};
|
||||
std::vector<StyleStackEntry> inlineStyleStack;
|
||||
CssStyle currentBlockStyle;
|
||||
bool effectiveBold = false;
|
||||
bool effectiveItalic = false;
|
||||
bool effectiveUnderline = false;
|
||||
|
||||
void updateEffectiveInlineStyle();
|
||||
void startNewTextBlock(TextBlock::Style style);
|
||||
void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle);
|
||||
void makePages();
|
||||
// XML callbacks
|
||||
static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
||||
@ -50,7 +69,8 @@ class ChapterHtmlSlimParser {
|
||||
const uint8_t paragraphAlignment, const uint16_t viewportWidth,
|
||||
const uint16_t viewportHeight,
|
||||
const std::function<void(std::unique_ptr<Page>)>& completePageFn,
|
||||
const std::function<void(int)>& progressFn = nullptr)
|
||||
const std::function<void(int)>& progressFn = nullptr,
|
||||
const CssParser* cssParser = nullptr)
|
||||
: filepath(filepath),
|
||||
renderer(renderer),
|
||||
fontId(fontId),
|
||||
@ -60,7 +80,8 @@ class ChapterHtmlSlimParser {
|
||||
viewportWidth(viewportWidth),
|
||||
viewportHeight(viewportHeight),
|
||||
completePageFn(completePageFn),
|
||||
progressFn(progressFn) {}
|
||||
progressFn(progressFn),
|
||||
cssParser(cssParser) {}
|
||||
~ChapterHtmlSlimParser() = default;
|
||||
bool parseAndBuildPages();
|
||||
void addLineToPage(std::shared_ptr<TextBlock> line);
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
// File-local constants for the content.opf parser.
namespace {
|
||||
// Media type string for NCX documents.
// NOTE(review): presumably used to locate the EPUB 2 table of contents - confirm at the use site.
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
|
||||
// Media type string for stylesheets; startElement compares an item's media type against this
// and records the item's href in cssFiles on a match.
constexpr char MEDIA_TYPE_CSS[] = "text/css";
|
||||
// File name of the item cache.
// NOTE(review): presumably appended to the book's cache path - confirm at the use site.
constexpr char itemCacheFile[] = "/.items.bin";
|
||||
} // namespace
|
||||
|
||||
@ -192,6 +193,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
|
||||
}
|
||||
}
|
||||
|
||||
// Collect CSS files
|
||||
if (mediaType == MEDIA_TYPE_CSS) {
|
||||
self->cssFiles.push_back(href);
|
||||
}
|
||||
|
||||
// EPUB 3: Check for nav document (properties contains "nav")
|
||||
if (!properties.empty() && self->tocNavPath.empty()) {
|
||||
// Properties is space-separated, check if "nav" is present as a word
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
#include <Print.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "Epub.h"
|
||||
#include "expat.h"
|
||||
|
||||
@ -38,6 +40,7 @@ class ContentOpfParser final : public Print {
|
||||
std::string tocNavPath; // EPUB 3 nav document path
|
||||
std::string coverItemHref;
|
||||
std::string textReferenceHref;
|
||||
std::vector<std::string> cssFiles; // CSS stylesheet paths
|
||||
|
||||
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
|
||||
BookMetadataCache* cache)
|
||||
|
||||
@ -449,6 +449,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const {
|
||||
return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX;
|
||||
}
|
||||
|
||||
int GfxRenderer::getIndentWidth(const int fontId, const char* text) const {
|
||||
if (fontMap.count(fontId) == 0) {
|
||||
Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t cp;
|
||||
int width = 0;
|
||||
while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&text)))) {
|
||||
width += fontMap.at(fontId).getGlyph(cp, EpdFontFamily::REGULAR)->advanceX;
|
||||
}
|
||||
return width;
|
||||
}
|
||||
|
||||
int GfxRenderer::getFontAscenderSize(const int fontId) const {
|
||||
if (fontMap.count(fontId) == 0) {
|
||||
Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);
|
||||
|
||||
@ -78,6 +78,7 @@ class GfxRenderer {
|
||||
void drawText(int fontId, int x, int y, const char* text, bool black = true,
|
||||
EpdFontFamily::Style style = EpdFontFamily::REGULAR) const;
|
||||
int getSpaceWidth(int fontId) const;
|
||||
int getIndentWidth(int fontId, const char* text) const;
|
||||
int getFontAscenderSize(int fontId) const;
|
||||
int getLineHeight(int fontId) const;
|
||||
std::string truncatedText(int fontId, const char* text, int maxWidth,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user