From 94ce987f2cfb72063e1c5223bbb57273c9eb3a26 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 17 Jan 2026 17:57:04 -0500 Subject: [PATCH 1/5] feat: Add CSS parsing and CSS support in EPUBs --- lib/Epub/Epub.cpp | 57 ++ lib/Epub/Epub.h | 5 + lib/Epub/Epub/BookMetadataCache.cpp | 24 +- lib/Epub/Epub/BookMetadataCache.h | 2 + lib/Epub/Epub/ParsedText.cpp | 32 +- lib/Epub/Epub/ParsedText.h | 12 +- lib/Epub/Epub/Section.cpp | 4 +- lib/Epub/Epub/blocks/BlockStyle.h | 17 + lib/Epub/Epub/blocks/TextBlock.cpp | 86 ++- lib/Epub/Epub/blocks/TextBlock.h | 21 +- lib/Epub/Epub/css/CssParser.cpp | 503 ++++++++++++++++++ lib/Epub/Epub/css/CssParser.h | 100 ++++ lib/Epub/Epub/css/CssStyle.h | 140 +++++ .../Epub/parsers/ChapterHtmlSlimParser.cpp | 284 +++++++++- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 25 +- lib/Epub/Epub/parsers/ContentOpfParser.cpp | 6 + lib/Epub/Epub/parsers/ContentOpfParser.h | 3 + lib/GfxRenderer/GfxRenderer.cpp | 14 + lib/GfxRenderer/GfxRenderer.h | 1 + 19 files changed, 1290 insertions(+), 46 deletions(-) create mode 100644 lib/Epub/Epub/blocks/BlockStyle.h create mode 100644 lib/Epub/Epub/css/CssParser.cpp create mode 100644 lib/Epub/Epub/css/CssParser.h create mode 100644 lib/Epub/Epub/css/CssStyle.h diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 1b33772..07a7a8f 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -85,6 +85,9 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { tocNavItem = opfParser.tocNavPath; } + // Copy CSS files to metadata + bookMetadata.cssFiles = opfParser.cssFiles; + Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; } @@ -203,6 +206,55 @@ bool Epub::parseTocNavFile() const { return true; } +bool Epub::parseCssFiles() { + if (!bookMetadataCache || !bookMetadataCache->isLoaded()) { + Serial.printf("[%lu] [EBP] Cannot parse CSS, cache not loaded\n", millis()); + return false; + } + + // Always create CssParser - needed for inline style parsing even without CSS files + cssParser.reset(new CssParser()); + + const auto& cssFiles = bookMetadataCache->coreMetadata.cssFiles; + if (cssFiles.empty()) { + Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis()); + return true; + } + + for (const auto& cssPath : cssFiles) { + Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str()); + + // Extract CSS file to temp location + const auto tmpCssPath = getCachePath() + "/.tmp.css"; + FsFile tempCssFile; + if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis()); + continue; + } + if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) { + Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str()); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + tempCssFile.close(); + + // Parse the CSS file + if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis()); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + cssParser->loadFromStream(tempCssFile); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + } + + Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(), + cssFiles.size()); + return true; +} + // load in the meta data for the epub file bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); @@ -212,6 +264,8 @@ bool Epub::load(const bool buildIfMissing) { // Try to load existing cache first if (bookMetadataCache->load()) { + // Parse CSS files from loaded cache + parseCssFiles(); Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } @@ -299,6 +353,9 @@ bool Epub::load(const bool buildIfMissing) { return false; } + // Parse CSS files after cache reload + parseCssFiles(); + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 91062aa..af9d0bc 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -8,6 +8,7 @@ #include #include "Epub/BookMetadataCache.h" +#include "Epub/css/CssParser.h" class ZipFile; @@ -24,11 +25,14 @@ class Epub { std::string cachePath; // Spine and TOC cache std::unique_ptr bookMetadataCache; + // CSS parser for styling + std::unique_ptr cssParser; bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; bool parseTocNavFile() const; + bool parseCssFiles(); public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -63,4 +67,5 @@ class Epub { size_t getBookSize() const; uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const; + const CssParser* getCssParser() const { return cssParser.get(); } }; diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 52e4809..c276c5e 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -9,7 +9,7 @@ #include "FsHelpers.h" namespace { -constexpr uint8_t BOOK_CACHE_VERSION = 4; +constexpr uint8_t BOOK_CACHE_VERSION = 5; constexpr char bookBinFile[] = "/book.bin"; constexpr char tmpSpineBinFile[] = "/spine.bin.tmp"; constexpr char tmpTocBinFile[] = "/toc.bin.tmp"; @@ -87,8 +87,13 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta constexpr uint32_t headerASize = sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount); + // Calculate CSS files size: count + each string (length + data) + uint32_t cssFilesSize = sizeof(uint16_t); // count + for (const auto& css : metadata.cssFiles) { + cssFilesSize += sizeof(uint32_t) + css.size(); + } const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() + - metadata.textReferenceHref.size() + sizeof(uint32_t) * 4; + metadata.textReferenceHref.size() + sizeof(uint32_t) * 4 + cssFilesSize; const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount; const uint32_t lutOffset = headerASize + metadataSize; @@ -102,6 +107,11 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta serialization::writeString(bookFile, metadata.author); serialization::writeString(bookFile, metadata.coverItemHref); serialization::writeString(bookFile, metadata.textReferenceHref); + // CSS files + serialization::writePod(bookFile, static_cast(metadata.cssFiles.size())); + for (const auto& css : metadata.cssFiles) { + serialization::writeString(bookFile, css); + } // Loop through spine entries, writing LUT positions spineFile.seek(0); @@ -291,6 +301,16 @@ bool BookMetadataCache::load() { serialization::readString(bookFile, coreMetadata.author); serialization::readString(bookFile, coreMetadata.coverItemHref); serialization::readString(bookFile, coreMetadata.textReferenceHref); + // CSS files + uint16_t cssCount; + serialization::readPod(bookFile, cssCount); + coreMetadata.cssFiles.clear(); + coreMetadata.cssFiles.reserve(cssCount); + for (uint16_t i = 0; i < cssCount; i++) { + std::string cssPath; + serialization::readString(bookFile, cssPath); + coreMetadata.cssFiles.push_back(std::move(cssPath)); + } loaded = true; Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount); diff --git a/lib/Epub/Epub/BookMetadataCache.h b/lib/Epub/Epub/BookMetadataCache.h index 5f1862c..7d703ad 100644 --- a/lib/Epub/Epub/BookMetadataCache.h +++ b/lib/Epub/Epub/BookMetadataCache.h @@ -3,6 +3,7 @@ #include #include +#include class BookMetadataCache { public: @@ -11,6 +12,7 @@ class BookMetadataCache { std::string author; std::string coverItemHref; std::string textReferenceHref; + std::vector cssFiles; }; struct SpineEntry { diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 3c37e31..634f141 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -10,11 +10,12 @@ constexpr int MAX_COST = std::numeric_limits::max(); -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) { if (word.empty()) return; words.push_back(std::move(word)); wordStyles.push_back(fontStyle); + wordUnderlines.push_back(underline); } // Consumes data to minimize memory usage @@ -42,17 +43,33 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere std::vector wordWidths; wordWidths.reserve(totalWordCount); - // add em-space at the beginning of first word in paragraph to indent - if ((style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && !extraParagraphSpacing) { + // Apply text indent: either from CSS blockStyle or default em-space for justified/left-aligned + const bool shouldIndent = (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && !extraParagraphSpacing; + if (blockStyle.textIndent > 0) { + // CSS text-indent is handled via first word width adjustment + // We'll add the indent value directly to the first word's width + } else if (shouldIndent) { + // Default: add em-space at the beginning of first word in paragraph to indent std::string& first_word = words.front(); first_word.insert(0, "\xe2\x80\x83"); } auto wordsIt = words.begin(); auto wordStylesIt = wordStyles.begin(); + bool isFirst = true; while (wordsIt != words.end()) { - wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt)); + uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt); + + // Add CSS text-indent to first word width + if (isFirst && blockStyle.textIndent > 0 && shouldIndent) { + width += static_cast(blockStyle.textIndent); + isFirst = false; + } else { + isFirst = false; + } + + wordWidths.push_back(width); std::advance(wordsIt, 1); std::advance(wordStylesIt, 1); @@ -182,14 +199,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const // Iterators always start at the beginning as we are moving content with splice below auto wordEndIt = words.begin(); auto wordStyleEndIt = wordStyles.begin(); + auto wordUnderlineEndIt = wordUnderlines.begin(); std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); + std::advance(wordUnderlineEndIt, lineWordCount); // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); + std::list lineWordUnderlines; + lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt); - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); + processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style, + blockStyle, std::move(lineWordUnderlines))); } diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index 4b851a9..6f417dc 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -8,6 +8,7 @@ #include #include +#include "blocks/BlockStyle.h" #include "blocks/TextBlock.h" class GfxRenderer; @@ -15,7 +16,9 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; + std::list wordUnderlines; // Track underline per word TextBlock::Style style; + BlockStyle blockStyle; bool extraParagraphSpacing; std::vector computeLineBreaks(int pageWidth, int spaceWidth, const std::vector& wordWidths) const; @@ -25,13 +28,16 @@ class ParsedText { std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: - explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing) - : style(style), extraParagraphSpacing(extraParagraphSpacing) {} + explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, + const BlockStyle& blockStyle = BlockStyle()) + : style(style), blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing) {} ~ParsedText() = default; - void addWord(std::string word, EpdFontFamily::Style fontStyle); + void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); void setStyle(const TextBlock::Style style) { this->style = style; } + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } TextBlock::Style getStyle() const { return style; } + const BlockStyle& getBlockStyle() const { return blockStyle; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 18b81aa..d240432 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -7,7 +7,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 9; +constexpr uint8_t SECTION_FILE_VERSION = 10; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t); } // namespace @@ -179,7 +179,7 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn); + progressFn, epub->getCssParser()); success = visitor.parseAndBuildPages(); SdMan.remove(tmpHtmlPath.c_str()); diff --git a/lib/Epub/Epub/blocks/BlockStyle.h b/lib/Epub/Epub/blocks/BlockStyle.h new file mode 100644 index 0000000..2b073b6 --- /dev/null +++ b/lib/Epub/Epub/blocks/BlockStyle.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +/** + * BlockStyle - Block-level CSS properties for paragraphs + * + * Used to track margin/padding spacing and text indentation for block elements. + * Padding is treated similarly to margins for rendering purposes. + */ +struct BlockStyle { + int8_t marginTop = 0; // 0-2 lines + int8_t marginBottom = 0; // 0-2 lines + int8_t paddingTop = 0; // 0-2 lines (treated same as margin) + int8_t paddingBottom = 0; // 0-2 lines (treated same as margin) + int16_t textIndent = 0; // pixels +}; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 2a15aef..4fa7da7 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -14,13 +14,40 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); - + auto wordUnderlineIt = wordUnderlines.begin(); for (size_t i = 0; i < words.size(); i++) { - renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt); + const int wordX = *wordXposIt + x; + renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt); + + // Draw underline if word is underlined + if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) { + const std::string& w = *wordIt; + const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt); + // y is the top of the text line; add ascender to reach baseline, then offset 2px below + const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; + + int startX = wordX; + int underlineWidth = fullWordWidth; + + // if word starts with em-space ("\xe2\x80\x83"), account for the additional indent before drawing the line + if (w.size() >= 3 && static_cast(w[0]) == 0xE2 && static_cast(w[1]) == 0x80 && + static_cast(w[2]) == 0x83) { + const char* visiblePtr = w.c_str() + 3; + const int prefixWidth = renderer.getIndentWidth(fontId, std::string("\xe2\x80\x83").c_str()); + const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt); + startX = wordX + prefixWidth; + underlineWidth = visibleWidth; + } + + renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true); + } std::advance(wordIt, 1); std::advance(wordStylesIt, 1); std::advance(wordXposIt, 1); + if (wordUnderlineIt != wordUnderlines.end()) { + std::advance(wordUnderlineIt, 1); + } } } @@ -37,9 +64,35 @@ bool TextBlock::serialize(FsFile& file) const { for (auto x : wordXpos) serialization::writePod(file, x); for (auto s : wordStyles) serialization::writePod(file, s); - // Block style + // Underline flags (packed as bytes, 8 words per byte) + uint8_t underlineByte = 0; + int bitIndex = 0; + auto underlineIt = wordUnderlines.begin(); + for (size_t i = 0; i < words.size(); i++) { + if (underlineIt != wordUnderlines.end() && *underlineIt) { + underlineByte |= 1 << bitIndex; + } + bitIndex++; + if (bitIndex == 8 || i == words.size() - 1) { + serialization::writePod(file, underlineByte); + underlineByte = 0; + bitIndex = 0; + } + if (underlineIt != wordUnderlines.end()) { + ++underlineIt; + } + } + + // Block style (alignment) serialization::writePod(file, style); + // Block style (margins/padding/indent) + serialization::writePod(file, blockStyle.marginTop); + serialization::writePod(file, blockStyle.marginBottom); + serialization::writePod(file, blockStyle.paddingTop); + serialization::writePod(file, blockStyle.paddingBottom); + serialization::writePod(file, blockStyle.textIndent); + return true; } @@ -48,7 +101,9 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { std::list words; std::list wordXpos; std::list wordStyles; + std::list wordUnderlines; Style style; + BlockStyle blockStyle; // Word count serialization::readPod(file, wc); @@ -67,8 +122,29 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& s : wordStyles) serialization::readPod(file, s); - // Block style + // Underline flags (packed as bytes, 8 words per byte) + wordUnderlines.resize(wc, false); + auto underlineIt = wordUnderlines.begin(); + const int bytesNeeded = (wc + 7) / 8; + for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) { + uint8_t underlineByte; + serialization::readPod(file, underlineByte); + for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) { + *underlineIt = (underlineByte & 1 << bit) != 0; + ++underlineIt; + } + } + + // Block style (alignment) serialization::readPod(file, style); - return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style)); + // Block style (margins/padding/indent) + serialization::readPod(file, blockStyle.marginTop); + serialization::readPod(file, blockStyle.marginBottom); + serialization::readPod(file, blockStyle.paddingTop); + serialization::readPod(file, blockStyle.paddingBottom); + serialization::readPod(file, blockStyle.textIndent); + + return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style, + blockStyle, std::move(wordUnderlines))); } diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 415a18f..68f5c7f 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -7,6 +7,7 @@ #include #include "Block.h" +#include "BlockStyle.h" // Represents a line of text on a page class TextBlock final : public Block { @@ -22,15 +23,31 @@ class TextBlock final : public Block { std::list words; std::list wordXpos; std::list wordStyles; + std::list wordUnderlines; // Track underline per word Style style; + BlockStyle blockStyle; public: explicit TextBlock(std::list words, std::list word_xpos, - std::list word_styles, const Style style) - : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {} + std::list word_styles, const Style style, + const BlockStyle& blockStyle = BlockStyle(), + std::list word_underlines = std::list()) + : words(std::move(words)), + wordXpos(std::move(word_xpos)), + wordStyles(std::move(word_styles)), + wordUnderlines(std::move(word_underlines)), + style(style), + blockStyle(blockStyle) { + // Ensure underlines list matches words list size + while (this->wordUnderlines.size() < this->words.size()) { + this->wordUnderlines.push_back(false); + } + } ~TextBlock() override = default; void setStyle(const Style style) { this->style = style; } + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } Style getStyle() const { return style; } + const BlockStyle& getBlockStyle() const { return blockStyle; } bool isEmpty() override { return words.empty(); } void layout(GfxRenderer& renderer) override {}; // given a renderer works out where to break the words into lines diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp new file mode 100644 index 0000000..7ef3f83 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -0,0 +1,503 @@ +#include "CssParser.h" + +#include + +#include +#include + +namespace { + +// Buffer size for reading CSS files +constexpr size_t READ_BUFFER_SIZE = 512; + +// Maximum CSS file size we'll process (prevent memory issues) +constexpr size_t MAX_CSS_SIZE = 64 * 1024; + +// Check if character is CSS whitespace +bool isCssWhitespace(const char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +// Read entire file into string (with size limit) +std::string readFileContent(FsFile& file) { + std::string content; + content.reserve(std::min(static_cast(file.size()), MAX_CSS_SIZE)); + + char buffer[READ_BUFFER_SIZE]; + while (file.available() && content.size() < MAX_CSS_SIZE) { + const int bytesRead = file.read(buffer, sizeof(buffer)); + if (bytesRead <= 0) break; + content.append(buffer, bytesRead); + } + return content; +} + +// Remove CSS comments (/* ... */) from content +std::string stripComments(const std::string& css) { + std::string result; + result.reserve(css.size()); + + size_t pos = 0; + while (pos < css.size()) { + // Look for start of comment + if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') { + // Find end of comment + const size_t endPos = css.find("*/", pos + 2); + if (endPos == std::string::npos) { + // Unterminated comment - skip rest of file + break; + } + pos = endPos + 2; + } else { + result.push_back(css[pos]); + ++pos; + } + } + return result; +} + +// Skip @-rules (like @media, @import, @font-face) +// Returns position after the @-rule +size_t skipAtRule(const std::string& css, const size_t start) { + // Find the end - either semicolon (simple @-rule) or matching brace + size_t pos = start + 1; // Skip the '@' + + // Skip identifier + while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) { + ++pos; + } + + // Look for { or ; + int braceDepth = 0; + while (pos < css.size()) { + const char c = css[pos]; + if (c == '{') { + ++braceDepth; + } else if (c == '}') { + --braceDepth; + if (braceDepth == 0) { + return pos + 1; + } + } else if (c == ';' && braceDepth == 0) { + return pos + 1; + } + ++pos; + } + return css.size(); +} + +// Extract next rule from CSS content +// Returns true if a rule was found, with selector and body filled +bool extractNextRule(const std::string& css, size_t& pos, + std::string& selector, std::string& body) { + selector.clear(); + body.clear(); + + // Skip whitespace and @-rules until we find a regular rule + while (pos < css.size()) { + // Skip whitespace + while (pos < css.size() && isCssWhitespace(css[pos])) { + ++pos; + } + + if (pos >= css.size()) return false; + + // Handle @-rules iteratively (avoids recursion/stack overflow) + if (css[pos] == '@') { + pos = skipAtRule(css, pos); + continue; // Try again after skipping the @-rule + } + + break; // Found start of a regular rule + } + + if (pos >= css.size()) return false; + + // Find opening brace + const size_t bracePos = css.find('{', pos); + if (bracePos == std::string::npos) return false; + + // Extract selector (everything before the brace) + selector = css.substr(pos, bracePos - pos); + + // Find matching closing brace + int depth = 1; + const size_t bodyStart = bracePos + 1; + size_t bodyEnd = bodyStart; + + while (bodyEnd < css.size() && depth > 0) { + if (css[bodyEnd] == '{') ++depth; + else if (css[bodyEnd] == '}') --depth; + ++bodyEnd; + } + + // Extract body (between braces) + if (bodyEnd > bodyStart) { + body = css.substr(bodyStart, bodyEnd - bodyStart - 1); + } + + pos = bodyEnd; + return true; +} + +} // anonymous namespace + +// String utilities implementation + +std::string CssParser::normalized(const std::string& s) { + std::string result; + result.reserve(s.size()); + + bool inSpace = true; // Start true to skip leading space + for (const char c : s) { + if (isCssWhitespace(c)) { + if (!inSpace) { + result.push_back(' '); + inSpace = true; + } + } else { + result.push_back(static_cast(std::tolower(static_cast(c)))); + inSpace = false; + } + } + + // Remove trailing space + if (!result.empty() && result.back() == ' ') { + result.pop_back(); + } + return result; +} + +std::vector CssParser::splitOnChar(const std::string& s, const char delimiter) { + std::vector parts; + size_t start = 0; + + for (size_t i = 0; i <= s.size(); ++i) { + if (i == s.size() || s[i] == delimiter) { + std::string part = s.substr(start, i - start); + std::string trimmed = normalized(part); + if (!trimmed.empty()) { + parts.push_back(trimmed); + } + start = i + 1; + } + } + return parts; +} + +std::vector CssParser::splitWhitespace(const std::string& s) { + std::vector parts; + size_t start = 0; + bool inWord = false; + + for (size_t i = 0; i <= s.size(); ++i) { + const bool isSpace = i == s.size() || isCssWhitespace(s[i]); + if (isSpace && inWord) { + parts.push_back(s.substr(start, i - start)); + inWord = false; + } else if (!isSpace && !inWord) { + start = i; + inWord = true; + } + } + return parts; +} + +// Property value interpreters + +TextAlign CssParser::interpretAlignment(const std::string& val) { + const std::string v = normalized(val); + + if (v == "left" || v == "start") return TextAlign::Left; + if (v == "right" || v == "end") return TextAlign::Right; + if (v == "center") return TextAlign::Center; + if (v == "justify") return TextAlign::Justify; + + return TextAlign::None; +} + +CssFontStyle CssParser::interpretFontStyle(const std::string& val) { + const std::string v = normalized(val); + + if (v == "italic" || v == "oblique") return CssFontStyle::Italic; + return CssFontStyle::Normal; +} + +CssFontWeight CssParser::interpretFontWeight(const std::string& val) { + const std::string v = normalized(val); + + // Named values + if (v == "bold" || v == "bolder") return CssFontWeight::Bold; + if (v == "normal" || v == "lighter") return CssFontWeight::Normal; + + // Numeric values: 100-900 + // CSS spec: 400 = normal, 700 = bold + // We use: 0-400 = normal, 700+ = bold, 500-600 = normal (conservative) + char* endPtr = nullptr; + const long numericWeight = std::strtol(v.c_str(), &endPtr, 10); + + // If we parsed a number and consumed the whole string + if (endPtr != v.c_str() && *endPtr == '\0') { + return numericWeight >= 700 ? CssFontWeight::Bold : CssFontWeight::Normal; + } + + return CssFontWeight::Normal; +} + +CssTextDecoration CssParser::interpretDecoration(const std::string& val) { + const std::string v = normalized(val); + + // text-decoration can have multiple space-separated values + if (v.find("underline") != std::string::npos) { + return CssTextDecoration::Underline; + } + return CssTextDecoration::None; +} + +float CssParser::interpretLength(const std::string& val, const float emSize) { + const std::string v = normalized(val); + if (v.empty()) return 0.0f; + + // Determine unit and multiplier + float multiplier = 1.0f; + size_t unitStart = v.size(); + + // Find where the number ends + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + // Handle units + if (unitPart == "em" || unitPart == "rem") { + multiplier = emSize; + } else if (unitPart == "pt") { + multiplier = 1.33f; // Approximate pt to px conversion + } + // px is default (multiplier = 1.0) + + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + + if (endPtr == numPart.c_str()) return 0.0f; // No number parsed + + return numericValue * multiplier; +} + +int8_t CssParser::interpretSpacing(const std::string& val) { + const std::string v = normalized(val); + if (v.empty()) return 0; + + // For spacing, we convert to "lines" (discrete units for e-ink) + // 1em ≈ 1 line, percentages based on ~30 lines per page + + float multiplier = 0.0f; + size_t unitStart = v.size(); + + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + if (unitPart == "em" || unitPart == "rem") { + multiplier = 1.0f; // 1em = 1 line + } else if (unitPart == "%") { + multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines + } else { + return 0; // Unsupported unit for spacing + } + + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + + if (endPtr == numPart.c_str()) return 0; + + int lines = static_cast(numericValue * multiplier); + + // Clamp to reasonable range (0-2 lines) + if (lines < 0) lines = 0; + if (lines > 2) lines = 2; + + return static_cast(lines); +} + +// Declaration parsing + +CssStyle CssParser::parseDeclarations(const std::string& declBlock) { + CssStyle style; + + // Split declarations by semicolon + const auto declarations = splitOnChar(declBlock, ';'); + + for (const auto& decl : declarations) { + // Find colon separator + const size_t colonPos = decl.find(':'); + if (colonPos == std::string::npos || colonPos == 0) continue; + + std::string propName = normalized(decl.substr(0, colonPos)); + std::string propValue = normalized(decl.substr(colonPos + 1)); + + if (propName.empty() || propValue.empty()) continue; + + // Match property and set value + if (propName == "text-align") { + const TextAlign align = interpretAlignment(propValue); + if (align != TextAlign::None) { + style.alignment = align; + style.defined.alignment = 1; + } + } else if (propName == "font-style") { + style.fontStyle = interpretFontStyle(propValue); + style.defined.fontStyle = 1; + } else if (propName == "font-weight") { + style.fontWeight = interpretFontWeight(propValue); + style.defined.fontWeight = 1; + } else if (propName == "text-decoration" || propName == "text-decoration-line") { + style.decoration = interpretDecoration(propValue); + style.defined.decoration = 1; + } else if (propName == "text-indent") { + style.indentPixels = interpretLength(propValue); + style.defined.indent = 1; + } else if (propName == "margin-top") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.marginTop = spacing; + style.defined.marginTop = 1; + } + } else if (propName == "margin-bottom") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.marginBottom = spacing; + style.defined.marginBottom = 1; + } + } else if (propName == "padding-top") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.paddingTop = spacing; + style.defined.paddingTop = 1; + } + } else if (propName == "padding-bottom") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.paddingBottom = spacing; + style.defined.paddingBottom = 1; + } + } + } + + return style; +} + +// Rule processing + +void CssParser::processRuleBlock(const std::string& selectorGroup, + const std::string& declarations) { + const CssStyle style = parseDeclarations(declarations); + + // Only store if any properties were set + if (!style.defined.anySet()) return; + + // Handle comma-separated selectors + const auto selectors = splitOnChar(selectorGroup, ','); + + for (const auto& sel : selectors) { + // Normalize the selector + std::string key = normalized(sel); + if (key.empty()) continue; + + // Store or merge with existing + auto it = rulesBySelector_.find(key); + if (it != rulesBySelector_.end()) { + it->second.applyOver(style); + } else { + rulesBySelector_[key] = style; + } + } +} + +// Main parsing entry point + +bool CssParser::loadFromStream(FsFile& source) { + if (!source) { + Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis()); + return false; + } + + // Read file content + const std::string content = readFileContent(source); + if (content.empty()) { + return true; // Empty file is valid + } + + // Remove comments + const std::string cleaned = stripComments(content); + + // Parse rules + size_t pos = 0; + std::string selector, body; + + while (extractNextRule(cleaned, pos, selector, body)) { + processRuleBlock(selector, body); + } + + Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size()); + return true; +} + +// Style resolution + +CssStyle CssParser::resolveStyle(const std::string& tagName, + const std::string& classAttr) const { + CssStyle result; + const std::string tag = normalized(tagName); + + // 1. Apply element-level style (lowest priority) + const auto tagIt = rulesBySelector_.find(tag); + if (tagIt != rulesBySelector_.end()) { + result.applyOver(tagIt->second); + } + + // 2. Apply class styles (medium priority) + if (!classAttr.empty()) { + const auto classes = splitWhitespace(classAttr); + + for (const auto& cls : classes) { + std::string classKey = "." + normalized(cls); + + auto classIt = rulesBySelector_.find(classKey); + if (classIt != rulesBySelector_.end()) { + result.applyOver(classIt->second); + } + } + + // 3. Apply element.class styles (higher priority) + for (const auto& cls : classes) { + std::string combinedKey = tag + "." + normalized(cls); + + auto combinedIt = rulesBySelector_.find(combinedKey); + if (combinedIt != rulesBySelector_.end()) { + result.applyOver(combinedIt->second); + } + } + } + + return result; +} + +// Inline style parsing (static - doesn't need rule database) + +CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { + return parseDeclarations(styleValue); +} diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h new file mode 100644 index 0000000..a10e902 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.h @@ -0,0 +1,100 @@ +#pragma once + +#include + +#include +#include +#include + +#include "CssStyle.h" + +/** + * Lightweight CSS parser for EPUB stylesheets + * + * Parses CSS files and extracts styling information relevant for e-ink display. + * Uses a two-phase approach: first tokenizes the CSS content, then builds + * a rule database that can be queried during HTML parsing. + * + * Supported selectors: + * - Element selectors: p, div, h1, etc. + * - Class selectors: .classname + * - Combined: element.classname + * - Grouped: selector1, selector2 { } + * + * Not supported (silently ignored): + * - Descendant/child selectors + * - Pseudo-classes and pseudo-elements + * - Media queries (content is skipped) + * - @import, @font-face, etc. + */ +class CssParser { + public: + CssParser() = default; + ~CssParser() = default; + + // Non-copyable + CssParser(const CssParser&) = delete; + CssParser& operator=(const CssParser&) = delete; + + /** + * Load and parse CSS from a file stream. + * Can be called multiple times to accumulate rules from multiple stylesheets. + * @param source Open file handle to read from + * @return true if parsing completed (even if no rules found) + */ + bool loadFromStream(FsFile& source); + + /** + * Look up the style for an HTML element, considering tag name and class attributes. + * Applies CSS cascade: element style < class style < element.class style + * + * @param tagName The HTML element name (e.g., "p", "div") + * @param classAttr The class attribute value (may contain multiple space-separated classes) + * @return Combined style with all applicable rules merged + */ + [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, + const std::string& classAttr) const; + + /** + * Parse an inline style attribute string. + * @param styleValue The value of a style="" attribute + * @return Parsed style properties + */ + [[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue); + + /** + * Check if any rules have been loaded + */ + [[nodiscard]] bool empty() const { return rulesBySelector_.empty(); } + + /** + * Get count of loaded rule sets + */ + [[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); } + + /** + * Clear all loaded rules + */ + void clear() { rulesBySelector_.clear(); } + + private: + // Storage: maps normalized selector -> style properties + std::unordered_map rulesBySelector_; + + // Internal parsing helpers + void processRuleBlock(const std::string& selectorGroup, const std::string& declarations); + static CssStyle parseDeclarations(const std::string& declBlock); + + // Individual property value parsers + static TextAlign interpretAlignment(const std::string& val); + static CssFontStyle interpretFontStyle(const std::string& val); + static CssFontWeight interpretFontWeight(const std::string& val); + static CssTextDecoration interpretDecoration(const std::string& val); + static float interpretLength(const std::string& val, float emSize = 16.0f); + static int8_t interpretSpacing(const std::string& val); + + // String utilities + static std::string normalized(const std::string& s); + static std::vector splitOnChar(const std::string& s, char delimiter); + static std::vector splitWhitespace(const std::string& s); +}; diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h new file mode 100644 index 0000000..ea6a9d3 --- /dev/null +++ b/lib/Epub/Epub/css/CssStyle.h @@ -0,0 +1,140 @@ +#pragma once + +#include + +// Text alignment options matching CSS text-align property +enum class TextAlign : uint8_t { + None = 0, + Left = 1, + Right = 2, + Center = 3, + Justify = 4 +}; + +// Font style options matching CSS font-style property +enum class CssFontStyle : uint8_t { + Normal = 0, + Italic = 1 +}; + +// Font weight options - CSS supports 100-900, we simplify to normal/bold +enum class CssFontWeight : uint8_t { + Normal = 0, + Bold = 1 +}; + +// Text decoration options +enum class CssTextDecoration : uint8_t { + None = 0, + Underline = 1 +}; + +// Bitmask for tracking which properties have been explicitly set +struct CssPropertyFlags { + uint16_t alignment : 1; + uint16_t fontStyle : 1; + uint16_t fontWeight : 1; + uint16_t decoration : 1; + uint16_t indent : 1; + uint16_t marginTop : 1; + uint16_t marginBottom : 1; + uint16_t paddingTop : 1; + uint16_t paddingBottom : 1; + uint16_t reserved : 7; + + CssPropertyFlags() : alignment(0), fontStyle(0), fontWeight(0), decoration(0), + indent(0), marginTop(0), marginBottom(0), + paddingTop(0), paddingBottom(0), reserved(0) {} + + [[nodiscard]] bool anySet() const { + return alignment || fontStyle || fontWeight || decoration || + indent || marginTop || marginBottom || paddingTop || paddingBottom; + } + + void clearAll() { + alignment = fontStyle = fontWeight = decoration = indent = 0; + marginTop = marginBottom = paddingTop = paddingBottom = 0; + } +}; + +// Represents a collection of CSS style properties +// Only stores properties relevant to e-ink text rendering +struct CssStyle { + TextAlign alignment = TextAlign::None; + CssFontStyle fontStyle = CssFontStyle::Normal; + CssFontWeight fontWeight = CssFontWeight::Normal; + CssTextDecoration decoration = CssTextDecoration::None; + + float indentPixels = 0.0f; // First-line indent in pixels + int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) + int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2) + int8_t paddingTop = 0; // Padding before (in lines, 0-2) + int8_t paddingBottom = 0; // Padding after (in lines, 0-2) + + CssPropertyFlags defined; // Tracks which properties were explicitly set + + // Apply properties from another style, only overwriting if the other style + // has that property explicitly defined + void applyOver(const CssStyle& base) { + if (base.defined.alignment) { + alignment = base.alignment; + defined.alignment = 1; + } + if (base.defined.fontStyle) { + fontStyle = base.fontStyle; + defined.fontStyle = 1; + } + if (base.defined.fontWeight) { + fontWeight = base.fontWeight; + defined.fontWeight = 1; + } + if (base.defined.decoration) { + decoration = base.decoration; + defined.decoration = 1; + } + if (base.defined.indent) { + indentPixels = base.indentPixels; + defined.indent = 1; + } + if (base.defined.marginTop) { + marginTop = base.marginTop; + defined.marginTop = 1; + } + if (base.defined.marginBottom) { + marginBottom = base.marginBottom; + defined.marginBottom = 1; + } + if (base.defined.paddingTop) { + paddingTop = base.paddingTop; + defined.paddingTop = 1; + } + if (base.defined.paddingBottom) { + paddingBottom = base.paddingBottom; + defined.paddingBottom = 1; + } + } + + // Compatibility accessors for existing code that uses hasX pattern + [[nodiscard]] bool hasTextAlign() const { return defined.alignment; } + [[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; } + [[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; } + [[nodiscard]] bool hasTextDecoration() const { return defined.decoration; } + [[nodiscard]] bool hasTextIndent() const { return defined.indent; } + [[nodiscard]] bool hasMarginTop() const { return defined.marginTop; } + [[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; } + [[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; } + [[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; } + + // Merge another style (alias for applyOver for compatibility) + void merge(const CssStyle& other) { applyOver(other); } + + void reset() { + alignment = TextAlign::None; + fontStyle = CssFontStyle::Normal; + fontWeight = CssFontWeight::Normal; + decoration = CssTextDecoration::None; + indentPixels = 0.0f; + marginTop = marginBottom = paddingTop = paddingBottom = 0; + defined.clearAll(); + } +}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index acddd81..0143a56 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -22,6 +22,9 @@ constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]); const char* ITALIC_TAGS[] = {"i", "em"}; constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); +const char* UNDERLINE_TAGS[] = {"u", "ins"}; +constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]); + const char* IMAGE_TAGS[] = {"img"}; constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); @@ -40,18 +43,55 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +// Create a BlockStyle from CSS style properties +BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) { + BlockStyle blockStyle; + blockStyle.marginTop = static_cast(cssStyle.marginTop + cssStyle.paddingTop); + blockStyle.marginBottom = static_cast(cssStyle.marginBottom + cssStyle.paddingBottom); + blockStyle.paddingTop = cssStyle.paddingTop; + blockStyle.paddingBottom = cssStyle.paddingBottom; + blockStyle.textIndent = static_cast(cssStyle.indentPixels); + return blockStyle; +} + +// Update effective bold/italic/underline based on block style and inline style stack +void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { + // Start with block-level styles + effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; + effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic; + effectiveUnderline = currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; + + // Apply inline style stack in order + for (const auto& entry : inlineStyleStack) { + if (entry.hasBold) { + effectiveBold = entry.bold; + } + if (entry.hasItalic) { + effectiveItalic = entry.italic; + } + if (entry.hasUnderline) { + effectiveUnderline = entry.underline; + } + } +} + // start a new text block if needed -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { currentTextBlock->setStyle(style); + currentTextBlock->setBlockStyle(blockStyle); return; } makePages(); } - currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing)); + currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, blockStyle)); +} + +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { + startNewTextBlock(style, BlockStyle{}); } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { @@ -63,6 +103,19 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* return; } + // Extract class and style attributes for CSS processing + std::string classAttr; + std::string styleAttr; + if (atts != nullptr) { + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "class") == 0) { + classAttr = atts[i + 1]; + } else if (strcmp(atts[i], "style") == 0) { + styleAttr = atts[i + 1]; + } + } + } + // Special handling for tables - show placeholder text instead of dropping silently if (strcmp(name, "table") == 0) { // Add placeholder text @@ -120,22 +173,152 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } + // Determine if this is a block element + bool isBlockElement = + matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); + + // Compute CSS style for this element + CssStyle cssStyle; + if (self->cssParser) { + // Get combined tag + class styles + cssStyle = self->cssParser->resolveStyle(name, classAttr); + // Merge inline style (highest priority) + if (!styleAttr.empty()) { + CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr); + cssStyle.merge(inlineStyle); + } + } + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + // Headers: center aligned, bold, apply CSS overrides + TextBlock::Style alignment = TextBlock::CENTER_ALIGN; + if (cssStyle.hasTextAlign()) { + switch (cssStyle.alignment) { + case TextAlign::Left: + alignment = TextBlock::LEFT_ALIGN; + break; + case TextAlign::Right: + alignment = TextBlock::RIGHT_ALIGN; + break; + case TextAlign::Center: + alignment = TextBlock::CENTER_ALIGN; + break; + case TextAlign::Justify: + alignment = TextBlock::JUSTIFIED; + break; + default: + break; + } + } + + self->currentBlockStyle = cssStyle; + self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle)); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + self->updateEffectiveInlineStyle(); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { - self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); + // Determine alignment from CSS or default + auto alignment = static_cast(self->paragraphAlignment); + if (cssStyle.hasTextAlign()) { + switch (cssStyle.alignment) { + case TextAlign::Left: + alignment = TextBlock::LEFT_ALIGN; + break; + case TextAlign::Right: + alignment = TextBlock::RIGHT_ALIGN; + break; + case TextAlign::Center: + alignment = TextBlock::CENTER_ALIGN; + break; + case TextAlign::Justify: + alignment = TextBlock::JUSTIFIED; + break; + default: + break; + } + } + + self->currentBlockStyle = cssStyle; + self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle)); + self->updateEffectiveInlineStyle(); + if (strcmp(name, "li") == 0) { self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); } } + } else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) { + self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth); + // Push inline style entry for underline tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasUnderline = true; + entry.underline = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + // Push inline style entry for bold tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasBold = true; + entry.bold = true; + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); } else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); + // Push inline style entry for italic tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasItalic = true; + entry.italic = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (strcmp(name, "span") == 0 || !isBlockElement) { + // Handle span and other inline elements for CSS styling + if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) { + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } } self->depth += 1; @@ -149,12 +332,17 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char return; } + // Determine font style from depth-based tracking and CSS effective style + const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold; + const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic; + const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline; + EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + if (isBold && isItalic) { fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { + } else if (isBold) { fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { + } else if (isItalic) { fontStyle = EpdFontFamily::ITALIC; } @@ -163,7 +351,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline); self->partWordBufferIndex = 0; } // Skip the whitespace char @@ -202,7 +390,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline); self->partWordBufferIndex = 0; } @@ -224,27 +412,42 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { auto* self = static_cast(userData); - if (self->partWordBufferIndex > 0) { - // Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file. - // We don't want to flush out content when closing inline tags like . - // Currently this also flushes out on closing and tags, but they are line tags so that shouldn't happen, - // text styling needs to be overhauled to fix it. - const bool shouldBreakText = - matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; + // Check if any style state will change after we decrement depth + // If so, we MUST flush the partWordBuffer with the CURRENT style first + // Note: depth hasn't been decremented yet, so we check against (depth - 1) + const bool willPopStyleStack = !self->inlineStyleStack.empty() && + self->inlineStyleStack.back().depth == self->depth - 1; + const bool willClearBold = self->boldUntilDepth == self->depth - 1; + const bool willClearItalic = self->italicUntilDepth == self->depth - 1; + const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; + + const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; + + // Flush buffer with current style BEFORE any style changes + if (self->partWordBufferIndex > 0) { + // Flush if style will change OR if we're closing a block/structural element + const bool shouldFlush = styleWillChange || + matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || + matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1; + + if (shouldFlush) { + // Use combined depth-based and CSS-based style + const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold; + const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic; + const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline; - if (shouldBreakText) { EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + if (isBold && isItalic) { fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { + } else if (isBold) { fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { + } else if (isItalic) { fontStyle = EpdFontFamily::ITALIC; } self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline); self->partWordBufferIndex = 0; } } @@ -256,15 +459,33 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n self->skipUntilDepth = INT_MAX; } - // Leaving bold + // Leaving bold tag if (self->boldUntilDepth == self->depth) { self->boldUntilDepth = INT_MAX; } - // Leaving italic + // Leaving italic tag if (self->italicUntilDepth == self->depth) { self->italicUntilDepth = INT_MAX; } + + // Leaving underline tag + if (self->underlineUntilDepth == self->depth) { + self->underlineUntilDepth = INT_MAX; + } + + // Pop from inline style stack if we pushed an entry at this depth + // This handles all inline elements: b, i, u, span, etc. + if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) { + self->inlineStyleStack.pop_back(); + self->updateEffectiveInlineStyle(); + } + + // Clear block style when leaving block elements + if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { + self->currentBlockStyle.reset(); + self->updateEffectiveInlineStyle(); + } } bool ChapterHtmlSlimParser::parseAndBuildPages() { @@ -384,10 +605,23 @@ void ChapterHtmlSlimParser::makePages() { } const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; + + // Apply marginTop before the paragraph + const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); + if (blockStyle.marginTop > 0) { + currentPageNextY += lineHeight * blockStyle.marginTop; + } + currentTextBlock->layoutAndExtractLines( renderer, fontId, viewportWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Extra paragraph spacing if enabled + + // Apply marginBottom after the paragraph + if (blockStyle.marginBottom > 0) { + currentPageNextY += lineHeight * blockStyle.marginBottom; + } + + // Extra paragraph spacing if enabled (default behavior) if (extraParagraphSpacing) { currentPageNextY += lineHeight / 2; } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index c559e15..c3f963c 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -8,6 +8,8 @@ #include "../ParsedText.h" #include "../blocks/TextBlock.h" +#include "../css/CssParser.h" +#include "../css/CssStyle.h" class Page; class GfxRenderer; @@ -23,6 +25,7 @@ class ChapterHtmlSlimParser { int skipUntilDepth = INT_MAX; int boldUntilDepth = INT_MAX; int italicUntilDepth = INT_MAX; + int underlineUntilDepth = INT_MAX; // buffer for building up words from characters, will auto break if longer than this // leave one char at end for null pointer char partWordBuffer[MAX_WORD_SIZE + 1] = {}; @@ -36,8 +39,24 @@ class ChapterHtmlSlimParser { uint8_t paragraphAlignment; uint16_t viewportWidth; uint16_t viewportHeight; + const CssParser* cssParser; + // Style tracking (replaces depth-based approach) + struct StyleStackEntry { + int depth = 0; + bool hasBold = false, bold = false; + bool hasItalic = false, italic = false; + bool hasUnderline = false, underline = false; + }; + std::vector inlineStyleStack; + CssStyle currentBlockStyle; + bool effectiveBold = false; + bool effectiveItalic = false; + bool effectiveUnderline = false; + + void updateEffectiveInlineStyle(); void startNewTextBlock(TextBlock::Style style); + void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle); void makePages(); // XML callbacks static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); @@ -50,7 +69,8 @@ class ChapterHtmlSlimParser { const uint8_t paragraphAlignment, const uint16_t viewportWidth, const uint16_t viewportHeight, const std::function)>& completePageFn, - const std::function& progressFn = nullptr) + const std::function& progressFn = nullptr, + const CssParser* cssParser = nullptr) : filepath(filepath), renderer(renderer), fontId(fontId), @@ -60,7 +80,8 @@ class ChapterHtmlSlimParser { viewportWidth(viewportWidth), viewportHeight(viewportHeight), completePageFn(completePageFn), - progressFn(progressFn) {} + progressFn(progressFn), + cssParser(cssParser) {} ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages(); void addLineToPage(std::shared_ptr line); diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index aee7e57..d403e76 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -8,6 +8,7 @@ namespace { constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml"; +constexpr char MEDIA_TYPE_CSS[] = "text/css"; constexpr char itemCacheFile[] = "/.items.bin"; } // namespace @@ -192,6 +193,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name } } + // Collect CSS files + if (mediaType == MEDIA_TYPE_CSS) { + self->cssFiles.push_back(href); + } + // EPUB 3: Check for nav document (properties contains "nav") if (!properties.empty() && self->tocNavPath.empty()) { // Properties is space-separated, check if "nav" is present as a word diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index 1940aaa..317f58e 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -1,6 +1,8 @@ #pragma once #include +#include + #include "Epub.h" #include "expat.h" @@ -38,6 +40,7 @@ class ContentOpfParser final : public Print { std::string tocNavPath; // EPUB 3 nav document path std::string coverItemHref; std::string textReferenceHref; + std::vector cssFiles; // CSS stylesheet paths explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache) diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index 7072fed..f35eb02 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -449,6 +449,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const { return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX; } +int GfxRenderer::getIndentWidth(const int fontId, const char* text) const { + if (fontMap.count(fontId) == 0) { + Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); + return 0; + } + + uint32_t cp; + int width = 0; + while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + width += fontMap.at(fontId).getGlyph(cp, EpdFontFamily::REGULAR)->advanceX; + } + return width; +} + int GfxRenderer::getFontAscenderSize(const int fontId) const { if (fontMap.count(fontId) == 0) { Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); diff --git a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h index b1fea69..ae9f483 100644 --- a/lib/GfxRenderer/GfxRenderer.h +++ b/lib/GfxRenderer/GfxRenderer.h @@ -78,6 +78,7 @@ class GfxRenderer { void drawText(int fontId, int x, int y, const char* text, bool black = true, EpdFontFamily::Style style = EpdFontFamily::REGULAR) const; int getSpaceWidth(int fontId) const; + int getIndentWidth(int fontId, const char* text) const; int getFontAscenderSize(int fontId) const; int getLineHeight(int fontId) const; std::string truncatedText(int fontId, const char* text, int maxWidth, From be10b90a71377fb0637206051eb80887f65b456f Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 17 Jan 2026 18:35:44 -0500 Subject: [PATCH 2/5] formatting: run clang-format-fix --- lib/Epub/Epub/Section.cpp | 4 +- lib/Epub/Epub/blocks/TextBlock.h | 3 +- lib/Epub/Epub/css/CssParser.cpp | 23 ++++---- lib/Epub/Epub/css/CssParser.h | 3 +- lib/Epub/Epub/css/CssStyle.h | 53 ++++++++----------- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 22 ++++---- 6 files changed, 46 insertions(+), 62 deletions(-) diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index d240432..c90a3a6 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -178,8 +178,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, - [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn, epub->getCssParser()); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn, + epub->getCssParser()); success = visitor.parseAndBuildPages(); SdMan.remove(tmpHtmlPath.c_str()); diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 68f5c7f..e7993fe 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -30,8 +30,7 @@ class TextBlock final : public Block { public: explicit TextBlock(std::list words, std::list word_xpos, std::list word_styles, const Style style, - const BlockStyle& blockStyle = BlockStyle(), - std::list word_underlines = std::list()) + const BlockStyle& blockStyle = BlockStyle(), std::list word_underlines = std::list()) : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index 7ef3f83..b62f0b5 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -14,9 +14,7 @@ constexpr size_t READ_BUFFER_SIZE = 512; constexpr size_t MAX_CSS_SIZE = 64 * 1024; // Check if character is CSS whitespace -bool isCssWhitespace(const char c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; -} +bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; } // Read entire file into string (with size limit) std::string readFileContent(FsFile& file) { @@ -88,8 +86,7 @@ size_t skipAtRule(const std::string& css, const size_t start) { // Extract next rule from CSS content // Returns true if a rule was found, with selector and body filled -bool extractNextRule(const std::string& css, size_t& pos, - std::string& selector, std::string& body) { +bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) { selector.clear(); body.clear(); @@ -126,8 +123,10 @@ bool extractNextRule(const std::string& css, size_t& pos, size_t bodyEnd = bodyStart; while (bodyEnd < css.size() && depth > 0) { - if (css[bodyEnd] == '{') ++depth; - else if (css[bodyEnd] == '}') --depth; + if (css[bodyEnd] == '{') + ++depth; + else if (css[bodyEnd] == '}') + --depth; ++bodyEnd; } @@ -402,8 +401,7 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { // Rule processing -void CssParser::processRuleBlock(const std::string& selectorGroup, - const std::string& declarations) { +void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) { const CssStyle style = parseDeclarations(declarations); // Only store if any properties were set @@ -458,8 +456,7 @@ bool CssParser::loadFromStream(FsFile& source) { // Style resolution -CssStyle CssParser::resolveStyle(const std::string& tagName, - const std::string& classAttr) const { +CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const { CssStyle result; const std::string tag = normalized(tagName); @@ -498,6 +495,4 @@ CssStyle CssParser::resolveStyle(const std::string& tagName, // Inline style parsing (static - doesn't need rule database) -CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { - return parseDeclarations(styleValue); -} +CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); } diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h index a10e902..a180236 100644 --- a/lib/Epub/Epub/css/CssParser.h +++ b/lib/Epub/Epub/css/CssParser.h @@ -52,8 +52,7 @@ class CssParser { * @param classAttr The class attribute value (may contain multiple space-separated classes) * @return Combined style with all applicable rules merged */ - [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, - const std::string& classAttr) const; + [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const; /** * Parse an inline style attribute string. diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h index ea6a9d3..8333161 100644 --- a/lib/Epub/Epub/css/CssStyle.h +++ b/lib/Epub/Epub/css/CssStyle.h @@ -3,31 +3,16 @@ #include // Text alignment options matching CSS text-align property -enum class TextAlign : uint8_t { - None = 0, - Left = 1, - Right = 2, - Center = 3, - Justify = 4 -}; +enum class TextAlign : uint8_t { None = 0, Left = 1, Right = 2, Center = 3, Justify = 4 }; // Font style options matching CSS font-style property -enum class CssFontStyle : uint8_t { - Normal = 0, - Italic = 1 -}; +enum class CssFontStyle : uint8_t { Normal = 0, Italic = 1 }; // Font weight options - CSS supports 100-900, we simplify to normal/bold -enum class CssFontWeight : uint8_t { - Normal = 0, - Bold = 1 -}; +enum class CssFontWeight : uint8_t { Normal = 0, Bold = 1 }; // Text decoration options -enum class CssTextDecoration : uint8_t { - None = 0, - Underline = 1 -}; +enum class CssTextDecoration : uint8_t { None = 0, Underline = 1 }; // Bitmask for tracking which properties have been explicitly set struct CssPropertyFlags { @@ -42,13 +27,21 @@ struct CssPropertyFlags { uint16_t paddingBottom : 1; uint16_t reserved : 7; - CssPropertyFlags() : alignment(0), fontStyle(0), fontWeight(0), decoration(0), - indent(0), marginTop(0), marginBottom(0), - paddingTop(0), paddingBottom(0), reserved(0) {} + CssPropertyFlags() + : alignment(0), + fontStyle(0), + fontWeight(0), + decoration(0), + indent(0), + marginTop(0), + marginBottom(0), + paddingTop(0), + paddingBottom(0), + reserved(0) {} [[nodiscard]] bool anySet() const { - return alignment || fontStyle || fontWeight || decoration || - indent || marginTop || marginBottom || paddingTop || paddingBottom; + return alignment || fontStyle || fontWeight || decoration || indent || marginTop || marginBottom || paddingTop || + paddingBottom; } void clearAll() { @@ -65,13 +58,13 @@ struct CssStyle { CssFontWeight fontWeight = CssFontWeight::Normal; CssTextDecoration decoration = CssTextDecoration::None; - float indentPixels = 0.0f; // First-line indent in pixels - int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) - int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2) - int8_t paddingTop = 0; // Padding before (in lines, 0-2) - int8_t paddingBottom = 0; // Padding after (in lines, 0-2) + float indentPixels = 0.0f; // First-line indent in pixels + int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) + int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2) + int8_t paddingTop = 0; // Padding before (in lines, 0-2) + int8_t paddingBottom = 0; // Padding after (in lines, 0-2) - CssPropertyFlags defined; // Tracks which properties were explicitly set + CssPropertyFlags defined; // Tracks which properties were explicitly set // Apply properties from another style, only overwriting if the other style // has that property explicitly defined diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 0143a56..923ed68 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -59,7 +59,8 @@ void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { // Start with block-level styles effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic; - effectiveUnderline = currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; + effectiveUnderline = + currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; // Apply inline style stack in order for (const auto& entry : inlineStyleStack) { @@ -90,9 +91,7 @@ void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, cons currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, blockStyle)); } -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { - startNewTextBlock(style, BlockStyle{}); -} +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); @@ -174,8 +173,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } // Determine if this is a block element - bool isBlockElement = - matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); + bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); // Compute CSS style for this element CssStyle cssStyle; @@ -415,8 +413,8 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n // Check if any style state will change after we decrement depth // If so, we MUST flush the partWordBuffer with the CURRENT style first // Note: depth hasn't been decremented yet, so we check against (depth - 1) - const bool willPopStyleStack = !self->inlineStyleStack.empty() && - self->inlineStyleStack.back().depth == self->depth - 1; + const bool willPopStyleStack = + !self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1; const bool willClearBold = self->boldUntilDepth == self->depth - 1; const bool willClearItalic = self->italicUntilDepth == self->depth - 1; const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; @@ -426,10 +424,10 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n // Flush buffer with current style BEFORE any style changes if (self->partWordBufferIndex > 0) { // Flush if style will change OR if we're closing a block/structural element - const bool shouldFlush = styleWillChange || - matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || - matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1; + const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || + matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || + matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1; if (shouldFlush) { // Use combined depth-based and CSS-based style From 750a6ee1d8ff5e1d438172b7fabe9e94ad552adc Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Mon, 19 Jan 2026 22:39:40 -0600 Subject: [PATCH 3/5] rerun clang-format --- lib/Epub/Epub/Section.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index d33cbb3..a9c5c28 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -186,8 +186,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, - [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn, epub->getCssParser()); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn, + epub->getCssParser()); Hyphenator::setPreferredLanguage(epub->getLanguage()); success = visitor.parseAndBuildPages(); From 5c9412b1419bf798a76ee07ff97a358f8d384789 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Mon, 19 Jan 2026 23:09:35 -0600 Subject: [PATCH 4/5] fix compilation errors --- lib/Epub/Epub/ParsedText.cpp | 3 ++- lib/Epub/Epub/ParsedText.h | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 5e3f298..6ae1896 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -99,7 +99,8 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt); // Add CSS text-indent to first word width - if (isFirst && blockStyle.textIndent > 0 && shouldIndent) { + if (isFirst && blockStyle.textIndent > 0 && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && + !extraParagraphSpacing) { width += static_cast(blockStyle.textIndent); isFirst = false; } else { diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index fcc797a..cc2596c 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -36,9 +36,11 @@ class ParsedText { public: explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, - const bool hyphenationEnabled = false, - const BlockStyle& blockStyle = BlockStyle())) - : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled, blockStyle(blockStyle)) {} + const bool hyphenationEnabled = false, const BlockStyle& blockStyle = BlockStyle()) + : style(style), + blockStyle(blockStyle), + extraParagraphSpacing(extraParagraphSpacing), + hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); From 8f3d226bf3af88fad24ed9c0e7564638d44c5c77 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 20 Jan 2026 10:27:55 -0600 Subject: [PATCH 5/5] increment versions to prevent error when opening cached EPUBs --- lib/Epub/Epub/BookMetadataCache.cpp | 2 +- lib/Epub/Epub/Section.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 790c368..47ba227 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -9,7 +9,7 @@ #include "FsHelpers.h" namespace { -constexpr uint8_t BOOK_CACHE_VERSION = 5; +constexpr uint8_t BOOK_CACHE_VERSION = 6; constexpr char bookBinFile[] = "/book.bin"; constexpr char tmpSpineBinFile[] = "/spine.bin.tmp"; constexpr char tmpTocBinFile[] = "/toc.bin.tmp"; diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index a9c5c28..f346bd8 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -8,7 +8,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 10; +constexpr uint8_t SECTION_FILE_VERSION = 11; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(uint32_t);