feat: Add CSS parsing and CSS support in EPUBs (#411)
## Summary * **What is the goal of this PR?** - Adds basic CSS parsing to EPUBs and determine the CSS rules when rendering to the screen so that text is styled correctly. Currently supports bold, underline, italics, margin, padding, and text alignment ## Additional Context - My main reason for wanting this is that the book I'm currently reading, Carl's Doomsday Scenario (2nd in the Dungeon Crawler Carl series), relies _a lot_ on styled text for telling parts of the story. When text is bolded, it's supposed to be a message that's rendered "on-screen" in the story. When characters are "chatting" with each other, the text is bolded and their names are underlined. Plus, normal emphasis is provided with italicizing words here and there. So, this greatly improves my experience reading this book on the Xteink, and I figured it was useful enough for others too. - For transparency: I'm a software engineer, but I'm mostly frontend and TypeScript/JavaScript. It's been _years_ since I did any C/C++, so I would not be surprised if I'm doing something dumb along the way in this code. Please don't hesitate to ask for changes if something looks off. I heavily relied on Claude Code for help, and I had a lot of inspiration from how [microreader](https://github.com/CidVonHighwind/microreader) achieves their CSS parsing and styling. I did give this as good of a code review as I could and went through everything, and _it works on my machine_ 😄 ### Before   ### After   --- ### AI Usage Did you use AI tools to help write this code? **YES**, Claude Code
This commit is contained in:
697
lib/Epub/Epub/css/CssParser.cpp
Normal file
697
lib/Epub/Epub/css/CssParser.cpp
Normal file
@@ -0,0 +1,697 @@
|
||||
#include "CssParser.h"
|
||||
|
||||
#include <HardwareSerial.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
|
||||
namespace {
|
||||
|
||||
// Buffer size for reading CSS files
|
||||
constexpr size_t READ_BUFFER_SIZE = 512;
|
||||
|
||||
// Maximum CSS file size we'll process (prevent memory issues)
|
||||
constexpr size_t MAX_CSS_SIZE = 64 * 1024;
|
||||
|
||||
// Check if character is CSS whitespace
|
||||
bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; }
|
||||
|
||||
// Read entire file into string (with size limit)
|
||||
std::string readFileContent(FsFile& file) {
|
||||
std::string content;
|
||||
content.reserve(std::min(static_cast<size_t>(file.size()), MAX_CSS_SIZE));
|
||||
|
||||
char buffer[READ_BUFFER_SIZE];
|
||||
while (file.available() && content.size() < MAX_CSS_SIZE) {
|
||||
const int bytesRead = file.read(buffer, sizeof(buffer));
|
||||
if (bytesRead <= 0) break;
|
||||
content.append(buffer, bytesRead);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
// Remove CSS comments (/* ... */) from content
|
||||
std::string stripComments(const std::string& css) {
|
||||
std::string result;
|
||||
result.reserve(css.size());
|
||||
|
||||
size_t pos = 0;
|
||||
while (pos < css.size()) {
|
||||
// Look for start of comment
|
||||
if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') {
|
||||
// Find end of comment
|
||||
const size_t endPos = css.find("*/", pos + 2);
|
||||
if (endPos == std::string::npos) {
|
||||
// Unterminated comment - skip rest of file
|
||||
break;
|
||||
}
|
||||
pos = endPos + 2;
|
||||
} else {
|
||||
result.push_back(css[pos]);
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Skip @-rules (like @media, @import, @font-face)
|
||||
// Returns position after the @-rule
|
||||
size_t skipAtRule(const std::string& css, const size_t start) {
|
||||
// Find the end - either semicolon (simple @-rule) or matching brace
|
||||
size_t pos = start + 1; // Skip the '@'
|
||||
|
||||
// Skip identifier
|
||||
while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) {
|
||||
++pos;
|
||||
}
|
||||
|
||||
// Look for { or ;
|
||||
int braceDepth = 0;
|
||||
while (pos < css.size()) {
|
||||
const char c = css[pos];
|
||||
if (c == '{') {
|
||||
++braceDepth;
|
||||
} else if (c == '}') {
|
||||
--braceDepth;
|
||||
if (braceDepth == 0) {
|
||||
return pos + 1;
|
||||
}
|
||||
} else if (c == ';' && braceDepth == 0) {
|
||||
return pos + 1;
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
return css.size();
|
||||
}
|
||||
|
||||
// Extract next rule from CSS content
|
||||
// Returns true if a rule was found, with selector and body filled
|
||||
bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) {
|
||||
selector.clear();
|
||||
body.clear();
|
||||
|
||||
// Skip whitespace and @-rules until we find a regular rule
|
||||
while (pos < css.size()) {
|
||||
// Skip whitespace
|
||||
while (pos < css.size() && isCssWhitespace(css[pos])) {
|
||||
++pos;
|
||||
}
|
||||
|
||||
if (pos >= css.size()) return false;
|
||||
|
||||
// Handle @-rules iteratively (avoids recursion/stack overflow)
|
||||
if (css[pos] == '@') {
|
||||
pos = skipAtRule(css, pos);
|
||||
continue; // Try again after skipping the @-rule
|
||||
}
|
||||
|
||||
break; // Found start of a regular rule
|
||||
}
|
||||
|
||||
if (pos >= css.size()) return false;
|
||||
|
||||
// Find opening brace
|
||||
const size_t bracePos = css.find('{', pos);
|
||||
if (bracePos == std::string::npos) return false;
|
||||
|
||||
// Extract selector (everything before the brace)
|
||||
selector = css.substr(pos, bracePos - pos);
|
||||
|
||||
// Find matching closing brace
|
||||
int depth = 1;
|
||||
const size_t bodyStart = bracePos + 1;
|
||||
size_t bodyEnd = bodyStart;
|
||||
|
||||
while (bodyEnd < css.size() && depth > 0) {
|
||||
if (css[bodyEnd] == '{')
|
||||
++depth;
|
||||
else if (css[bodyEnd] == '}')
|
||||
--depth;
|
||||
++bodyEnd;
|
||||
}
|
||||
|
||||
// Extract body (between braces)
|
||||
if (bodyEnd > bodyStart) {
|
||||
body = css.substr(bodyStart, bodyEnd - bodyStart - 1);
|
||||
}
|
||||
|
||||
pos = bodyEnd;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// String utilities implementation
|
||||
|
||||
std::string CssParser::normalized(const std::string& s) {
|
||||
std::string result;
|
||||
result.reserve(s.size());
|
||||
|
||||
bool inSpace = true; // Start true to skip leading space
|
||||
for (const char c : s) {
|
||||
if (isCssWhitespace(c)) {
|
||||
if (!inSpace) {
|
||||
result.push_back(' ');
|
||||
inSpace = true;
|
||||
}
|
||||
} else {
|
||||
result.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
|
||||
inSpace = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove trailing space
|
||||
if (!result.empty() && result.back() == ' ') {
|
||||
result.pop_back();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> CssParser::splitOnChar(const std::string& s, const char delimiter) {
|
||||
std::vector<std::string> parts;
|
||||
size_t start = 0;
|
||||
|
||||
for (size_t i = 0; i <= s.size(); ++i) {
|
||||
if (i == s.size() || s[i] == delimiter) {
|
||||
std::string part = s.substr(start, i - start);
|
||||
std::string trimmed = normalized(part);
|
||||
if (!trimmed.empty()) {
|
||||
parts.push_back(trimmed);
|
||||
}
|
||||
start = i + 1;
|
||||
}
|
||||
}
|
||||
return parts;
|
||||
}
|
||||
|
||||
std::vector<std::string> CssParser::splitWhitespace(const std::string& s) {
|
||||
std::vector<std::string> parts;
|
||||
size_t start = 0;
|
||||
bool inWord = false;
|
||||
|
||||
for (size_t i = 0; i <= s.size(); ++i) {
|
||||
const bool isSpace = i == s.size() || isCssWhitespace(s[i]);
|
||||
if (isSpace && inWord) {
|
||||
parts.push_back(s.substr(start, i - start));
|
||||
inWord = false;
|
||||
} else if (!isSpace && !inWord) {
|
||||
start = i;
|
||||
inWord = true;
|
||||
}
|
||||
}
|
||||
return parts;
|
||||
}
|
||||
|
||||
// Property value interpreters
|
||||
|
||||
CssTextAlign CssParser::interpretAlignment(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
|
||||
if (v == "left" || v == "start") return CssTextAlign::Left;
|
||||
if (v == "right" || v == "end") return CssTextAlign::Right;
|
||||
if (v == "center") return CssTextAlign::Center;
|
||||
if (v == "justify") return CssTextAlign::Justify;
|
||||
|
||||
return CssTextAlign::Left;
|
||||
}
|
||||
|
||||
CssFontStyle CssParser::interpretFontStyle(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
|
||||
if (v == "italic" || v == "oblique") return CssFontStyle::Italic;
|
||||
return CssFontStyle::Normal;
|
||||
}
|
||||
|
||||
CssFontWeight CssParser::interpretFontWeight(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
|
||||
// Named values
|
||||
if (v == "bold" || v == "bolder") return CssFontWeight::Bold;
|
||||
if (v == "normal" || v == "lighter") return CssFontWeight::Normal;
|
||||
|
||||
// Numeric values: 100-900
|
||||
// CSS spec: 400 = normal, 700 = bold
|
||||
// We use: 0-400 = normal, 700+ = bold, 500-600 = normal (conservative)
|
||||
char* endPtr = nullptr;
|
||||
const long numericWeight = std::strtol(v.c_str(), &endPtr, 10);
|
||||
|
||||
// If we parsed a number and consumed the whole string
|
||||
if (endPtr != v.c_str() && *endPtr == '\0') {
|
||||
return numericWeight >= 700 ? CssFontWeight::Bold : CssFontWeight::Normal;
|
||||
}
|
||||
|
||||
return CssFontWeight::Normal;
|
||||
}
|
||||
|
||||
CssTextDecoration CssParser::interpretDecoration(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
|
||||
// text-decoration can have multiple space-separated values
|
||||
if (v.find("underline") != std::string::npos) {
|
||||
return CssTextDecoration::Underline;
|
||||
}
|
||||
return CssTextDecoration::None;
|
||||
}
|
||||
|
||||
CssLength CssParser::interpretLength(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
if (v.empty()) return CssLength{};
|
||||
|
||||
// Find where the number ends
|
||||
size_t unitStart = v.size();
|
||||
for (size_t i = 0; i < v.size(); ++i) {
|
||||
const char c = v[i];
|
||||
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') {
|
||||
unitStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string numPart = v.substr(0, unitStart);
|
||||
const std::string unitPart = v.substr(unitStart);
|
||||
|
||||
// Parse numeric value
|
||||
char* endPtr = nullptr;
|
||||
const float numericValue = std::strtof(numPart.c_str(), &endPtr);
|
||||
if (endPtr == numPart.c_str()) return CssLength{}; // No number parsed
|
||||
|
||||
// Determine unit type (preserve for deferred resolution)
|
||||
auto unit = CssUnit::Pixels;
|
||||
if (unitPart == "em") {
|
||||
unit = CssUnit::Em;
|
||||
} else if (unitPart == "rem") {
|
||||
unit = CssUnit::Rem;
|
||||
} else if (unitPart == "pt") {
|
||||
unit = CssUnit::Points;
|
||||
}
|
||||
// px and unitless default to Pixels
|
||||
|
||||
return CssLength{numericValue, unit};
|
||||
}
|
||||
|
||||
int8_t CssParser::interpretSpacing(const std::string& val) {
|
||||
const std::string v = normalized(val);
|
||||
if (v.empty()) return 0;
|
||||
|
||||
// For spacing, we convert to "lines" (discrete units for e-ink)
|
||||
// 1em ≈ 1 line, percentages based on ~30 lines per page
|
||||
|
||||
float multiplier = 0.0f;
|
||||
size_t unitStart = v.size();
|
||||
|
||||
for (size_t i = 0; i < v.size(); ++i) {
|
||||
const char c = v[i];
|
||||
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') {
|
||||
unitStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string numPart = v.substr(0, unitStart);
|
||||
const std::string unitPart = v.substr(unitStart);
|
||||
|
||||
if (unitPart == "em" || unitPart == "rem") {
|
||||
multiplier = 1.0f; // 1em = 1 line
|
||||
} else if (unitPart == "%") {
|
||||
multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines
|
||||
} else {
|
||||
return 0; // Unsupported unit for spacing
|
||||
}
|
||||
|
||||
char* endPtr = nullptr;
|
||||
const float numericValue = std::strtof(numPart.c_str(), &endPtr);
|
||||
|
||||
if (endPtr == numPart.c_str()) return 0;
|
||||
|
||||
int lines = static_cast<int>(numericValue * multiplier);
|
||||
|
||||
// Clamp to reasonable range (0-2 lines)
|
||||
if (lines < 0) lines = 0;
|
||||
if (lines > 2) lines = 2;
|
||||
|
||||
return static_cast<int8_t>(lines);
|
||||
}
|
||||
|
||||
// Declaration parsing
|
||||
|
||||
CssStyle CssParser::parseDeclarations(const std::string& declBlock) {
|
||||
CssStyle style;
|
||||
|
||||
// Split declarations by semicolon
|
||||
const auto declarations = splitOnChar(declBlock, ';');
|
||||
|
||||
for (const auto& decl : declarations) {
|
||||
// Find colon separator
|
||||
const size_t colonPos = decl.find(':');
|
||||
if (colonPos == std::string::npos || colonPos == 0) continue;
|
||||
|
||||
std::string propName = normalized(decl.substr(0, colonPos));
|
||||
std::string propValue = normalized(decl.substr(colonPos + 1));
|
||||
|
||||
if (propName.empty() || propValue.empty()) continue;
|
||||
|
||||
// Match property and set value
|
||||
if (propName == "text-align") {
|
||||
style.textAlign = interpretAlignment(propValue);
|
||||
style.defined.textAlign = 1;
|
||||
} else if (propName == "font-style") {
|
||||
style.fontStyle = interpretFontStyle(propValue);
|
||||
style.defined.fontStyle = 1;
|
||||
} else if (propName == "font-weight") {
|
||||
style.fontWeight = interpretFontWeight(propValue);
|
||||
style.defined.fontWeight = 1;
|
||||
} else if (propName == "text-decoration" || propName == "text-decoration-line") {
|
||||
style.textDecoration = interpretDecoration(propValue);
|
||||
style.defined.textDecoration = 1;
|
||||
} else if (propName == "text-indent") {
|
||||
style.textIndent = interpretLength(propValue);
|
||||
style.defined.textIndent = 1;
|
||||
} else if (propName == "margin-top") {
|
||||
style.marginTop = interpretLength(propValue);
|
||||
style.defined.marginTop = 1;
|
||||
} else if (propName == "margin-bottom") {
|
||||
style.marginBottom = interpretLength(propValue);
|
||||
style.defined.marginBottom = 1;
|
||||
} else if (propName == "margin-left") {
|
||||
style.marginLeft = interpretLength(propValue);
|
||||
style.defined.marginLeft = 1;
|
||||
} else if (propName == "margin-right") {
|
||||
style.marginRight = interpretLength(propValue);
|
||||
style.defined.marginRight = 1;
|
||||
} else if (propName == "margin") {
|
||||
// Shorthand: 1-4 values for top, right, bottom, left
|
||||
const auto values = splitWhitespace(propValue);
|
||||
if (!values.empty()) {
|
||||
style.marginTop = interpretLength(values[0]);
|
||||
style.marginRight = values.size() >= 2 ? interpretLength(values[1]) : style.marginTop;
|
||||
style.marginBottom = values.size() >= 3 ? interpretLength(values[2]) : style.marginTop;
|
||||
style.marginLeft = values.size() >= 4 ? interpretLength(values[3]) : style.marginRight;
|
||||
style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1;
|
||||
}
|
||||
} else if (propName == "padding-top") {
|
||||
style.paddingTop = interpretLength(propValue);
|
||||
style.defined.paddingTop = 1;
|
||||
} else if (propName == "padding-bottom") {
|
||||
style.paddingBottom = interpretLength(propValue);
|
||||
style.defined.paddingBottom = 1;
|
||||
} else if (propName == "padding-left") {
|
||||
style.paddingLeft = interpretLength(propValue);
|
||||
style.defined.paddingLeft = 1;
|
||||
} else if (propName == "padding-right") {
|
||||
style.paddingRight = interpretLength(propValue);
|
||||
style.defined.paddingRight = 1;
|
||||
} else if (propName == "padding") {
|
||||
// Shorthand: 1-4 values for top, right, bottom, left
|
||||
const auto values = splitWhitespace(propValue);
|
||||
if (!values.empty()) {
|
||||
style.paddingTop = interpretLength(values[0]);
|
||||
style.paddingRight = values.size() >= 2 ? interpretLength(values[1]) : style.paddingTop;
|
||||
style.paddingBottom = values.size() >= 3 ? interpretLength(values[2]) : style.paddingTop;
|
||||
style.paddingLeft = values.size() >= 4 ? interpretLength(values[3]) : style.paddingRight;
|
||||
style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom =
|
||||
style.defined.paddingLeft = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return style;
|
||||
}
|
||||
|
||||
// Rule processing
|
||||
|
||||
void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) {
|
||||
const CssStyle style = parseDeclarations(declarations);
|
||||
|
||||
// Only store if any properties were set
|
||||
if (!style.defined.anySet()) return;
|
||||
|
||||
// Handle comma-separated selectors
|
||||
const auto selectors = splitOnChar(selectorGroup, ',');
|
||||
|
||||
for (const auto& sel : selectors) {
|
||||
// Normalize the selector
|
||||
std::string key = normalized(sel);
|
||||
if (key.empty()) continue;
|
||||
|
||||
// Store or merge with existing
|
||||
auto it = rulesBySelector_.find(key);
|
||||
if (it != rulesBySelector_.end()) {
|
||||
it->second.applyOver(style);
|
||||
} else {
|
||||
rulesBySelector_[key] = style;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Main parsing entry point
|
||||
|
||||
bool CssParser::loadFromStream(FsFile& source) {
|
||||
if (!source) {
|
||||
Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read file content
|
||||
const std::string content = readFileContent(source);
|
||||
if (content.empty()) {
|
||||
return true; // Empty file is valid
|
||||
}
|
||||
|
||||
// Remove comments
|
||||
const std::string cleaned = stripComments(content);
|
||||
|
||||
// Parse rules
|
||||
size_t pos = 0;
|
||||
std::string selector, body;
|
||||
|
||||
while (extractNextRule(cleaned, pos, selector, body)) {
|
||||
processRuleBlock(selector, body);
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Style resolution
|
||||
|
||||
CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const {
|
||||
CssStyle result;
|
||||
const std::string tag = normalized(tagName);
|
||||
|
||||
// 1. Apply element-level style (lowest priority)
|
||||
const auto tagIt = rulesBySelector_.find(tag);
|
||||
if (tagIt != rulesBySelector_.end()) {
|
||||
result.applyOver(tagIt->second);
|
||||
}
|
||||
|
||||
// 2. Apply class styles (medium priority)
|
||||
if (!classAttr.empty()) {
|
||||
const auto classes = splitWhitespace(classAttr);
|
||||
|
||||
for (const auto& cls : classes) {
|
||||
std::string classKey = "." + normalized(cls);
|
||||
|
||||
auto classIt = rulesBySelector_.find(classKey);
|
||||
if (classIt != rulesBySelector_.end()) {
|
||||
result.applyOver(classIt->second);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Apply element.class styles (higher priority)
|
||||
for (const auto& cls : classes) {
|
||||
std::string combinedKey = tag + "." + normalized(cls);
|
||||
|
||||
auto combinedIt = rulesBySelector_.find(combinedKey);
|
||||
if (combinedIt != rulesBySelector_.end()) {
|
||||
result.applyOver(combinedIt->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Inline style parsing (static - doesn't need rule database)
|
||||
|
||||
CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); }
|
||||
|
||||
// Cache serialization
|
||||
|
||||
// Cache format version - increment when format changes
|
||||
constexpr uint8_t CSS_CACHE_VERSION = 1;
|
||||
|
||||
bool CssParser::saveToCache(FsFile& file) const {
|
||||
if (!file) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Write version
|
||||
file.write(CSS_CACHE_VERSION);
|
||||
|
||||
// Write rule count
|
||||
const auto ruleCount = static_cast<uint16_t>(rulesBySelector_.size());
|
||||
file.write(reinterpret_cast<const uint8_t*>(&ruleCount), sizeof(ruleCount));
|
||||
|
||||
// Write each rule: selector string + CssStyle fields
|
||||
for (const auto& pair : rulesBySelector_) {
|
||||
// Write selector string (length-prefixed)
|
||||
const auto selectorLen = static_cast<uint16_t>(pair.first.size());
|
||||
file.write(reinterpret_cast<const uint8_t*>(&selectorLen), sizeof(selectorLen));
|
||||
file.write(reinterpret_cast<const uint8_t*>(pair.first.data()), selectorLen);
|
||||
|
||||
// Write CssStyle fields (all are POD types)
|
||||
const CssStyle& style = pair.second;
|
||||
file.write(static_cast<uint8_t>(style.textAlign));
|
||||
file.write(static_cast<uint8_t>(style.fontStyle));
|
||||
file.write(static_cast<uint8_t>(style.fontWeight));
|
||||
file.write(static_cast<uint8_t>(style.textDecoration));
|
||||
|
||||
// Write CssLength fields (value + unit)
|
||||
auto writeLength = [&file](const CssLength& len) {
|
||||
file.write(reinterpret_cast<const uint8_t*>(&len.value), sizeof(len.value));
|
||||
file.write(static_cast<uint8_t>(len.unit));
|
||||
};
|
||||
|
||||
writeLength(style.textIndent);
|
||||
writeLength(style.marginTop);
|
||||
writeLength(style.marginBottom);
|
||||
writeLength(style.marginLeft);
|
||||
writeLength(style.marginRight);
|
||||
writeLength(style.paddingTop);
|
||||
writeLength(style.paddingBottom);
|
||||
writeLength(style.paddingLeft);
|
||||
writeLength(style.paddingRight);
|
||||
|
||||
// Write defined flags as uint16_t
|
||||
uint16_t definedBits = 0;
|
||||
if (style.defined.textAlign) definedBits |= 1 << 0;
|
||||
if (style.defined.fontStyle) definedBits |= 1 << 1;
|
||||
if (style.defined.fontWeight) definedBits |= 1 << 2;
|
||||
if (style.defined.textDecoration) definedBits |= 1 << 3;
|
||||
if (style.defined.textIndent) definedBits |= 1 << 4;
|
||||
if (style.defined.marginTop) definedBits |= 1 << 5;
|
||||
if (style.defined.marginBottom) definedBits |= 1 << 6;
|
||||
if (style.defined.marginLeft) definedBits |= 1 << 7;
|
||||
if (style.defined.marginRight) definedBits |= 1 << 8;
|
||||
if (style.defined.paddingTop) definedBits |= 1 << 9;
|
||||
if (style.defined.paddingBottom) definedBits |= 1 << 10;
|
||||
if (style.defined.paddingLeft) definedBits |= 1 << 11;
|
||||
if (style.defined.paddingRight) definedBits |= 1 << 12;
|
||||
file.write(reinterpret_cast<const uint8_t*>(&definedBits), sizeof(definedBits));
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [CSS] Saved %u rules to cache\n", millis(), ruleCount);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CssParser::loadFromCache(FsFile& file) {
|
||||
if (!file) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Clear existing rules
|
||||
clear();
|
||||
|
||||
// Read and verify version
|
||||
uint8_t version = 0;
|
||||
if (file.read(&version, 1) != 1 || version != CSS_CACHE_VERSION) {
|
||||
Serial.printf("[%lu] [CSS] Cache version mismatch (got %u, expected %u)\n", millis(), version, CSS_CACHE_VERSION);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read rule count
|
||||
uint16_t ruleCount = 0;
|
||||
if (file.read(&ruleCount, sizeof(ruleCount)) != sizeof(ruleCount)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read each rule
|
||||
for (uint16_t i = 0; i < ruleCount; ++i) {
|
||||
// Read selector string
|
||||
uint16_t selectorLen = 0;
|
||||
if (file.read(&selectorLen, sizeof(selectorLen)) != sizeof(selectorLen)) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string selector;
|
||||
selector.resize(selectorLen);
|
||||
if (file.read(&selector[0], selectorLen) != selectorLen) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read CssStyle fields
|
||||
CssStyle style;
|
||||
uint8_t enumVal;
|
||||
|
||||
if (file.read(&enumVal, 1) != 1) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
style.textAlign = static_cast<CssTextAlign>(enumVal);
|
||||
|
||||
if (file.read(&enumVal, 1) != 1) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
style.fontStyle = static_cast<CssFontStyle>(enumVal);
|
||||
|
||||
if (file.read(&enumVal, 1) != 1) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
style.fontWeight = static_cast<CssFontWeight>(enumVal);
|
||||
|
||||
if (file.read(&enumVal, 1) != 1) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
style.textDecoration = static_cast<CssTextDecoration>(enumVal);
|
||||
|
||||
// Read CssLength fields
|
||||
auto readLength = [&file](CssLength& len) -> bool {
|
||||
if (file.read(&len.value, sizeof(len.value)) != sizeof(len.value)) {
|
||||
return false;
|
||||
}
|
||||
uint8_t unitVal;
|
||||
if (file.read(&unitVal, 1) != 1) {
|
||||
return false;
|
||||
}
|
||||
len.unit = static_cast<CssUnit>(unitVal);
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!readLength(style.textIndent) || !readLength(style.marginTop) || !readLength(style.marginBottom) ||
|
||||
!readLength(style.marginLeft) || !readLength(style.marginRight) || !readLength(style.paddingTop) ||
|
||||
!readLength(style.paddingBottom) || !readLength(style.paddingLeft) || !readLength(style.paddingRight)) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read defined flags
|
||||
uint16_t definedBits = 0;
|
||||
if (file.read(&definedBits, sizeof(definedBits)) != sizeof(definedBits)) {
|
||||
rulesBySelector_.clear();
|
||||
return false;
|
||||
}
|
||||
style.defined.textAlign = (definedBits & 1 << 0) != 0;
|
||||
style.defined.fontStyle = (definedBits & 1 << 1) != 0;
|
||||
style.defined.fontWeight = (definedBits & 1 << 2) != 0;
|
||||
style.defined.textDecoration = (definedBits & 1 << 3) != 0;
|
||||
style.defined.textIndent = (definedBits & 1 << 4) != 0;
|
||||
style.defined.marginTop = (definedBits & 1 << 5) != 0;
|
||||
style.defined.marginBottom = (definedBits & 1 << 6) != 0;
|
||||
style.defined.marginLeft = (definedBits & 1 << 7) != 0;
|
||||
style.defined.marginRight = (definedBits & 1 << 8) != 0;
|
||||
style.defined.paddingTop = (definedBits & 1 << 9) != 0;
|
||||
style.defined.paddingBottom = (definedBits & 1 << 10) != 0;
|
||||
style.defined.paddingLeft = (definedBits & 1 << 11) != 0;
|
||||
style.defined.paddingRight = (definedBits & 1 << 12) != 0;
|
||||
|
||||
rulesBySelector_[selector] = style;
|
||||
}
|
||||
|
||||
Serial.printf("[%lu] [CSS] Loaded %u rules from cache\n", millis(), ruleCount);
|
||||
return true;
|
||||
}
|
||||
114
lib/Epub/Epub/css/CssParser.h
Normal file
114
lib/Epub/Epub/css/CssParser.h
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
|
||||
#include <SdFat.h>
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "CssStyle.h"
|
||||
|
||||
/**
|
||||
* Lightweight CSS parser for EPUB stylesheets
|
||||
*
|
||||
* Parses CSS files and extracts styling information relevant for e-ink display.
|
||||
* Uses a two-phase approach: first tokenizes the CSS content, then builds
|
||||
* a rule database that can be queried during HTML parsing.
|
||||
*
|
||||
* Supported selectors:
|
||||
* - Element selectors: p, div, h1, etc.
|
||||
* - Class selectors: .classname
|
||||
* - Combined: element.classname
|
||||
* - Grouped: selector1, selector2 { }
|
||||
*
|
||||
* Not supported (silently ignored):
|
||||
* - Descendant/child selectors
|
||||
* - Pseudo-classes and pseudo-elements
|
||||
* - Media queries (content is skipped)
|
||||
* - @import, @font-face, etc.
|
||||
*/
|
||||
class CssParser {
|
||||
public:
|
||||
CssParser() = default;
|
||||
~CssParser() = default;
|
||||
|
||||
// Non-copyable
|
||||
CssParser(const CssParser&) = delete;
|
||||
CssParser& operator=(const CssParser&) = delete;
|
||||
|
||||
/**
|
||||
* Load and parse CSS from a file stream.
|
||||
* Can be called multiple times to accumulate rules from multiple stylesheets.
|
||||
* @param source Open file handle to read from
|
||||
* @return true if parsing completed (even if no rules found)
|
||||
*/
|
||||
bool loadFromStream(FsFile& source);
|
||||
|
||||
/**
|
||||
* Look up the style for an HTML element, considering tag name and class attributes.
|
||||
* Applies CSS cascade: element style < class style < element.class style
|
||||
*
|
||||
* @param tagName The HTML element name (e.g., "p", "div")
|
||||
* @param classAttr The class attribute value (may contain multiple space-separated classes)
|
||||
* @return Combined style with all applicable rules merged
|
||||
*/
|
||||
[[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const;
|
||||
|
||||
/**
|
||||
* Parse an inline style attribute string.
|
||||
* @param styleValue The value of a style="" attribute
|
||||
* @return Parsed style properties
|
||||
*/
|
||||
[[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue);
|
||||
|
||||
/**
|
||||
* Check if any rules have been loaded
|
||||
*/
|
||||
[[nodiscard]] bool empty() const { return rulesBySelector_.empty(); }
|
||||
|
||||
/**
|
||||
* Get count of loaded rule sets
|
||||
*/
|
||||
[[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); }
|
||||
|
||||
/**
|
||||
* Clear all loaded rules
|
||||
*/
|
||||
void clear() { rulesBySelector_.clear(); }
|
||||
|
||||
/**
|
||||
* Save parsed CSS rules to a cache file.
|
||||
* @param file Open file handle to write to
|
||||
* @return true if cache was written successfully
|
||||
*/
|
||||
bool saveToCache(FsFile& file) const;
|
||||
|
||||
/**
|
||||
* Load CSS rules from a cache file.
|
||||
* Clears any existing rules before loading.
|
||||
* @param file Open file handle to read from
|
||||
* @return true if cache was loaded successfully
|
||||
*/
|
||||
bool loadFromCache(FsFile& file);
|
||||
|
||||
private:
|
||||
// Storage: maps normalized selector -> style properties
|
||||
std::unordered_map<std::string, CssStyle> rulesBySelector_;
|
||||
|
||||
// Internal parsing helpers
|
||||
void processRuleBlock(const std::string& selectorGroup, const std::string& declarations);
|
||||
static CssStyle parseDeclarations(const std::string& declBlock);
|
||||
|
||||
// Individual property value parsers
|
||||
static CssTextAlign interpretAlignment(const std::string& val);
|
||||
static CssFontStyle interpretFontStyle(const std::string& val);
|
||||
static CssFontWeight interpretFontWeight(const std::string& val);
|
||||
static CssTextDecoration interpretDecoration(const std::string& val);
|
||||
static CssLength interpretLength(const std::string& val);
|
||||
static int8_t interpretSpacing(const std::string& val);
|
||||
|
||||
// String utilities
|
||||
static std::string normalized(const std::string& s);
|
||||
static std::vector<std::string> splitOnChar(const std::string& s, char delimiter);
|
||||
static std::vector<std::string> splitWhitespace(const std::string& s);
|
||||
};
|
||||
191
lib/Epub/Epub/css/CssStyle.h
Normal file
191
lib/Epub/Epub/css/CssStyle.h
Normal file
@@ -0,0 +1,191 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Matches order of PARAGRAPH_ALIGNMENT in CrossPointSettings
|
||||
enum class CssTextAlign : uint8_t { Justify = 0, Left = 1, Center = 2, Right = 3 };
|
||||
enum class CssUnit : uint8_t { Pixels = 0, Em = 1, Rem = 2, Points = 3 };
|
||||
|
||||
// Represents a CSS length value with its unit, allowing deferred resolution to pixels
|
||||
struct CssLength {
|
||||
float value = 0.0f;
|
||||
CssUnit unit = CssUnit::Pixels;
|
||||
|
||||
CssLength() = default;
|
||||
CssLength(const float v, const CssUnit u) : value(v), unit(u) {}
|
||||
|
||||
// Convenience constructor for pixel values (most common case)
|
||||
explicit CssLength(const float pixels) : value(pixels) {}
|
||||
|
||||
// Resolve to pixels given the current em size (font line height)
|
||||
[[nodiscard]] float toPixels(const float emSize) const {
|
||||
switch (unit) {
|
||||
case CssUnit::Em:
|
||||
case CssUnit::Rem:
|
||||
return value * emSize;
|
||||
case CssUnit::Points:
|
||||
return value * 1.33f; // Approximate pt to px conversion
|
||||
default:
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve to int16_t pixels (for BlockStyle fields)
|
||||
[[nodiscard]] int16_t toPixelsInt16(const float emSize) const { return static_cast<int16_t>(toPixels(emSize)); }
|
||||
};
|
||||
|
||||
// Font style options matching CSS font-style property
|
||||
enum class CssFontStyle : uint8_t { Normal = 0, Italic = 1 };
|
||||
|
||||
// Font weight options - CSS supports 100-900, we simplify to normal/bold
|
||||
enum class CssFontWeight : uint8_t { Normal = 0, Bold = 1 };
|
||||
|
||||
// Text decoration options
|
||||
enum class CssTextDecoration : uint8_t { None = 0, Underline = 1 };
|
||||
|
||||
// Bitmask for tracking which properties have been explicitly set
|
||||
struct CssPropertyFlags {
|
||||
uint16_t textAlign : 1;
|
||||
uint16_t fontStyle : 1;
|
||||
uint16_t fontWeight : 1;
|
||||
uint16_t textDecoration : 1;
|
||||
uint16_t textIndent : 1;
|
||||
uint16_t marginTop : 1;
|
||||
uint16_t marginBottom : 1;
|
||||
uint16_t marginLeft : 1;
|
||||
uint16_t marginRight : 1;
|
||||
uint16_t paddingTop : 1;
|
||||
uint16_t paddingBottom : 1;
|
||||
uint16_t paddingLeft : 1;
|
||||
uint16_t paddingRight : 1;
|
||||
|
||||
CssPropertyFlags()
|
||||
: textAlign(0),
|
||||
fontStyle(0),
|
||||
fontWeight(0),
|
||||
textDecoration(0),
|
||||
textIndent(0),
|
||||
marginTop(0),
|
||||
marginBottom(0),
|
||||
marginLeft(0),
|
||||
marginRight(0),
|
||||
paddingTop(0),
|
||||
paddingBottom(0),
|
||||
paddingLeft(0),
|
||||
paddingRight(0) {}
|
||||
|
||||
[[nodiscard]] bool anySet() const {
|
||||
return textAlign || fontStyle || fontWeight || textDecoration || textIndent || marginTop || marginBottom ||
|
||||
marginLeft || marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight;
|
||||
}
|
||||
|
||||
void clearAll() {
|
||||
textAlign = fontStyle = fontWeight = textDecoration = textIndent = 0;
|
||||
marginTop = marginBottom = marginLeft = marginRight = 0;
|
||||
paddingTop = paddingBottom = paddingLeft = paddingRight = 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Represents a collection of CSS style properties
|
||||
// Only stores properties relevant to e-ink text rendering
|
||||
// Length values are stored as CssLength (value + unit) for deferred resolution
|
||||
struct CssStyle {
|
||||
CssTextAlign textAlign = CssTextAlign::Left;
|
||||
CssFontStyle fontStyle = CssFontStyle::Normal;
|
||||
CssFontWeight fontWeight = CssFontWeight::Normal;
|
||||
CssTextDecoration textDecoration = CssTextDecoration::None;
|
||||
|
||||
CssLength textIndent; // First-line indent (deferred resolution)
|
||||
CssLength marginTop; // Vertical spacing before block
|
||||
CssLength marginBottom; // Vertical spacing after block
|
||||
CssLength marginLeft; // Horizontal spacing left of block
|
||||
CssLength marginRight; // Horizontal spacing right of block
|
||||
CssLength paddingTop; // Padding before
|
||||
CssLength paddingBottom; // Padding after
|
||||
CssLength paddingLeft; // Padding left
|
||||
CssLength paddingRight; // Padding right
|
||||
|
||||
CssPropertyFlags defined; // Tracks which properties were explicitly set
|
||||
|
||||
// Apply properties from another style, only overwriting if the other style
|
||||
// has that property explicitly defined
|
||||
void applyOver(const CssStyle& base) {
|
||||
if (base.hasTextAlign()) {
|
||||
textAlign = base.textAlign;
|
||||
defined.textAlign = 1;
|
||||
}
|
||||
if (base.hasFontStyle()) {
|
||||
fontStyle = base.fontStyle;
|
||||
defined.fontStyle = 1;
|
||||
}
|
||||
if (base.hasFontWeight()) {
|
||||
fontWeight = base.fontWeight;
|
||||
defined.fontWeight = 1;
|
||||
}
|
||||
if (base.hasTextDecoration()) {
|
||||
textDecoration = base.textDecoration;
|
||||
defined.textDecoration = 1;
|
||||
}
|
||||
if (base.hasTextIndent()) {
|
||||
textIndent = base.textIndent;
|
||||
defined.textIndent = 1;
|
||||
}
|
||||
if (base.hasMarginTop()) {
|
||||
marginTop = base.marginTop;
|
||||
defined.marginTop = 1;
|
||||
}
|
||||
if (base.hasMarginBottom()) {
|
||||
marginBottom = base.marginBottom;
|
||||
defined.marginBottom = 1;
|
||||
}
|
||||
if (base.hasMarginLeft()) {
|
||||
marginLeft = base.marginLeft;
|
||||
defined.marginLeft = 1;
|
||||
}
|
||||
if (base.hasMarginRight()) {
|
||||
marginRight = base.marginRight;
|
||||
defined.marginRight = 1;
|
||||
}
|
||||
if (base.hasPaddingTop()) {
|
||||
paddingTop = base.paddingTop;
|
||||
defined.paddingTop = 1;
|
||||
}
|
||||
if (base.hasPaddingBottom()) {
|
||||
paddingBottom = base.paddingBottom;
|
||||
defined.paddingBottom = 1;
|
||||
}
|
||||
if (base.hasPaddingLeft()) {
|
||||
paddingLeft = base.paddingLeft;
|
||||
defined.paddingLeft = 1;
|
||||
}
|
||||
if (base.hasPaddingRight()) {
|
||||
paddingRight = base.paddingRight;
|
||||
defined.paddingRight = 1;
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] bool hasTextAlign() const { return defined.textAlign; }
|
||||
[[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; }
|
||||
[[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; }
|
||||
[[nodiscard]] bool hasTextDecoration() const { return defined.textDecoration; }
|
||||
[[nodiscard]] bool hasTextIndent() const { return defined.textIndent; }
|
||||
[[nodiscard]] bool hasMarginTop() const { return defined.marginTop; }
|
||||
[[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; }
|
||||
[[nodiscard]] bool hasMarginLeft() const { return defined.marginLeft; }
|
||||
[[nodiscard]] bool hasMarginRight() const { return defined.marginRight; }
|
||||
[[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; }
|
||||
[[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; }
|
||||
[[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; }
|
||||
[[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; }
|
||||
|
||||
void reset() {
|
||||
textAlign = CssTextAlign::Left;
|
||||
fontStyle = CssFontStyle::Normal;
|
||||
fontWeight = CssFontWeight::Normal;
|
||||
textDecoration = CssTextDecoration::None;
|
||||
textIndent = CssLength{};
|
||||
marginTop = marginBottom = marginLeft = marginRight = CssLength{};
|
||||
paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{};
|
||||
defined.clearAll();
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user