## Summary

* **What is the goal of this PR?**
  - Adds basic CSS parsing to EPUBs and determines the CSS rules when rendering to the screen so that text is styled correctly. Currently supports bold, underline, italics, margin, padding, and text alignment.

## Additional Context

- My main reason for wanting this is that the book I'm currently reading, Carl's Doomsday Scenario (2nd in the Dungeon Crawler Carl series), relies _a lot_ on styled text for telling parts of the story. When text is bolded, it's supposed to be a message that's rendered "on-screen" in the story. When characters are "chatting" with each other, the text is bolded and their names are underlined. Plus, normal emphasis is provided by italicizing words here and there. So, this greatly improves my experience reading this book on the Xteink, and I figured it was useful enough for others too.
- For transparency: I'm a software engineer, but I'm mostly frontend and TypeScript/JavaScript. It's been _years_ since I did any C/C++, so I would not be surprised if I'm doing something dumb along the way in this code. Please don't hesitate to ask for changes if something looks off. I heavily relied on Claude Code for help, and I had a lot of inspiration from how [microreader](https://github.com/CidVonHighwind/microreader) achieves their CSS parsing and styling. I did give this as good of a code review as I could and went through everything, and _it works on my machine_ 😄

### Before

### After

---

### AI Usage

Did you use AI tools to help write this code? **YES**, Claude Code
79 lines
2.1 KiB
C++
79 lines
2.1 KiB
C++
#pragma once
|
|
#include <Print.h>
|
|
|
|
#include <algorithm>
|
|
#include <vector>
|
|
|
|
#include "Epub.h"
|
|
#include "expat.h"
|
|
|
|
class BookMetadataCache;
|
|
|
|
class ContentOpfParser final : public Print {
|
|
enum ParserState {
|
|
START,
|
|
IN_PACKAGE,
|
|
IN_METADATA,
|
|
IN_BOOK_TITLE,
|
|
IN_BOOK_AUTHOR,
|
|
IN_BOOK_LANGUAGE,
|
|
IN_MANIFEST,
|
|
IN_SPINE,
|
|
IN_GUIDE,
|
|
};
|
|
|
|
const std::string& cachePath;
|
|
const std::string& baseContentPath;
|
|
size_t remainingSize;
|
|
XML_Parser parser = nullptr;
|
|
ParserState state = START;
|
|
BookMetadataCache* cache;
|
|
FsFile tempItemStore;
|
|
std::string coverItemId;
|
|
|
|
// Index for fast idref→href lookup (used only for large EPUBs)
|
|
struct ItemIndexEntry {
|
|
uint32_t idHash; // FNV-1a hash of itemId
|
|
uint16_t idLen; // length for collision reduction
|
|
uint32_t fileOffset; // offset in .items.bin
|
|
};
|
|
std::vector<ItemIndexEntry> itemIndex;
|
|
bool useItemIndex = false;
|
|
|
|
static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400;
|
|
|
|
// FNV-1a hash function
|
|
static uint32_t fnvHash(const std::string& s) {
|
|
uint32_t hash = 2166136261u;
|
|
for (char c : s) {
|
|
hash ^= static_cast<uint8_t>(c);
|
|
hash *= 16777619u;
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
|
static void characterData(void* userData, const XML_Char* s, int len);
|
|
static void endElement(void* userData, const XML_Char* name);
|
|
|
|
public:
|
|
std::string title;
|
|
std::string author;
|
|
std::string language;
|
|
std::string tocNcxPath;
|
|
std::string tocNavPath; // EPUB 3 nav document path
|
|
std::string coverItemHref;
|
|
std::string textReferenceHref;
|
|
std::vector<std::string> cssFiles; // CSS stylesheet paths
|
|
|
|
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
|
|
BookMetadataCache* cache)
|
|
: cachePath(cachePath), baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
|
|
~ContentOpfParser() override;
|
|
|
|
bool setup();
|
|
|
|
size_t write(uint8_t) override;
|
|
size_t write(const uint8_t* buffer, size_t size) override;
|
|
};
|