crosspoint-reader/lib/StarDict/DictHtmlParser.h
2026-01-22 12:42:01 -05:00

65 lines
2.0 KiB
C++

#pragma once
#include <Epub/blocks/TextBlock.h>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
class GfxRenderer;
/**
 * DictHtmlParser parses HTML dictionary definitions into ParsedText.
 *
 * The class declares only static member functions and has no data members,
 * so it is never required to be instantiated.
 *
 * Supports:
 * - Bold: <b>, <strong>
 * - Italic: <i>, <em>
 * - Underline: <u>, <ins>
 * - Lists: <ol>, <li> with numbering/bullets
 * - Block elements: <p>, <br>, <hr>, </html> (entry separator)
 * - HTML entities: numeric (&#NNN;, &#xHHH;) and named (&amp;, etc.)
 * - Superscript: <sup> rendered as ^text
 */
class DictHtmlParser {
 public:
  /**
   * Parse an HTML definition into styled words and deliver the result
   * through a callback.
   *
   * Each paragraph/block creates a separate ParsedText; the output handed to
   * the caller is a std::shared_ptr<TextBlock> per callback invocation.
   * NOTE(review): whether the callback fires once per paragraph or once per
   * laid-out line is decided by the implementation - confirm there.
   *
   * @param html          The HTML definition text
   * @param fontId        Font ID for text width calculations
   * @param renderer      Reference to renderer for layout
   * @param viewportWidth Width available for layout (presumably in pixels and
   *                      used for line breaking - TODO confirm)
   * @param onTextBlock   Callback invoked with each produced TextBlock
   */
  static void parse(const std::string& html, int fontId, const GfxRenderer& renderer, uint16_t viewportWidth,
                    const std::function<void(std::shared_ptr<TextBlock>)>& onTextBlock);

 private:
  // Decode HTML entity at position i (starting with '&').
  // `i` is taken by non-const reference; presumably it is advanced past the
  // entity that was consumed - verify against the implementation.
  static std::string decodeEntity(const std::string& html, size_t& i);

  // Extract tag name from position `start` (after '<').
  // `isClosing` is an output flag; presumably set when the tag is a closing
  // tag such as </b> - verify against the implementation.
  static std::string extractTagName(const std::string& html, size_t start, bool& isClosing);

  // Check if tag is a block-level element
  static bool isBlockTag(const std::string& tagName);

  // Check if tag starts/ends bold
  static bool isBoldTag(const std::string& tagName);

  // Check if tag starts/ends italic
  static bool isItalicTag(const std::string& tagName);

  // Check if tag starts/ends underline
  static bool isUnderlineTag(const std::string& tagName);

  // Check if tag is superscript
  static bool isSuperscriptTag(const std::string& tagName);

  // Check if tag is list item
  static bool isListItemTag(const std::string& tagName);

  // Check if tag starts ordered list
  static bool isOrderedListTag(const std::string& tagName);
};