sort of working dictionary
@@ -31,6 +31,9 @@ class PageLine final : public PageElement {
|
||||
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override;
|
||||
bool serialize(FsFile& file) override;
|
||||
static std::unique_ptr<PageLine> deserialize(FsFile& file);
|
||||
|
||||
// Getter for word selection support
|
||||
const std::shared_ptr<TextBlock>& getTextBlock() const { return block; }
|
||||
};
|
||||
|
||||
class Page {
|
||||
|
||||
@@ -48,6 +48,12 @@ class TextBlock final : public Block {
|
||||
Style getStyle() const { return style; }
|
||||
const BlockStyle& getBlockStyle() const { return blockStyle; }
|
||||
bool isEmpty() override { return words.empty(); }
|
||||
|
||||
// Getters for word selection support
|
||||
const std::list<std::string>& getWords() const { return words; }
|
||||
const std::list<uint16_t>& getWordXPositions() const { return wordXpos; }
|
||||
const std::list<EpdFontFamily::Style>& getWordStyles() const { return wordStyles; }
|
||||
size_t getWordCount() const { return words.size(); }
|
||||
void layout(GfxRenderer& renderer) override {};
|
||||
// Given a renderer, works out where to break the words into lines
|
||||
void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
|
||||
|
||||
@@ -510,7 +510,10 @@ void GfxRenderer::drawButtonHints(const int fontId, const char* btn1, const char
|
||||
setOrientation(orig_orientation);
|
||||
}
|
||||
|
||||
void GfxRenderer::drawSideButtonHints(const int fontId, const char* topBtn, const char* bottomBtn) const {
|
||||
void GfxRenderer::drawSideButtonHints(const int fontId, const char* topBtn, const char* bottomBtn) {
|
||||
const Orientation orig_orientation = getOrientation();
|
||||
setOrientation(Orientation::Portrait);
|
||||
|
||||
const int screenWidth = getScreenWidth();
|
||||
constexpr int buttonWidth = 40; // Width on screen (height when rotated)
|
||||
constexpr int buttonHeight = 80; // Height on screen (width when rotated)
|
||||
@@ -559,6 +562,8 @@ void GfxRenderer::drawSideButtonHints(const int fontId, const char* topBtn, cons
|
||||
drawTextRotated90CW(fontId, textX, textY, labels[i]);
|
||||
}
|
||||
}
|
||||
|
||||
setOrientation(orig_orientation);
|
||||
}
|
||||
|
||||
int GfxRenderer::getTextHeight(const int fontId) const {
|
||||
@@ -862,3 +867,4 @@ void GfxRenderer::getOrientedViewableTRBL(int* outTop, int* outRight, int* outBo
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ class GfxRenderer {
|
||||
|
||||
// UI Components
|
||||
void drawButtonHints(int fontId, const char* btn1, const char* btn2, const char* btn3, const char* btn4);
|
||||
void drawSideButtonHints(int fontId, const char* topBtn, const char* bottomBtn) const;
|
||||
void drawSideButtonHints(int fontId, const char* topBtn, const char* bottomBtn);
|
||||
|
||||
private:
|
||||
// Helper for drawing rotated text (90 degrees clockwise, for side buttons)
|
||||
|
||||
370
lib/StarDict/DictHtmlParser.cpp
Normal file
@@ -0,0 +1,370 @@
|
||||
#include "DictHtmlParser.h"
|
||||
|
||||
#include <Epub/ParsedText.h>
|
||||
#include <GfxRenderer.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <stack>
|
||||
|
||||
std::string DictHtmlParser::decodeEntity(const std::string& html, size_t& i) {
|
||||
const size_t start = i; // Position of '&'
|
||||
const size_t remaining = html.length() - start;
|
||||
|
||||
// Numeric entities: &#NNN; or &#xHHH;
|
||||
if (remaining > 2 && html[start + 1] == '#') {
|
||||
size_t numStart = start + 2;
|
||||
bool isHex = false;
|
||||
if (remaining > 3 && (html[numStart] == 'x' || html[numStart] == 'X')) {
|
||||
isHex = true;
|
||||
numStart++;
|
||||
}
|
||||
|
||||
size_t numEnd = numStart;
|
||||
while (numEnd < html.length() && html[numEnd] != ';') {
|
||||
const char c = html[numEnd];
|
||||
if (isHex) {
|
||||
if (!std::isxdigit(static_cast<unsigned char>(c))) break;
|
||||
} else {
|
||||
if (!std::isdigit(static_cast<unsigned char>(c))) break;
|
||||
}
|
||||
numEnd++;
|
||||
}
|
||||
|
||||
if (numEnd > numStart && numEnd < html.length() && html[numEnd] == ';') {
|
||||
const std::string numStr = html.substr(numStart, numEnd - numStart);
|
||||
unsigned long codepoint = std::strtoul(numStr.c_str(), nullptr, isHex ? 16 : 10);
|
||||
i = numEnd; // Will be incremented by caller's loop
|
||||
|
||||
// Convert codepoint to UTF-8
|
||||
std::string utf8;
|
||||
if (codepoint < 0x80) {
|
||||
utf8 += static_cast<char>(codepoint);
|
||||
} else if (codepoint < 0x800) {
|
||||
utf8 += static_cast<char>(0xC0 | (codepoint >> 6));
|
||||
utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
|
||||
} else if (codepoint < 0x10000) {
|
||||
utf8 += static_cast<char>(0xE0 | (codepoint >> 12));
|
||||
utf8 += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
|
||||
} else if (codepoint < 0x110000) {
|
||||
utf8 += static_cast<char>(0xF0 | (codepoint >> 18));
|
||||
utf8 += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
|
||||
utf8 += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
|
||||
}
|
||||
return utf8;
|
||||
}
|
||||
}
|
||||
|
||||
// Named entities - find the semicolon first
|
||||
size_t semicolon = html.find(';', start + 1);
|
||||
if (semicolon != std::string::npos && semicolon - start < 12) {
|
||||
const std::string entity = html.substr(start, semicolon - start + 1);
|
||||
|
||||
// Common named entities
|
||||
struct EntityMapping {
|
||||
const char* entity;
|
||||
const char* replacement;
|
||||
};
|
||||
static const EntityMapping entities[] = {
|
||||
{" ", " "},
|
||||
{"<", "<"},
|
||||
{">", ">"},
|
||||
{"&", "&"},
|
||||
{""", "\""},
|
||||
{"'", "'"},
|
||||
{"—", "\xe2\x80\x94"}, // —
|
||||
{"–", "\xe2\x80\x93"}, // –
|
||||
{"…", "\xe2\x80\xa6"}, // …
|
||||
{"’", "\xe2\x80\x99"}, // '
|
||||
{"‘", "\xe2\x80\x98"}, // '
|
||||
{"”", "\xe2\x80\x9d"}, // "
|
||||
{"“", "\xe2\x80\x9c"}, // "
|
||||
{"°", "\xc2\xb0"}, // °
|
||||
{"×", "\xc3\x97"}, // ×
|
||||
{"÷", "\xc3\xb7"}, // ÷
|
||||
{"±", "\xc2\xb1"}, // ±
|
||||
{"½", "\xc2\xbd"}, // ½
|
||||
{"¼", "\xc2\xbc"}, // ¼
|
||||
{"¾", "\xc2\xbe"}, // ¾
|
||||
{"¢", "\xc2\xa2"}, // ¢
|
||||
{"£", "\xc2\xa3"}, // £
|
||||
{"€", "\xe2\x82\xac"}, // €
|
||||
{"¥", "\xc2\xa5"}, // ¥
|
||||
{"©", "\xc2\xa9"}, // ©
|
||||
{"®", "\xc2\xae"}, // ®
|
||||
{"™", "\xe2\x84\xa2"}, // ™
|
||||
{"•", "\xe2\x80\xa2"}, // •
|
||||
{"·", "\xc2\xb7"}, // ·
|
||||
{"§", "\xc2\xa7"}, // §
|
||||
{"¶", "\xc2\xb6"}, // ¶
|
||||
{"†", "\xe2\x80\xa0"}, // †
|
||||
{"‡", "\xe2\x80\xa1"}, // ‡
|
||||
{"¡", "\xc2\xa1"}, // ¡
|
||||
{"¿", "\xc2\xbf"}, // ¿
|
||||
{"«", "\xc2\xab"}, // «
|
||||
{"»", "\xc2\xbb"}, // »
|
||||
{"‎", ""}, // Left-to-right mark (invisible)
|
||||
{"‏", ""}, // Right-to-left mark (invisible)
|
||||
{"­", ""}, // Soft hyphen
|
||||
{" ", " "},
|
||||
{" ", " "},
|
||||
{" ", " "},
|
||||
{"‍", ""},
|
||||
{"‌", ""},
|
||||
};
|
||||
|
||||
for (const auto& mapping : entities) {
|
||||
if (entity == mapping.entity) {
|
||||
i = semicolon; // Will be incremented by caller's loop
|
||||
return mapping.replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown entity - return just the ampersand
|
||||
return "&";
|
||||
}
|
||||
|
||||
std::string DictHtmlParser::extractTagName(const std::string& html, size_t start, bool& isClosing) {
|
||||
isClosing = false;
|
||||
size_t pos = start;
|
||||
|
||||
// Skip whitespace after '<'
|
||||
while (pos < html.length() && std::isspace(static_cast<unsigned char>(html[pos]))) {
|
||||
pos++;
|
||||
}
|
||||
|
||||
// Check for closing tag
|
||||
if (pos < html.length() && html[pos] == '/') {
|
||||
isClosing = true;
|
||||
pos++;
|
||||
}
|
||||
|
||||
// Extract tag name (alphanumeric characters)
|
||||
size_t nameStart = pos;
|
||||
while (pos < html.length() && (std::isalnum(static_cast<unsigned char>(html[pos])) || html[pos] == '!')) {
|
||||
pos++;
|
||||
}
|
||||
|
||||
std::string tagName = html.substr(nameStart, pos - nameStart);
|
||||
// Convert to lowercase
|
||||
std::transform(tagName.begin(), tagName.end(), tagName.begin(),
|
||||
[](unsigned char c) { return std::tolower(c); });
|
||||
return tagName;
|
||||
}
|
||||
|
||||
bool DictHtmlParser::isBlockTag(const std::string& tagName) {
|
||||
return tagName == "p" || tagName == "div" || tagName == "br" || tagName == "hr" || tagName == "li" ||
|
||||
tagName == "ol" || tagName == "ul" || tagName == "dt" || tagName == "dd" || tagName == "html";
|
||||
}
|
||||
|
||||
bool DictHtmlParser::isBoldTag(const std::string& tagName) {
|
||||
return tagName == "b" || tagName == "strong";
|
||||
}
|
||||
|
||||
bool DictHtmlParser::isItalicTag(const std::string& tagName) {
|
||||
return tagName == "i" || tagName == "em";
|
||||
}
|
||||
|
||||
bool DictHtmlParser::isUnderlineTag(const std::string& tagName) {
|
||||
return tagName == "u" || tagName == "ins";
|
||||
}
|
||||
|
||||
bool DictHtmlParser::isSuperscriptTag(const std::string& tagName) { return tagName == "sup"; }
|
||||
|
||||
bool DictHtmlParser::isListItemTag(const std::string& tagName) { return tagName == "li"; }
|
||||
|
||||
bool DictHtmlParser::isOrderedListTag(const std::string& tagName) { return tagName == "ol"; }
|
||||
|
||||
void DictHtmlParser::parse(const std::string& html, int fontId, const GfxRenderer& renderer, uint16_t viewportWidth,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& onTextBlock) {
|
||||
// Current paragraph being built
|
||||
ParsedText currentParagraph(TextBlock::Style::LEFT_ALIGN, false, false);
|
||||
|
||||
// State tracking
|
||||
int boldDepth = 0;
|
||||
int italicDepth = 0;
|
||||
int underlineDepth = 0;
|
||||
bool inSuperscript = false;
|
||||
bool inTag = false;
|
||||
|
||||
// List tracking
|
||||
std::stack<int> listCounters; // Stack for nested lists (0 = unordered, >0 = ordered counter)
|
||||
|
||||
// Current word being accumulated
|
||||
std::string currentWord;
|
||||
bool lastWasSpace = true; // Start true to skip leading spaces
|
||||
|
||||
// Helper to flush current word to paragraph
|
||||
auto flushWord = [&]() {
|
||||
if (currentWord.empty()) return;
|
||||
|
||||
// Determine font style
|
||||
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
|
||||
if (boldDepth > 0 && italicDepth > 0) {
|
||||
fontStyle = EpdFontFamily::BOLD_ITALIC;
|
||||
} else if (boldDepth > 0) {
|
||||
fontStyle = EpdFontFamily::BOLD;
|
||||
} else if (italicDepth > 0) {
|
||||
fontStyle = EpdFontFamily::ITALIC;
|
||||
}
|
||||
|
||||
currentParagraph.addWord(currentWord, fontStyle, underlineDepth > 0);
|
||||
currentWord.clear();
|
||||
lastWasSpace = false;
|
||||
};
|
||||
|
||||
// Helper to flush current paragraph (create TextBlocks)
|
||||
auto flushParagraph = [&]() {
|
||||
flushWord();
|
||||
if (!currentParagraph.isEmpty()) {
|
||||
currentParagraph.layoutAndExtractLines(renderer, fontId, viewportWidth, onTextBlock);
|
||||
currentParagraph = ParsedText(TextBlock::Style::LEFT_ALIGN, false, false);
|
||||
}
|
||||
lastWasSpace = true;
|
||||
};
|
||||
|
||||
// Parse the HTML
|
||||
for (size_t i = 0; i < html.length(); i++) {
|
||||
const char c = html[i];
|
||||
|
||||
if (c == '<') {
|
||||
// Start of tag - flush current word first
|
||||
flushWord();
|
||||
|
||||
// Find end of tag
|
||||
size_t tagEnd = html.find('>', i);
|
||||
if (tagEnd == std::string::npos) {
|
||||
// Malformed HTML - treat rest as text
|
||||
currentWord += c;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract tag name
|
||||
bool isClosing = false;
|
||||
std::string tagName = extractTagName(html, i + 1, isClosing);
|
||||
|
||||
// Handle different tag types
|
||||
if (isBoldTag(tagName)) {
|
||||
if (isClosing) {
|
||||
boldDepth = std::max(0, boldDepth - 1);
|
||||
} else {
|
||||
boldDepth++;
|
||||
}
|
||||
} else if (isItalicTag(tagName)) {
|
||||
if (isClosing) {
|
||||
italicDepth = std::max(0, italicDepth - 1);
|
||||
} else {
|
||||
italicDepth++;
|
||||
}
|
||||
} else if (isUnderlineTag(tagName)) {
|
||||
if (isClosing) {
|
||||
underlineDepth = std::max(0, underlineDepth - 1);
|
||||
} else {
|
||||
underlineDepth++;
|
||||
}
|
||||
} else if (isSuperscriptTag(tagName)) {
|
||||
if (isClosing) {
|
||||
inSuperscript = false;
|
||||
} else {
|
||||
inSuperscript = true;
|
||||
// Add caret prefix for superscript
|
||||
currentWord += '^';
|
||||
}
|
||||
} else if (isOrderedListTag(tagName)) {
|
||||
if (isClosing) {
|
||||
if (!listCounters.empty()) {
|
||||
listCounters.pop();
|
||||
}
|
||||
} else {
|
||||
// Check if it's an unordered list style
|
||||
std::string tagContent = html.substr(i, tagEnd - i);
|
||||
if (tagContent.find("list-style-type:lower-alpha") != std::string::npos) {
|
||||
listCounters.push(-1); // -1 = alphabetic
|
||||
} else {
|
||||
listCounters.push(1); // Start at 1 for ordered
|
||||
}
|
||||
}
|
||||
} else if (tagName == "ul") {
|
||||
if (isClosing) {
|
||||
if (!listCounters.empty()) {
|
||||
listCounters.pop();
|
||||
}
|
||||
} else {
|
||||
listCounters.push(0); // 0 = unordered (bullet)
|
||||
}
|
||||
} else if (isListItemTag(tagName) && !isClosing) {
|
||||
// Start of list item - flush paragraph and add bullet/number
|
||||
flushParagraph();
|
||||
|
||||
std::string prefix;
|
||||
if (!listCounters.empty()) {
|
||||
int counter = listCounters.top();
|
||||
if (counter == 0) {
|
||||
// Unordered - bullet point
|
||||
prefix = "\xe2\x80\xa2 "; // • bullet
|
||||
} else if (counter == -1) {
|
||||
// Alphabetic numbering - not fully supported, just indent
|
||||
prefix = " ";
|
||||
} else {
|
||||
// Ordered - number
|
||||
char numBuf[8];
|
||||
snprintf(numBuf, sizeof(numBuf), "%d. ", counter);
|
||||
prefix = numBuf;
|
||||
listCounters.pop();
|
||||
listCounters.push(counter + 1); // Increment for next item
|
||||
}
|
||||
} else {
|
||||
// No list context - just indent
|
||||
prefix = "\xe2\x80\xa2 "; // • bullet
|
||||
}
|
||||
|
||||
// Add prefix as a word (em-space for indent + prefix)
|
||||
currentParagraph.addWord("\xe2\x80\x83" + prefix, EpdFontFamily::REGULAR, false);
|
||||
lastWasSpace = true;
|
||||
} else if (isBlockTag(tagName)) {
|
||||
// Block element - flush paragraph
|
||||
flushParagraph();
|
||||
|
||||
// Special handling for </html> which separates dictionary entries
|
||||
if (tagName == "html" && isClosing) {
|
||||
// Add extra spacing between entries
|
||||
flushParagraph();
|
||||
}
|
||||
}
|
||||
|
||||
// Skip to end of tag
|
||||
i = tagEnd;
|
||||
} else if (c == '&') {
|
||||
// HTML entity
|
||||
std::string decoded = decodeEntity(html, i);
|
||||
if (!decoded.empty()) {
|
||||
if (decoded == " ") {
|
||||
// Space entity - treat as space
|
||||
if (!lastWasSpace) {
|
||||
flushWord();
|
||||
lastWasSpace = true;
|
||||
}
|
||||
} else {
|
||||
currentWord += decoded;
|
||||
lastWasSpace = false;
|
||||
}
|
||||
}
|
||||
} else if (std::isspace(static_cast<unsigned char>(c))) {
|
||||
// Whitespace - flush word and collapse
|
||||
if (!lastWasSpace) {
|
||||
flushWord();
|
||||
lastWasSpace = true;
|
||||
}
|
||||
} else {
|
||||
// Regular character
|
||||
currentWord += c;
|
||||
lastWasSpace = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Flush any remaining content
|
||||
flushParagraph();
|
||||
}
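
Aside: the numeric-entity branch of decodeEntity above converts the parsed codepoint to UTF-8 by hand. The standalone sketch below (not part of this commit) mirrors that branch structure in a hypothetical helper, codepointToUtf8, as a quick sanity check; for &#233; (U+00E9, 'é') it prints "c3 a9".

#include <cstdio>
#include <string>

// Hypothetical helper that mirrors the UTF-8 encoding branches in decodeEntity.
static std::string codepointToUtf8(const unsigned long codepoint) {
  std::string utf8;
  if (codepoint < 0x80) {
    utf8 += static_cast<char>(codepoint);
  } else if (codepoint < 0x800) {
    utf8 += static_cast<char>(0xC0 | (codepoint >> 6));
    utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
  } else if (codepoint < 0x10000) {
    utf8 += static_cast<char>(0xE0 | (codepoint >> 12));
    utf8 += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
    utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
  } else if (codepoint < 0x110000) {
    utf8 += static_cast<char>(0xF0 | (codepoint >> 18));
    utf8 += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
    utf8 += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
    utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
  }
  return utf8;
}

int main() {
  for (const char b : codepointToUtf8(0xE9)) {
    std::printf("%02x ", static_cast<unsigned char>(b)); // prints: c3 a9
  }
  std::printf("\n");
  return 0;
}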
|
||||
64
lib/StarDict/DictHtmlParser.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
|
||||
#include <Epub/blocks/TextBlock.h>
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
class GfxRenderer;
|
||||
|
||||
/**
|
||||
* DictHtmlParser parses HTML dictionary definitions into ParsedText.
|
||||
*
|
||||
* Supports:
|
||||
* - Bold: <b>, <strong>
|
||||
* - Italic: <i>, <em>
|
||||
* - Underline: <u>, <ins>
|
||||
* - Lists: <ol>, <li> with numbering/bullets
|
||||
* - Block elements: <p>, <br>, <hr>, </html> (entry separator)
|
||||
* - HTML entities: numeric (&#NNN;, &#xHHH;) and named (&amp;, etc.)
|
||||
* - Superscript: <sup> rendered as ^text
|
||||
*/
|
||||
class DictHtmlParser {
|
||||
public:
|
||||
/**
|
||||
* Parse HTML definition and populate ParsedText with styled words.
|
||||
* Each paragraph/block is laid out and emitted as one or more TextBlocks via the callback.
|
||||
*
|
||||
* @param html The HTML definition text
|
||||
* @param fontId Font ID for text width calculations
|
||||
* @param renderer Reference to renderer for layout
|
||||
* @param viewportWidth Width in pixels available for laying out each line
* @param onTextBlock Callback invoked for each laid-out block/line of text
|
||||
*/
|
||||
static void parse(const std::string& html, int fontId, const GfxRenderer& renderer, uint16_t viewportWidth,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& onTextBlock);
|
||||
|
||||
private:
|
||||
// Decode HTML entity at position i (starting with '&')
|
||||
static std::string decodeEntity(const std::string& html, size_t& i);
|
||||
|
||||
// Extract tag name from position (after '<')
|
||||
static std::string extractTagName(const std::string& html, size_t start, bool& isClosing);
|
||||
|
||||
// Check if tag is a block-level element
|
||||
static bool isBlockTag(const std::string& tagName);
|
||||
|
||||
// Check if tag starts/ends bold
|
||||
static bool isBoldTag(const std::string& tagName);
|
||||
|
||||
// Check if tag starts/ends italic
|
||||
static bool isItalicTag(const std::string& tagName);
|
||||
|
||||
// Check if tag starts/ends underline
|
||||
static bool isUnderlineTag(const std::string& tagName);
|
||||
|
||||
// Check if tag is superscript
|
||||
static bool isSuperscriptTag(const std::string& tagName);
|
||||
|
||||
// Check if tag is list item
|
||||
static bool isListItemTag(const std::string& tagName);
|
||||
|
||||
// Check if tag starts ordered list
|
||||
static bool isOrderedListTag(const std::string& tagName);
|
||||
};
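
For orientation, a minimal caller sketch (not part of this commit): it assumes a GfxRenderer, font id and viewport width are already available from the surrounding UI code, and simply collects the TextBlocks the parser emits.

#include "DictHtmlParser.h"

#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical caller: lay out one dictionary definition and collect the
// resulting lines so they can later be wrapped in PageLine elements.
std::vector<std::shared_ptr<TextBlock>> layoutDefinition(const std::string& definitionHtml, int fontId,
                                                         const GfxRenderer& renderer, uint16_t viewportWidth) {
  std::vector<std::shared_ptr<TextBlock>> lines;
  DictHtmlParser::parse(definitionHtml, fontId, renderer, viewportWidth,
                        [&lines](std::shared_ptr<TextBlock> block) { lines.push_back(std::move(block)); });
  return lines;
}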
|
||||
759
lib/StarDict/StarDict.cpp
Normal file
@@ -0,0 +1,759 @@
|
||||
#include "StarDict.h"
|
||||
|
||||
#include <HardwareSerial.h>
|
||||
#include <SDCardManager.h>
|
||||
#include <miniz.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
|
||||
#include "DictPrefixIndex.generated.h"
|
||||
|
||||
StarDict::StarDict(const std::string& basePath) : basePath(basePath) {}
|
||||
|
||||
StarDict::~StarDict() {
|
||||
if (dzInfo.chunkSizes) {
|
||||
free(dzInfo.chunkSizes);
|
||||
dzInfo.chunkSizes = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t StarDict::readBE32(const uint8_t* data) {
|
||||
return (static_cast<uint32_t>(data[0]) << 24) | (static_cast<uint32_t>(data[1]) << 16) |
|
||||
(static_cast<uint32_t>(data[2]) << 8) | static_cast<uint32_t>(data[3]);
|
||||
}
|
||||
|
||||
bool StarDict::loadInfo() {
|
||||
const std::string ifoPath = basePath + ".ifo";
|
||||
FsFile file;
|
||||
if (!SdMan.openFileForRead("DICT", ifoPath, file)) {
|
||||
Serial.printf("[%lu] [DICT] Failed to open .ifo file: %s\n", millis(), ifoPath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
char buffer[256];
|
||||
while (file.available()) {
|
||||
const int len = file.fgets(buffer, sizeof(buffer));
|
||||
if (len <= 0) break;
|
||||
|
||||
// Remove newline
|
||||
char* newline = strchr(buffer, '\n');
|
||||
if (newline) *newline = '\0';
|
||||
newline = strchr(buffer, '\r');
|
||||
if (newline) *newline = '\0';
|
||||
|
||||
// Parse key=value
|
||||
char* eq = strchr(buffer, '=');
|
||||
if (!eq) continue;
|
||||
|
||||
*eq = '\0';
|
||||
const char* key = buffer;
|
||||
const char* value = eq + 1;
|
||||
|
||||
if (strcmp(key, "bookname") == 0) {
|
||||
info.bookname = value;
|
||||
} else if (strcmp(key, "wordcount") == 0) {
|
||||
info.wordcount = strtoul(value, nullptr, 10);
|
||||
} else if (strcmp(key, "idxfilesize") == 0) {
|
||||
info.idxfilesize = strtoul(value, nullptr, 10);
|
||||
} else if (strcmp(key, "sametypesequence") == 0) {
|
||||
info.sametypesequence = value[0];
|
||||
} else if (strcmp(key, "synwordcount") == 0) {
|
||||
info.synwordcount = strtoul(value, nullptr, 10);
|
||||
}
|
||||
}
|
||||
|
||||
file.close();
|
||||
info.loaded = true;
|
||||
|
||||
Serial.printf("[%lu] [DICT] Loaded dictionary: %s (%u words)\n", millis(), info.bookname.c_str(), info.wordcount);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StarDict::loadDictzipHeader() {
|
||||
if (dzInfo.loaded) return true;
|
||||
|
||||
const std::string dzPath = basePath + ".dict.dz";
|
||||
FsFile file;
|
||||
if (!SdMan.openFileForRead("DICT", dzPath, file)) {
|
||||
Serial.printf("[%lu] [DICT] Failed to open .dict.dz file\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read gzip header
|
||||
uint8_t header[10];
|
||||
if (file.read(header, 10) != 10) {
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify gzip magic number
|
||||
if (header[0] != 0x1f || header[1] != 0x8b) {
|
||||
Serial.printf("[%lu] [DICT] Not a valid gzip file\n", millis());
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for extra field flag (bit 2)
|
||||
const uint8_t flags = header[3];
|
||||
if (!(flags & 0x04)) {
|
||||
Serial.printf("[%lu] [DICT] No extra field - not a dictzip file\n", millis());
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read extra field length
|
||||
uint8_t xlenBuf[2];
|
||||
if (file.read(xlenBuf, 2) != 2) {
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
const uint16_t xlen = xlenBuf[0] | (xlenBuf[1] << 8);
|
||||
|
||||
// Read extra field
|
||||
auto* extraField = static_cast<uint8_t*>(malloc(xlen));
|
||||
if (!extraField) {
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (file.read(extraField, xlen) != xlen) {
|
||||
free(extraField);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse dictzip subfield (SI1='R', SI2='A')
|
||||
bool foundDictzip = false;
|
||||
uint16_t pos = 0;
|
||||
while (pos + 4 <= xlen) {
|
||||
const uint8_t si1 = extraField[pos];
|
||||
const uint8_t si2 = extraField[pos + 1];
|
||||
const uint16_t slen = extraField[pos + 2] | (extraField[pos + 3] << 8);
|
||||
|
||||
if (si1 == 'R' && si2 == 'A' && pos + 4 + slen <= xlen) {
|
||||
// Dictzip subfield found
|
||||
// Format: ver(2) + chlen(2) + count(2) + sizes[count](2 each)
|
||||
const uint8_t* data = &extraField[pos + 4];
|
||||
// uint16_t version = data[0] | (data[1] << 8); // Usually 1
|
||||
dzInfo.chunkLength = data[2] | (data[3] << 8);
|
||||
dzInfo.chunkCount = data[4] | (data[5] << 8);
|
||||
|
||||
dzInfo.chunkSizes = static_cast<uint16_t*>(malloc(dzInfo.chunkCount * sizeof(uint16_t)));
|
||||
if (!dzInfo.chunkSizes) {
|
||||
free(extraField);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
for (uint16_t i = 0; i < dzInfo.chunkCount; i++) {
|
||||
dzInfo.chunkSizes[i] = data[6 + i * 2] | (data[7 + i * 2] << 8);
|
||||
}
|
||||
|
||||
foundDictzip = true;
|
||||
break;
|
||||
}
|
||||
|
||||
pos += 4 + slen;
|
||||
}
|
||||
|
||||
free(extraField);
|
||||
|
||||
if (!foundDictzip) {
|
||||
Serial.printf("[%lu] [DICT] Dictzip subfield not found\n", millis());
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculate header size (10 + 2 + xlen + optional fields)
|
||||
dzInfo.headerSize = 10 + 2 + xlen;
|
||||
|
||||
// Skip FNAME if present (bit 3)
|
||||
if (flags & 0x08) {
|
||||
file.seek(dzInfo.headerSize);
|
||||
while (file.available()) {
|
||||
uint8_t c;
|
||||
file.read(&c, 1);
|
||||
dzInfo.headerSize++;
|
||||
if (c == 0) break;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip FCOMMENT if present (bit 4)
|
||||
if (flags & 0x10) {
|
||||
file.seek(dzInfo.headerSize);
|
||||
while (file.available()) {
|
||||
uint8_t c;
|
||||
file.read(&c, 1);
|
||||
dzInfo.headerSize++;
|
||||
if (c == 0) break;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip FHCRC if present (bit 1)
|
||||
if (flags & 0x02) {
|
||||
dzInfo.headerSize += 2;
|
||||
}
|
||||
|
||||
file.close();
|
||||
dzInfo.loaded = true;
|
||||
|
||||
Serial.printf("[%lu] [DICT] Dictzip: %u chunks of %u bytes, header size %u\n", millis(), dzInfo.chunkCount,
|
||||
dzInfo.chunkLength, dzInfo.headerSize);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StarDict::begin() {
|
||||
if (!loadInfo()) return false;
|
||||
if (!loadDictzipHeader()) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StarDict::readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
|
||||
uint32_t& dictSize) {
|
||||
idxFile.seek(position);
|
||||
|
||||
// Read null-terminated word
|
||||
word.clear();
|
||||
char c;
|
||||
while (idxFile.read(&c, 1) == 1) {
|
||||
if (c == '\0') break;
|
||||
word += c;
|
||||
if (word.length() > 256) {
|
||||
// Safety limit
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (word.empty()) return false;
|
||||
|
||||
// Read 4-byte big-endian offset followed by 4-byte big-endian size
|
||||
uint8_t buf[8];
|
||||
if (idxFile.read(buf, 8) != 8) return false;
|
||||
|
||||
dictOffset = readBE32(buf);
|
||||
dictSize = readBE32(buf + 4);
|
||||
|
||||
position = idxFile.position();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string& definition) {
|
||||
if (!dzInfo.loaded) return false;
|
||||
|
||||
const std::string dzPath = basePath + ".dict.dz";
|
||||
FsFile file;
|
||||
if (!SdMan.openFileForRead("DICT", dzPath, file)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculate which chunk(s) we need
|
||||
const uint32_t startChunk = offset / dzInfo.chunkLength;
|
||||
const uint32_t endChunk = (offset + size - 1) / dzInfo.chunkLength;
|
||||
const uint32_t startOffsetInChunk = offset % dzInfo.chunkLength;
|
||||
|
||||
if (endChunk >= dzInfo.chunkCount) {
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculate file offset for start chunk
|
||||
uint32_t fileOffset = dzInfo.headerSize;
|
||||
for (uint32_t i = 0; i < startChunk; i++) {
|
||||
fileOffset += dzInfo.chunkSizes[i];
|
||||
}
|
||||
|
||||
// Allocate buffers
|
||||
const uint32_t maxCompressedSize = 65536; // Max compressed chunk size
|
||||
auto* compressedBuf = static_cast<uint8_t*>(malloc(maxCompressedSize));
|
||||
auto* decompressedBuf = static_cast<uint8_t*>(malloc(dzInfo.chunkLength));
|
||||
if (!compressedBuf || !decompressedBuf) {
|
||||
free(compressedBuf);
|
||||
free(decompressedBuf);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
definition.clear();
|
||||
definition.reserve(size);
|
||||
|
||||
// Process each needed chunk
|
||||
for (uint32_t chunk = startChunk; chunk <= endChunk; chunk++) {
|
||||
const uint16_t compressedSize = dzInfo.chunkSizes[chunk];
|
||||
|
||||
// Seek and read compressed data
|
||||
file.seek(fileOffset);
|
||||
if (file.read(compressedBuf, compressedSize) != compressedSize) {
|
||||
free(compressedBuf);
|
||||
free(decompressedBuf);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Decompress the chunk (first try treating it as zlib-wrapped, then fall back to raw inflate below)
|
||||
auto* inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
|
||||
if (!inflator) {
|
||||
free(compressedBuf);
|
||||
free(decompressedBuf);
|
||||
file.close();
|
||||
return false;
|
||||
}
|
||||
tinfl_init(inflator);
|
||||
|
||||
size_t inBytes = compressedSize;
|
||||
size_t outBytes = dzInfo.chunkLength;
|
||||
const tinfl_status status =
|
||||
tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
|
||||
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | TINFL_FLAG_PARSE_ZLIB_HEADER);
|
||||
|
||||
free(inflator);
|
||||
|
||||
if (status != TINFL_STATUS_DONE && status != TINFL_STATUS_HAS_MORE_OUTPUT) {
|
||||
// Try without zlib header flag
|
||||
inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
|
||||
if (inflator) {
|
||||
tinfl_init(inflator);
|
||||
inBytes = compressedSize;
|
||||
outBytes = dzInfo.chunkLength;
|
||||
tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
|
||||
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
|
||||
free(inflator);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the portion we need from this chunk
|
||||
uint32_t copyStart = 0;
|
||||
uint32_t copyEnd = outBytes;
|
||||
|
||||
if (chunk == startChunk) {
|
||||
copyStart = startOffsetInChunk;
|
||||
}
|
||||
if (chunk == endChunk) {
|
||||
const uint32_t endOffsetInChunk = (offset + size) - (endChunk * dzInfo.chunkLength);
|
||||
if (endOffsetInChunk < copyEnd) {
|
||||
copyEnd = endOffsetInChunk;
|
||||
}
|
||||
}
|
||||
|
||||
if (copyEnd > copyStart) {
|
||||
definition.append(reinterpret_cast<char*>(decompressedBuf + copyStart), copyEnd - copyStart);
|
||||
}
|
||||
|
||||
fileOffset += compressedSize;
|
||||
}
|
||||
|
||||
free(compressedBuf);
|
||||
free(decompressedBuf);
|
||||
file.close();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// StarDict comparison function: case-insensitive first, then case-sensitive as tiebreaker
|
||||
int StarDict::stardictStrcmp(const std::string& a, const std::string& b) {
|
||||
// First: case-insensitive comparison (like g_ascii_strcasecmp)
|
||||
size_t i = 0;
|
||||
while (i < a.length() && i < b.length()) {
|
||||
const int ca = std::tolower(static_cast<unsigned char>(a[i]));
|
||||
const int cb = std::tolower(static_cast<unsigned char>(b[i]));
|
||||
if (ca != cb) return ca - cb;
|
||||
i++;
|
||||
}
|
||||
if (a.length() != b.length()) {
|
||||
return static_cast<int>(a.length()) - static_cast<int>(b.length());
|
||||
}
|
||||
// If case-insensitive equal, use case-sensitive as tiebreaker
|
||||
return a.compare(b);
|
||||
}
|
||||
|
||||
std::string StarDict::normalizeWord(const std::string& word) {
|
||||
std::string result;
|
||||
result.reserve(word.length());
|
||||
|
||||
// Trim leading whitespace
|
||||
size_t start = 0;
|
||||
while (start < word.length() && std::isspace(static_cast<unsigned char>(word[start]))) {
|
||||
start++;
|
||||
}
|
||||
|
||||
// Trim trailing whitespace
|
||||
size_t end = word.length();
|
||||
while (end > start && std::isspace(static_cast<unsigned char>(word[end - 1]))) {
|
||||
end--;
|
||||
}
|
||||
|
||||
// Convert to lowercase
|
||||
for (size_t i = start; i < end; i++) {
|
||||
result += static_cast<char>(std::tolower(static_cast<unsigned char>(word[i])));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
StarDict::LookupResult StarDict::lookup(const std::string& word) {
|
||||
LookupResult result;
|
||||
result.word = word;
|
||||
|
||||
if (!info.loaded) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const std::string normalizedSearch = normalizeWord(word);
|
||||
if (normalizedSearch.empty()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// First try .idx (main entries) - use prefix jump table for fast lookup
|
||||
const std::string idxPath = basePath + ".idx";
|
||||
FsFile idxFile;
|
||||
if (!SdMan.openFileForRead("DICT", idxPath, idxFile)) {
|
||||
Serial.printf("[%lu] [DICT] Failed to open index file\n", millis());
|
||||
return result;
|
||||
}
|
||||
|
||||
// Jump to the relevant section using prefix index (if word has 2+ alpha chars)
|
||||
uint32_t position = 0;
|
||||
if (normalizedSearch.length() >= 2 && DictPrefixIndex::isAlpha(normalizedSearch[0]) &&
|
||||
DictPrefixIndex::isAlpha(normalizedSearch[1])) {
|
||||
const uint16_t prefixIdx = DictPrefixIndex::prefixToIndex(normalizedSearch[0], normalizedSearch[1]);
|
||||
position = DictPrefixIndex::dictPrefixOffsets[prefixIdx];
|
||||
}
|
||||
bool found = false;
|
||||
|
||||
while (position < info.idxfilesize) {
|
||||
std::string currentWord;
|
||||
uint32_t dictOffset, dictSize;
|
||||
|
||||
if (!readWordAtPosition(idxFile, position, currentWord, dictOffset, dictSize)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Use stardictStrcmp for case-insensitive matching
|
||||
const int cmp = stardictStrcmp(normalizedSearch, currentWord);
|
||||
|
||||
if (cmp == 0) {
|
||||
std::string definition;
|
||||
if (decompressDefinition(dictOffset, dictSize, definition)) {
|
||||
if (!found) {
|
||||
result.word = currentWord;
|
||||
result.definition = definition;
|
||||
result.found = true;
|
||||
found = true;
|
||||
} else {
|
||||
result.definition += "</html>" + definition;
|
||||
}
|
||||
}
|
||||
// Continue scanning for additional matches (same word, different case)
|
||||
} else if (cmp < 0) {
|
||||
// Passed where target would be (file is sorted)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
idxFile.close();
|
||||
|
||||
// If not found in main index, try synonym file with prefix jump
|
||||
if (!found && info.synwordcount > 0) {
|
||||
const std::string synPath = basePath + ".syn";
|
||||
FsFile synFile;
|
||||
if (SdMan.openFileForRead("DICT", synPath, synFile)) {
|
||||
const uint32_t synFileSize = synFile.size();
|
||||
|
||||
// Jump to the relevant section using prefix index (if word has 2+ alpha chars)
|
||||
uint32_t synPosition = 0;
|
||||
if (normalizedSearch.length() >= 2 && DictPrefixIndex::isAlpha(normalizedSearch[0]) &&
|
||||
DictPrefixIndex::isAlpha(normalizedSearch[1])) {
|
||||
const uint16_t prefixIdx = DictPrefixIndex::prefixToIndex(normalizedSearch[0], normalizedSearch[1]);
|
||||
synPosition = DictPrefixIndex::synPrefixOffsets[prefixIdx];
|
||||
synFile.seek(synPosition);
|
||||
}
|
||||
|
||||
while (synFile.position() < synFileSize) {
|
||||
// Read synonym word (null-terminated)
|
||||
std::string synWord;
|
||||
char c;
|
||||
while (synFile.read(&c, 1) == 1 && c != '\0') {
|
||||
synWord += c;
|
||||
}
|
||||
|
||||
// Read 4-byte big-endian index
|
||||
uint8_t idxBytes[4];
|
||||
if (synFile.read(idxBytes, 4) != 4) break;
|
||||
const uint32_t mainIdx = readBE32(idxBytes);
|
||||
|
||||
// Use stardictStrcmp for case-insensitive comparison
|
||||
const int cmp = stardictStrcmp(normalizedSearch, synWord);
|
||||
|
||||
if (cmp == 0) {
|
||||
// Found synonym - look up the main entry by index
|
||||
FsFile idxFile2;
|
||||
if (SdMan.openFileForRead("DICT", idxPath, idxFile2)) {
|
||||
uint32_t pos = 0;
|
||||
uint32_t entryNum = 0;
|
||||
while (entryNum < mainIdx && pos < info.idxfilesize) {
|
||||
std::string w;
|
||||
uint32_t off, sz;
|
||||
if (!readWordAtPosition(idxFile2, pos, w, off, sz)) break;
|
||||
entryNum++;
|
||||
}
|
||||
// Now read the target entry
|
||||
if (entryNum == mainIdx) {
|
||||
std::string mainWord;
|
||||
uint32_t dictOffset, dictSize;
|
||||
if (readWordAtPosition(idxFile2, pos, mainWord, dictOffset, dictSize)) {
|
||||
std::string definition;
|
||||
if (decompressDefinition(dictOffset, dictSize, definition)) {
|
||||
result.word = synWord;
|
||||
result.definition = definition;
|
||||
result.found = true;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
idxFile2.close();
|
||||
}
|
||||
break; // Found a match, stop searching
|
||||
} else if (cmp < 0) {
|
||||
// Passed where it would be (file is sorted)
|
||||
break;
|
||||
}
|
||||
}
|
||||
synFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper to decode a single HTML entity starting at position i (after the '&')
|
||||
// Returns the decoded string and advances i past the entity (including ';')
|
||||
static std::string decodeHtmlEntity(const std::string& html, size_t& i) {
|
||||
const size_t start = i; // Position of '&'
|
||||
const size_t remaining = html.length() - start;
|
||||
|
||||
// Numeric entities: &#NNN; or &#xHHH;
|
||||
if (remaining > 2 && html[start + 1] == '#') {
|
||||
size_t numStart = start + 2;
|
||||
bool isHex = false;
|
||||
if (remaining > 3 && (html[numStart] == 'x' || html[numStart] == 'X')) {
|
||||
isHex = true;
|
||||
numStart++;
|
||||
}
|
||||
|
||||
size_t numEnd = numStart;
|
||||
while (numEnd < html.length() && html[numEnd] != ';') {
|
||||
const char c = html[numEnd];
|
||||
if (isHex) {
|
||||
if (!std::isxdigit(static_cast<unsigned char>(c))) break;
|
||||
} else {
|
||||
if (!std::isdigit(static_cast<unsigned char>(c))) break;
|
||||
}
|
||||
numEnd++;
|
||||
}
|
||||
|
||||
if (numEnd > numStart && numEnd < html.length() && html[numEnd] == ';') {
|
||||
const std::string numStr = html.substr(numStart, numEnd - numStart);
|
||||
unsigned long codepoint = std::strtoul(numStr.c_str(), nullptr, isHex ? 16 : 10);
|
||||
i = numEnd; // Will be incremented by caller's loop
|
||||
|
||||
// Convert codepoint to UTF-8
|
||||
std::string utf8;
|
||||
if (codepoint < 0x80) {
|
||||
utf8 += static_cast<char>(codepoint);
|
||||
} else if (codepoint < 0x800) {
|
||||
utf8 += static_cast<char>(0xC0 | (codepoint >> 6));
|
||||
utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
|
||||
} else if (codepoint < 0x10000) {
|
||||
utf8 += static_cast<char>(0xE0 | (codepoint >> 12));
|
||||
utf8 += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
|
||||
} else if (codepoint < 0x110000) {
|
||||
utf8 += static_cast<char>(0xF0 | (codepoint >> 18));
|
||||
utf8 += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
|
||||
utf8 += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
utf8 += static_cast<char>(0x80 | (codepoint & 0x3F));
|
||||
}
|
||||
return utf8;
|
||||
}
|
||||
}
|
||||
|
||||
// Named entities - find the semicolon first
|
||||
size_t semicolon = html.find(';', start + 1);
|
||||
if (semicolon != std::string::npos && semicolon - start < 12) {
|
||||
const std::string entity = html.substr(start, semicolon - start + 1);
|
||||
|
||||
// Common named entities
|
||||
struct EntityMapping {
|
||||
const char* entity;
|
||||
const char* replacement;
|
||||
};
|
||||
static const EntityMapping entities[] = {
|
||||
{" ", " "}, {"<", "<"}, {">", ">"},
|
||||
{"&", "&"}, {""", "\""}, {"'", "'"},
|
||||
{"—", "\xe2\x80\x94"}, // —
|
||||
{"–", "\xe2\x80\x93"}, // –
|
||||
{"…", "\xe2\x80\xa6"}, // …
|
||||
{"’", "\xe2\x80\x99"}, // '
|
||||
{"‘", "\xe2\x80\x98"}, // '
|
||||
{"”", "\xe2\x80\x9d"}, // "
|
||||
{"“", "\xe2\x80\x9c"}, // "
|
||||
{"°", "\xc2\xb0"}, // °
|
||||
{"×", "\xc3\x97"}, // ×
|
||||
{"÷", "\xc3\xb7"}, // ÷
|
||||
{"±", "\xc2\xb1"}, // ±
|
||||
{"½", "\xc2\xbd"}, // ½
|
||||
{"¼", "\xc2\xbc"}, // ¼
|
||||
{"¾", "\xc2\xbe"}, // ¾
|
||||
{"¢", "\xc2\xa2"}, // ¢
|
||||
{"£", "\xc2\xa3"}, // £
|
||||
{"€", "\xe2\x82\xac"}, // €
|
||||
{"¥", "\xc2\xa5"}, // ¥
|
||||
{"©", "\xc2\xa9"}, // ©
|
||||
{"®", "\xc2\xae"}, // ®
|
||||
{"™", "\xe2\x84\xa2"}, // ™
|
||||
{"•", "\xe2\x80\xa2"}, // •
|
||||
{"·", "\xc2\xb7"}, // ·
|
||||
{"§", "\xc2\xa7"}, // §
|
||||
{"¶", "\xc2\xb6"}, // ¶
|
||||
{"†", "\xe2\x80\xa0"}, // †
|
||||
{"‡", "\xe2\x80\xa1"}, // ‡
|
||||
{"¡", "\xc2\xa1"}, // ¡
|
||||
{"¿", "\xc2\xbf"}, // ¿
|
||||
{"«", "\xc2\xab"}, // «
|
||||
{"»", "\xc2\xbb"}, // »
|
||||
{"­", ""},
|
||||
{" ", " "},
|
||||
{" ", " "},
|
||||
{" ", " "},
|
||||
{"‍", ""},
|
||||
{"‌", ""},
|
||||
};
|
||||
|
||||
for (const auto& mapping : entities) {
|
||||
if (entity == mapping.entity) {
|
||||
i = semicolon; // Will be incremented by caller's loop
|
||||
return mapping.replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown entity - return just the ampersand and let the rest be processed normally
|
||||
return "&";
|
||||
}
|
||||
|
||||
// Helper to check if a tag is a block-level element that needs line breaks
|
||||
static bool isBlockTag(const std::string& tag, bool isClosing) {
|
||||
// Normalize to lowercase for comparison
|
||||
std::string lowerTag = tag;
|
||||
for (char& c : lowerTag) {
|
||||
c = std::tolower(static_cast<unsigned char>(c));
|
||||
}
|
||||
|
||||
// Block-level tags that should have line breaks
|
||||
if (lowerTag == "p" || lowerTag == "div" || lowerTag == "br" || lowerTag == "hr" || lowerTag == "li" ||
|
||||
lowerTag == "dt" || lowerTag == "dd" || lowerTag == "tr" || lowerTag == "h1" || lowerTag == "h2" ||
|
||||
lowerTag == "h3" || lowerTag == "h4" || lowerTag == "h5" || lowerTag == "h6" || lowerTag == "blockquote" ||
|
||||
lowerTag == "pre" || lowerTag == "ol" || lowerTag == "ul") {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string StarDict::stripHtml(const std::string& html) {
|
||||
std::string result;
|
||||
result.reserve(html.length());
|
||||
|
||||
bool inTag = false;
|
||||
bool lastWasSpace = false;
|
||||
bool lastWasNewline = false;
|
||||
|
||||
for (size_t i = 0; i < html.length(); i++) {
|
||||
const char c = html[i];
|
||||
|
||||
if (c == '<') {
|
||||
// Parse the tag name
|
||||
size_t tagStart = i + 1;
|
||||
bool isClosing = false;
|
||||
|
||||
// Skip whitespace after <
|
||||
while (tagStart < html.length() && std::isspace(static_cast<unsigned char>(html[tagStart]))) {
|
||||
tagStart++;
|
||||
}
|
||||
|
||||
// Check for closing tag
|
||||
if (tagStart < html.length() && html[tagStart] == '/') {
|
||||
isClosing = true;
|
||||
tagStart++;
|
||||
}
|
||||
|
||||
// Extract tag name
|
||||
size_t tagEnd = tagStart;
|
||||
while (tagEnd < html.length() && !std::isspace(static_cast<unsigned char>(html[tagEnd])) &&
|
||||
html[tagEnd] != '>' && html[tagEnd] != '/') {
|
||||
tagEnd++;
|
||||
}
|
||||
|
||||
const std::string tagName = html.substr(tagStart, tagEnd - tagStart);
|
||||
|
||||
// Check if this is a block-level element
|
||||
if (isBlockTag(tagName, isClosing)) {
|
||||
// Add line break for block elements
|
||||
if (!result.empty() && !lastWasNewline) {
|
||||
result += '\n';
|
||||
lastWasNewline = true;
|
||||
lastWasSpace = true;
|
||||
}
|
||||
}
|
||||
|
||||
inTag = true;
|
||||
} else if (c == '>') {
|
||||
inTag = false;
|
||||
} else if (!inTag) {
|
||||
// Handle HTML entities
|
||||
if (c == '&') {
|
||||
const std::string decoded = decodeHtmlEntity(html, i);
|
||||
if (!decoded.empty()) {
|
||||
// Check if decoded content is whitespace
|
||||
bool allSpace = true;
|
||||
for (const char dc : decoded) {
|
||||
if (!std::isspace(static_cast<unsigned char>(dc))) {
|
||||
allSpace = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (allSpace) {
|
||||
if (!lastWasSpace) {
|
||||
result += ' ';
|
||||
lastWasSpace = true;
|
||||
}
|
||||
} else {
|
||||
result += decoded;
|
||||
lastWasSpace = false;
|
||||
lastWasNewline = false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Collapse whitespace
|
||||
if (std::isspace(static_cast<unsigned char>(c))) {
|
||||
if (!lastWasSpace) {
|
||||
result += ' ';
|
||||
lastWasSpace = true;
|
||||
}
|
||||
} else {
|
||||
result += c;
|
||||
lastWasSpace = false;
|
||||
lastWasNewline = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trim trailing whitespace
|
||||
while (!result.empty() && std::isspace(static_cast<unsigned char>(result.back()))) {
|
||||
result.pop_back();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
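
As a standalone illustration (not part of this commit) of the random-access scheme decompressDefinition relies on: a definition's uncompressed (offset, size) pair from the .idx file is mapped to a range of dictzip chunks, and only those chunks are read and inflated. The chunk length below matches the "usually 58315" note in StarDict.h; the offset and size are made-up values.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t chunkLength = 58315;          // uncompressed bytes per dictzip chunk
  const uint32_t offset = 120000, size = 500;  // hypothetical .idx entry
  const uint32_t startChunk = offset / chunkLength;             // 2
  const uint32_t endChunk = (offset + size - 1) / chunkLength;  // 2
  const uint32_t startOffsetInChunk = offset % chunkLength;     // 3370
  std::printf("chunks %u..%u, starting %u bytes into the first chunk\n",
              startChunk, endChunk, startOffsetInChunk);
  // The file position of startChunk is headerSize plus the sum of the
  // compressed sizes of all earlier chunks (dzInfo.chunkSizes in the code).
  return 0;
}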
|
||||
81
lib/StarDict/StarDict.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
|
||||
#include <SdFat.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
// StarDict dictionary lookup library
|
||||
// Supports the .ifo/.idx/.dict.dz format; lookups jump via a generated two-letter prefix index, then scan linearly
|
||||
class StarDict {
|
||||
public:
|
||||
struct DictInfo {
|
||||
std::string bookname;
|
||||
uint32_t wordcount = 0;
|
||||
uint32_t idxfilesize = 0;
|
||||
char sametypesequence = '\0'; // 'h' for HTML, 'm' for plain text, etc.
|
||||
uint32_t synwordcount = 0;
|
||||
bool loaded = false;
|
||||
};
|
||||
|
||||
struct LookupResult {
|
||||
std::string word;
|
||||
std::string definition;
|
||||
bool found = false;
|
||||
};
|
||||
|
||||
private:
|
||||
std::string basePath; // Path without extension (e.g., "/dictionaries/dict-data")
|
||||
DictInfo info;
|
||||
|
||||
// Dictzip chunk info for random access decompression
|
||||
struct DictzipInfo {
|
||||
uint32_t chunkLength = 0; // Uncompressed chunk size (usually 58315)
|
||||
uint16_t chunkCount = 0;
|
||||
uint32_t headerSize = 0; // Total header size to skip
|
||||
uint16_t* chunkSizes = nullptr; // Array of compressed chunk sizes
|
||||
bool loaded = false;
|
||||
};
|
||||
DictzipInfo dzInfo;
|
||||
|
||||
// Parse .ifo file
|
||||
bool loadInfo();
|
||||
|
||||
// Load dictzip header for random access
|
||||
bool loadDictzipHeader();
|
||||
|
||||
// Read word at given index file position, returns word and advances position
|
||||
bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
|
||||
uint32_t& dictSize);
|
||||
|
||||
// Decompress a portion of the .dict.dz file
|
||||
bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);
|
||||
|
||||
// Convert 4-byte big-endian to uint32
|
||||
static uint32_t readBE32(const uint8_t* data);
|
||||
|
||||
public:
|
||||
explicit StarDict(const std::string& basePath);
|
||||
~StarDict();
|
||||
|
||||
// Initialize dictionary (loads .ifo)
|
||||
bool begin();
|
||||
|
||||
// Get dictionary info
|
||||
const DictInfo& getInfo() const { return info; }
|
||||
|
||||
// Look up a word (case-insensitive)
|
||||
LookupResult lookup(const std::string& word);
|
||||
|
||||
// Check if dictionary is ready
|
||||
bool isReady() const { return info.loaded; }
|
||||
|
||||
// Strip HTML tags from definition for plain text display
|
||||
static std::string stripHtml(const std::string& html);
|
||||
|
||||
// Normalize word for comparison (lowercase, trim)
|
||||
static std::string normalizeWord(const std::string& word);
|
||||
|
||||
// StarDict comparison (case-insensitive first, then case-sensitive tiebreaker)
|
||||
static int stardictStrcmp(const std::string& a, const std::string& b);
|
||||
};
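
A minimal end-to-end sketch (not part of this commit), using the example base path from the basePath comment above; the lookup word is made up and error handling is reduced to early returns.

#include "StarDict.h"

#include <HardwareSerial.h>

void lookupExample() {
  // Expects dict-data.ifo / .idx / .dict.dz (and optionally .syn) on the SD card.
  StarDict dict("/dictionaries/dict-data");
  if (!dict.begin()) return;  // loads the .ifo and the dictzip chunk table

  const StarDict::LookupResult res = dict.lookup("serendipity");  // case-insensitive
  if (!res.found) return;

  // Definitions are typically HTML (sametypesequence == 'h'); strip for plain display.
  Serial.println(StarDict::stripHtml(res.definition).c_str());
}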