sort of working dictionary

2026-01-22 12:42:01 -05:00
parent ff22a82563
commit 9493fb1f18
24 changed files with 2887 additions and 11 deletions
--- a/lib/StarDict/StarDict.h
+++ b/lib/StarDict/StarDict.h
@@ -0,0 +1,92 @@
+#pragma once
+
+#include <SdFat.h>
+
+#include <cstdint>
+#include <string>
+
+// StarDict dictionary lookup library
+// Supports .ifo/.idx/.dict.dz format with linear scan lookup
+class StarDict {
+ public:
+  // Dictionary metadata; `loaded` is the flag that isReady() reports.
+  struct DictInfo {
+    std::string bookname;
+    uint32_t wordcount = 0;
+    uint32_t idxfilesize = 0;
+    char sametypesequence = '\0';  // 'h' for HTML, 'm' for plain text, etc.
+    uint32_t synwordcount = 0;
+    bool loaded = false;
+  };
+
+  // Value returned by lookup(); the default state is "not found" with empty strings.
+  struct LookupResult {
+    std::string word;
+    std::string definition;
+    bool found = false;
+  };
+
+ private:
+  std::string basePath;  // Path without extension (e.g., "/dictionaries/dict-data")
+  DictInfo info;
+
+  // Dictzip chunk info for random access decompression
+  struct DictzipInfo {
+    uint32_t chunkLength = 0;  // Uncompressed chunk size (usually 58315)
+    uint16_t chunkCount = 0;
+    uint32_t headerSize = 0;   // Total header size to skip
+    uint16_t* chunkSizes = nullptr;  // Array of compressed chunk sizes
+    bool loaded = false;
+  };
+  DictzipInfo dzInfo;
+
+  // Parse .ifo file
+  bool loadInfo();
+
+  // Load dictzip header for random access
+  bool loadDictzipHeader();
+
+  // Read word at given index file position, returns word and advances position.
+  // dictOffset and dictSize are output parameters (presumably the entry's location in the
+  // uncompressed .dict data, as consumed by decompressDefinition() -- TODO confirm).
+  bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
+                          uint32_t& dictSize);
+
+  // Decompress a portion of the .dict.dz file
+  bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);
+
+  // Convert 4-byte big-endian to uint32
+  static uint32_t readBE32(const uint8_t* data);
+
+ public:
+  explicit StarDict(const std::string& basePath);
+  ~StarDict();
+
+  // Copying is disabled. The class declares a destructor and keeps a raw pointer in
+  // dzInfo.chunkSizes; the implicitly generated copy operations would duplicate that
+  // pointer (a double free if the destructor releases it -- its body is not in this header).
+  // No move operations are generated either (the user-declared destructor suppresses them),
+  // so moving a StarDict is rejected at compile time as well.
+  StarDict(const StarDict&) = delete;
+  StarDict& operator=(const StarDict&) = delete;
+
+  // Initialize dictionary (loads .ifo)
+  bool begin();
+
+  // Get dictionary info
+  const DictInfo& getInfo() const { return info; }
+
+  // Look up a word (case-insensitive)
+  LookupResult lookup(const std::string& word);
+
+  // Check if dictionary is ready
+  bool isReady() const { return info.loaded; }
+
+  // Strip HTML tags from definition for plain text display
+  static std::string stripHtml(const std::string& html);
+
+  // Normalize word for comparison (lowercase, trim)
+  static std::string normalizeWord(const std::string& word);
+
+  // StarDict comparison (case-insensitive first, then case-sensitive tiebreaker)
+  static int stardictStrcmp(const std::string& a, const std::string& b);
+};