crosspoint-reader/lib/StarDict/StarDict.h

#pragma once

#include <SdFat.h>

#include <cstdint>
#include <string>

// StarDict dictionary lookup library
// Supports .ifo/.idx/.dict (uncompressed) and .ifo/.idx/.dict.dz (compressed) formats
//
// Typical use: construct with the dictionary base path, call begin(), then lookup().
//
// Instances are non-copyable: the class declares a destructor and DictzipInfo holds a raw
// chunkSizes pointer, so an implicit member-wise copy would leave two objects referring to
// the same array (a double free if the destructor releases it).
class StarDict {
 public:
  // Dictionary metadata (populated from the .ifo file).
  struct DictInfo {
    std::string bookname;          // Dictionary name
    uint32_t wordcount = 0;        // Number of entries
    uint32_t idxfilesize = 0;      // Size of the .idx file
    char sametypesequence = '\0';  // 'h' for HTML, 'm' for plain text, etc.
    uint32_t synwordcount = 0;     // Synonym count
    bool loaded = false;           // Reported by isReady()
  };

  // Outcome of a lookup() call.
  struct LookupResult {
    std::string word;        // Headword for the entry (NOTE(review): matched vs. queried form - confirm in .cpp)
    std::string definition;  // Definition text for the entry
    bool found = false;      // Defaults to false; presumably set only when an entry matched
  };

 private:
  std::string basePath;  // Path without extension (e.g., "/dictionaries/dict-data")
  DictInfo info;

  // Dictzip chunk info for random access decompression
  struct DictzipInfo {
    uint32_t chunkLength = 0;  // Uncompressed chunk size (usually 58315)
    uint16_t chunkCount = 0;
    uint32_t headerSize = 0;  // Total header size to skip
    // Array of compressed chunk sizes.
    // NOTE(review): raw pointer; allocation and release happen outside this header
    // (presumably in loadDictzipHeader() and ~StarDict()) - confirm in the .cpp.
    uint16_t* chunkSizes = nullptr;
    bool loaded = false;
  };
  DictzipInfo dzInfo;

  // Whether to use uncompressed .dict file (preferred) or compressed .dict.dz
  bool useUncompressed = false;

  // Parse .ifo file
  bool loadInfo();

  // Load dictzip header for random access (only if using compressed)
  bool loadDictzipHeader();

  // Read word at given index file position, returns word and advances position.
  // On return, dictOffset/dictSize carry the entry's location in the dictionary data.
  bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
                          uint32_t& dictSize);

  // Read definition directly from uncompressed .dict file (no decompression needed)
  bool readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition);

  // Decompress a portion of the .dict.dz file
  bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);

  // Convert 4-byte big-endian to uint32
  static uint32_t readBE32(const uint8_t* data);

 public:
  // basePath: dictionary path without extension (e.g., "/dictionaries/dict-data")
  explicit StarDict(const std::string& basePath);
  ~StarDict();

  // Copying is disabled: a member-wise copy would duplicate the raw dzInfo.chunkSizes
  // pointer. Move operations are already not generated because a destructor is declared.
  StarDict(const StarDict&) = delete;
  StarDict& operator=(const StarDict&) = delete;

  // Initialize dictionary (loads .ifo)
  bool begin();

  // Get dictionary info
  const DictInfo& getInfo() const { return info; }

  // Look up a word (case-insensitive)
  LookupResult lookup(const std::string& word);

  // Check if dictionary is ready
  bool isReady() const { return info.loaded; }

  // Strip HTML tags from definition for plain text display
  static std::string stripHtml(const std::string& html);

  // Normalize word for comparison (lowercase, trim)
  static std::string normalizeWord(const std::string& word);

  // StarDict comparison (case-insensitive first, then case-sensitive tiebreaker)
  static int stardictStrcmp(const std::string& a, const std::string& b);
};