2026-01-22 12:42:01 -05:00

82 lines
2.3 KiB
C++

#pragma once
#include <SdFat.h>
#include <cstdint>
#include <string>
// StarDict dictionary lookup library
// Supports .ifo/.idx/.dict.dz format with linear scan lookup
//
// Ownership: an instance carries a raw heap pointer (DictzipInfo::chunkSizes)
// and declares a destructor, so copying is disabled. A compiler-generated copy
// would duplicate the pointer and leave two objects referring to one array.
class StarDict {
 public:
  // Dictionary metadata exposed through getInfo().
  // NOTE(review): presumably populated by loadInfo() from the .ifo file - confirm in the .cpp.
  struct DictInfo {
    std::string bookname;
    uint32_t wordcount = 0;
    uint32_t idxfilesize = 0;
    char sametypesequence = '\0';  // 'h' for HTML, 'm' for plain text, etc.
    uint32_t synwordcount = 0;
    bool loaded = false;  // This is the flag isReady() reports
  };

  // Value returned by lookup(); `found` defaults to false.
  struct LookupResult {
    std::string word;
    std::string definition;
    bool found = false;
  };

 private:
  std::string basePath;  // Path without extension (e.g., "/dictionaries/dict-data")
  DictInfo info;

  // Dictzip chunk info for random access decompression
  struct DictzipInfo {
    uint32_t chunkLength = 0;        // Uncompressed chunk size (usually 58315)
    uint16_t chunkCount = 0;
    uint32_t headerSize = 0;         // Total header size to skip
    // Array of compressed chunk sizes.
    // NOTE(review): raw pointer; presumably allocated by loadDictzipHeader() and
    // released by ~StarDict(), both defined outside this header - confirm.
    uint16_t* chunkSizes = nullptr;
    bool loaded = false;
  };
  DictzipInfo dzInfo;

  // Parse .ifo file
  bool loadInfo();

  // Load dictzip header for random access
  bool loadDictzipHeader();

  // Read word at given index file position, returns word and advances position.
  // `position` is in/out; `word`, `dictOffset` and `dictSize` are outputs.
  bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
                          uint32_t& dictSize);

  // Decompress a portion of the .dict.dz file.
  // The result is written to `definition`.
  bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);

  // Convert 4-byte big-endian to uint32
  static uint32_t readBE32(const uint8_t* data);

 public:
  // `basePath` is the dictionary path without extension.
  explicit StarDict(const std::string& basePath);
  ~StarDict();

  // Non-copyable: a shallow copy would alias dzInfo.chunkSizes between two
  // instances. Declaring these also suppresses implicit move operations, so
  // instances must be constructed in place or held by pointer/reference.
  StarDict(const StarDict&) = delete;
  StarDict& operator=(const StarDict&) = delete;

  // Initialize dictionary (loads .ifo)
  bool begin();

  // Get dictionary info
  const DictInfo& getInfo() const { return info; }

  // Look up a word (case-insensitive)
  LookupResult lookup(const std::string& word);

  // Check if dictionary is ready (true once info.loaded is set)
  bool isReady() const { return info.loaded; }

  // Strip HTML tags from definition for plain text display
  static std::string stripHtml(const std::string& html);

  // Normalize word for comparison (lowercase, trim)
  static std::string normalizeWord(const std::string& word);

  // StarDict comparison (case-insensitive first, then case-sensitive tiebreaker)
  static int stardictStrcmp(const std::string& a, const std::string& b);
};