- Add uncompressed dictionary (.dict) file support to avoid decompression memory issues
- Implement chunked on-demand parsing for large definitions
- Add backward navigation with re-parse capability
- Limit cached pages to MAX_CACHED_PAGES (4) to prevent memory exhaustion
- Add helper script for extracting/recompressing dictzip files
88 lines
2.6 KiB
C++
88 lines
2.6 KiB
C++
#pragma once
|
|
|
|
#include <SdFat.h>
|
|
|
|
#include <cstdint>
|
|
#include <string>
|
|
|
|
// StarDict dictionary lookup library
|
|
// Supports .ifo/.idx/.dict (uncompressed) and .ifo/.idx/.dict.dz (compressed) formats
|
|
class StarDict {
|
|
public:
|
|
struct DictInfo {
|
|
std::string bookname;
|
|
uint32_t wordcount = 0;
|
|
uint32_t idxfilesize = 0;
|
|
char sametypesequence = '\0'; // 'h' for HTML, 'm' for plain text, etc.
|
|
uint32_t synwordcount = 0;
|
|
bool loaded = false;
|
|
};
|
|
|
|
struct LookupResult {
|
|
std::string word;
|
|
std::string definition;
|
|
bool found = false;
|
|
};
|
|
|
|
private:
|
|
std::string basePath; // Path without extension (e.g., "/dictionaries/dict-data")
|
|
DictInfo info;
|
|
|
|
// Dictzip chunk info for random access decompression
|
|
struct DictzipInfo {
|
|
uint32_t chunkLength = 0; // Uncompressed chunk size (usually 58315)
|
|
uint16_t chunkCount = 0;
|
|
uint32_t headerSize = 0; // Total header size to skip
|
|
uint16_t* chunkSizes = nullptr; // Array of compressed chunk sizes
|
|
bool loaded = false;
|
|
};
|
|
DictzipInfo dzInfo;
|
|
|
|
// Whether to use uncompressed .dict file (preferred) or compressed .dict.dz
|
|
bool useUncompressed = false;
|
|
|
|
// Parse .ifo file
|
|
bool loadInfo();
|
|
|
|
// Load dictzip header for random access (only if using compressed)
|
|
bool loadDictzipHeader();
|
|
|
|
// Read word at given index file position, returns word and advances position
|
|
bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
|
|
uint32_t& dictSize);
|
|
|
|
// Read definition directly from uncompressed .dict file (no decompression needed)
|
|
bool readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition);
|
|
|
|
// Decompress a portion of the .dict.dz file
|
|
bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);
|
|
|
|
// Convert 4-byte big-endian to uint32
|
|
static uint32_t readBE32(const uint8_t* data);
|
|
|
|
public:
|
|
explicit StarDict(const std::string& basePath);
|
|
~StarDict();
|
|
|
|
// Initialize dictionary (loads .ifo)
|
|
bool begin();
|
|
|
|
// Get dictionary info
|
|
const DictInfo& getInfo() const { return info; }
|
|
|
|
// Look up a word (case-insensitive)
|
|
LookupResult lookup(const std::string& word);
|
|
|
|
// Check if dictionary is ready
|
|
bool isReady() const { return info.loaded; }
|
|
|
|
// Strip HTML tags from definition for plain text display
|
|
static std::string stripHtml(const std::string& html);
|
|
|
|
// Normalize word for comparison (lowercase, trim)
|
|
static std::string normalizeWord(const std::string& word);
|
|
|
|
// StarDict comparison (case-insensitive first, then case-sensitive tiebreaker)
|
|
static int stardictStrcmp(const std::string& a, const std::string& b);
|
|
};
|