checkpoint: pre list-to-vector refactor; mostly fixes dictionary crash

- Add uncompressed dictionary (.dict) file support to avoid decompression memory issues
- Implement chunked on-demand parsing for large definitions
- Add backward navigation with re-parse capability
- Limit cached pages to MAX_CACHED_PAGES (4) to prevent memory exhaustion
- Add helper script for extracting/recompressing dictzip files
This commit is contained in:
cottongin
2026-01-29 09:33:40 -05:00
parent 8b41dccfb9
commit 62643ae933
5 changed files with 770 additions and 55 deletions

View File

@@ -6,7 +6,7 @@
#include <string>
// StarDict dictionary lookup library
// Supports .ifo/.idx/.dict.dz format with linear scan lookup
// Supports .ifo/.idx/.dict (uncompressed) and .ifo/.idx/.dict.dz (compressed) formats
class StarDict {
public:
struct DictInfo {
@@ -38,16 +38,22 @@ class StarDict {
};
DictzipInfo dzInfo;
// Whether to use uncompressed .dict file (preferred) or compressed .dict.dz
bool useUncompressed = false;
// Parse .ifo file
bool loadInfo();
// Load dictzip header for random access
// Load dictzip header for random access (only if using compressed)
bool loadDictzipHeader();
// Read the word at the given index file position; returns the word and advances the position
bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
uint32_t& dictSize);
// Read definition directly from uncompressed .dict file (no decompression needed)
bool readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition);
// Decompress a portion of the .dict.dz file
bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);