checkpoint: pre list-to-vector refactor, fixes dictionary crash, mostly

- Add uncompressed dictionary (.dict) file support to avoid decompression memory issues
- Implement chunked on-demand parsing for large definitions
- Add backward navigation with re-parse capability
- Limit cached pages to MAX_CACHED_PAGES (4) to prevent memory exhaustion
- Add helper script for extracting/recompressing dictzip files
commit 62643ae933 (parent 8b41dccfb9)
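Background for the decompression changes below: dictzip stores the dictionary as independently compressed chunks of a fixed uncompressed length, so a definition at (offset, size) in the .dict data maps to a chunk range plus an offset inside the first chunk, and the compressed bytes for a chunk start after the sum of all earlier chunks' compressed sizes. The following is a minimal illustrative sketch of that arithmetic only; DictzipIndex and locateSpan are hypothetical stand-ins for the firmware's dzInfo fields used in the diff, not actual project code.

#include <cstdint>
#include <numeric>
#include <vector>

// Illustrative stand-in for the firmware's dictzip metadata (names assumed from the diff below).
struct DictzipIndex {
  uint32_t chunkLength;              // uncompressed bytes per chunk (CHLEN)
  std::vector<uint16_t> chunkSizes;  // compressed size of each chunk (from the RA extra field)
  uint32_t dataStart;                // file offset of the first compressed chunk
};

// Map an (offset, size) span of the uncompressed dictionary onto dictzip chunks.
inline void locateSpan(const DictzipIndex& dz, uint32_t offset, uint32_t size,
                       uint32_t& startChunk, uint32_t& endChunk,
                       uint32_t& startOffsetInChunk, uint32_t& fileOffset) {
  startChunk = offset / dz.chunkLength;
  endChunk = (offset + size - 1) / dz.chunkLength;
  startOffsetInChunk = offset % dz.chunkLength;
  // Compressed data for startChunk begins after all earlier chunks' compressed bytes.
  fileOffset = dz.dataStart +
               std::accumulate(dz.chunkSizes.begin(), dz.chunkSizes.begin() + startChunk, 0u);
}

An uncompressed .dict file skips all of this: a definition can be read with a single seek, which is why this commit prefers it when one is present on the SD card.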
StarDict.cpp

@@ -205,6 +205,19 @@ bool StarDict::loadDictzipHeader() {
 
 bool StarDict::begin() {
   if (!loadInfo()) return false;
+
+  // Try uncompressed .dict file first (preferred - no memory overhead)
+  const std::string dictPath = basePath + ".dict";
+  FsFile testFile;
+  if (SdMan.openFileForRead("DICT", dictPath, testFile)) {
+    testFile.close();
+    useUncompressed = true;
+    Serial.printf("[%lu] [DICT] Using uncompressed .dict file (no decompression needed)\n", millis());
+    return true;
+  }
+
+  // Fall back to compressed .dict.dz
+  useUncompressed = false;
   if (!loadDictzipHeader()) return false;
   return true;
 }
@@ -238,12 +251,46 @@ bool StarDict::readWordAtPosition(FsFile& idxFile, uint32_t& position, std::stri
   return true;
 }
 
+bool StarDict::readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition) {
+  // Read directly from uncompressed .dict file - no decompression needed!
+  const std::string dictPath = basePath + ".dict";
+  FsFile file;
+  if (!SdMan.openFileForRead("DICT", dictPath, file)) {
+    Serial.printf("[DICT-DBG] Failed to open .dict file\n");
+    return false;
+  }
+
+  // Seek to the definition offset
+  if (!file.seek(offset)) {
+    Serial.printf("[DICT-DBG] Failed to seek to offset %lu\n", offset);
+    file.close();
+    return false;
+  }
+
+  // Read the definition directly into the string
+  definition.resize(size);
+  const int bytesRead = file.read(&definition[0], size);
+  file.close();
+
+  if (bytesRead != static_cast<int>(size)) {
+    Serial.printf("[DICT-DBG] Read %d bytes, expected %lu\n", bytesRead, size);
+    definition.clear();
+    return false;
+  }
+
+  return true;
+}
+
 bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string& definition) {
-  if (!dzInfo.loaded) return false;
+  if (!dzInfo.loaded) {
+    Serial.printf("[DICT-DBG] dzInfo not loaded!\n");
+    return false;
+  }
 
   const std::string dzPath = basePath + ".dict.dz";
   FsFile file;
   if (!SdMan.openFileForRead("DICT", dzPath, file)) {
+    Serial.printf("[DICT-DBG] Failed to open dict.dz file\n");
     return false;
   }
 
@@ -252,7 +299,11 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
   const uint32_t endChunk = (offset + size - 1) / dzInfo.chunkLength;
   const uint32_t startOffsetInChunk = offset % dzInfo.chunkLength;
 
+  Serial.printf("[DICT-DBG] Chunks: start=%lu, end=%lu, total=%u\n",
+                startChunk, endChunk, dzInfo.chunkCount);
+
   if (endChunk >= dzInfo.chunkCount) {
+    Serial.printf("[DICT-DBG] endChunk %lu >= chunkCount %u\n", endChunk, dzInfo.chunkCount);
     file.close();
     return false;
   }
@@ -263,13 +314,38 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     fileOffset += dzInfo.chunkSizes[i];
   }
 
-  // Allocate buffers
-  const uint32_t maxCompressedSize = 65536; // Max compressed chunk size
+  // Calculate actual max compressed size needed for the chunks we'll process
+  uint32_t maxCompressedSize = 0;
+  for (uint32_t i = startChunk; i <= endChunk; i++) {
+    if (dzInfo.chunkSizes[i] > maxCompressedSize) {
+      maxCompressedSize = dzInfo.chunkSizes[i];
+    }
+  }
+
+  // Allocate buffers - allocate inflator FIRST (smallest) to reduce fragmentation impact
+  // tinfl_decompressor is ~11KB, so total allocations are ~85KB
+  Serial.printf("[DICT-DBG] Allocating inflator=%u, comp=%lu, decomp=%u bytes\n",
+                sizeof(tinfl_decompressor), maxCompressedSize, dzInfo.chunkLength);
+
+  auto* inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
+  if (!inflator) {
+    Serial.printf("[DICT-DBG] inflator alloc failed! (need %u bytes)\n", sizeof(tinfl_decompressor));
+    file.close();
+    return false;
+  }
+
   auto* compressedBuf = static_cast<uint8_t*>(malloc(maxCompressedSize));
+  if (!compressedBuf) {
+    Serial.printf("[DICT-DBG] compressedBuf alloc failed!\n");
+    free(inflator);
+    file.close();
+    return false;
+  }
   auto* decompressedBuf = static_cast<uint8_t*>(malloc(dzInfo.chunkLength));
-  if (!compressedBuf || !decompressedBuf) {
+  if (!decompressedBuf) {
+    Serial.printf("[DICT-DBG] decompressedBuf alloc failed!\n");
+    free(inflator);
     free(compressedBuf);
-    free(decompressedBuf);
     file.close();
     return false;
   }
@@ -277,13 +353,15 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
   definition.clear();
   definition.reserve(size);
 
-  // Process each needed chunk
+  // Process each needed chunk (reusing inflator allocation)
   for (uint32_t chunk = startChunk; chunk <= endChunk; chunk++) {
     const uint16_t compressedSize = dzInfo.chunkSizes[chunk];
 
     // Seek and read compressed data
     file.seek(fileOffset);
     if (file.read(compressedBuf, compressedSize) != compressedSize) {
+      Serial.printf("[DICT-DBG] File read failed at offset %lu, size %u\n", fileOffset, compressedSize);
+      free(inflator);
       free(compressedBuf);
       free(decompressedBuf);
       file.close();
@@ -291,13 +369,6 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     }
 
     // Decompress using raw inflate (no zlib header)
-    auto* inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
-    if (!inflator) {
-      free(compressedBuf);
-      free(decompressedBuf);
-      file.close();
-      return false;
-    }
     tinfl_init(inflator);
 
     size_t inBytes = compressedSize;
@@ -306,19 +377,13 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
                      TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | TINFL_FLAG_PARSE_ZLIB_HEADER);
 
-    free(inflator);
-
     if (status != TINFL_STATUS_DONE && status != TINFL_STATUS_HAS_MORE_OUTPUT) {
       // Try without zlib header flag
-      inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
-      if (inflator) {
-        tinfl_init(inflator);
-        inBytes = compressedSize;
-        outBytes = dzInfo.chunkLength;
-        tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
-                         TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
-        free(inflator);
-      }
+      tinfl_init(inflator);
+      inBytes = compressedSize;
+      outBytes = dzInfo.chunkLength;
+      tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
+                       TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
     }
 
     // Extract the portion we need from this chunk
@@ -342,6 +407,7 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     fileOffset += compressedSize;
   }
 
+  free(inflator);
   free(compressedBuf);
   free(decompressedBuf);
   file.close();
@@ -349,9 +415,9 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
   return true;
 }
 
-// StarDict comparison function: case-insensitive first, then case-sensitive as tiebreaker
+// StarDict comparison function: case-insensitive matching
 int StarDict::stardictStrcmp(const std::string& a, const std::string& b) {
-  // First: case-insensitive comparison (like g_ascii_strcasecmp)
+  // Case-insensitive comparison (like g_ascii_strcasecmp)
   size_t i = 0;
   while (i < a.length() && i < b.length()) {
     const int ca = std::tolower(static_cast<unsigned char>(a[i]));
@@ -362,8 +428,8 @@ int StarDict::stardictStrcmp(const std::string& a, const std::string& b) {
   if (a.length() != b.length()) {
     return static_cast<int>(a.length()) - static_cast<int>(b.length());
   }
-  // If case-insensitive equal, use case-sensitive as tiebreaker
-  return a.compare(b);
+  // Case-insensitive match found
+  return 0;
 }
 
 std::string StarDict::normalizeWord(const std::string& word) {
@@ -403,6 +469,9 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
     return result;
   }
 
+  Serial.printf("[DICT-DBG] Searching for: '%s' (normalized: '%s')\n",
+                word.c_str(), normalizedSearch.c_str());
+
   // First try .idx (main entries) - use prefix jump table for fast lookup
   const std::string idxPath = basePath + ".idx";
   FsFile idxFile;
@@ -418,7 +487,10 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
     const uint16_t prefixIdx = DictPrefixIndex::prefixToIndex(normalizedSearch[0], normalizedSearch[1]);
     position = DictPrefixIndex::dictPrefixOffsets[prefixIdx];
   }
+  Serial.printf("[DICT-DBG] Starting at position %lu (prefix: %c%c)\n",
+                position, normalizedSearch[0], normalizedSearch[1]);
   bool found = false;
+  uint32_t wordCount = 0;
 
   while (position < info.idxfilesize) {
     std::string currentWord;
@@ -427,13 +499,24 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
     if (!readWordAtPosition(idxFile, position, currentWord, dictOffset, dictSize)) {
       break;
     }
+    wordCount++;
+    if (wordCount % 50000 == 0) {
+      Serial.printf("[DICT-DBG] Progress: %lu words scanned, pos=%lu, current='%s'\n",
+                    wordCount, position, currentWord.c_str());
+    }
 
     // Use stardictStrcmp for case-insensitive matching
     const int cmp = stardictStrcmp(normalizedSearch, currentWord);
 
     if (cmp == 0) {
+      Serial.printf("[DICT-DBG] MATCH: '%s' == '%s' (offset=%lu, size=%lu)\n",
+                    normalizedSearch.c_str(), currentWord.c_str(), dictOffset, dictSize);
       std::string definition;
-      if (decompressDefinition(dictOffset, dictSize, definition)) {
+      const bool loaded = useUncompressed
+                              ? readDefinitionDirect(dictOffset, dictSize, definition)
+                              : decompressDefinition(dictOffset, dictSize, definition);
+      if (loaded) {
+        Serial.printf("[DICT-DBG] Definition loaded, %u bytes\n", definition.length());
         if (!found) {
           result.word = currentWord;
           result.definition = definition;
@@ -442,14 +525,20 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
         } else {
           result.definition += "</html>" + definition;
         }
+      } else {
+        Serial.printf("[DICT-DBG] Definition load FAILED!\n");
       }
       // Continue scanning for additional matches (same word, different case)
-    } else if (cmp < 0) {
-      // Passed where target would be (file is sorted)
+    } else if (found) {
+      // We had matches but now moved past them - safe to stop
      break;
     }
+    // Note: Cannot use early-break before first match because prefix index
+    // may not land exactly at target position
   }
 
+  Serial.printf("[DICT-DBG] Search complete: %lu words scanned, found=%s\n",
+                wordCount, found ? "YES" : "NO");
   idxFile.close();
 
   // If not found in main index, try synonym file with prefix jump
@@ -502,7 +591,10 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
         uint32_t dictOffset, dictSize;
         if (readWordAtPosition(idxFile2, pos, mainWord, dictOffset, dictSize)) {
           std::string definition;
-          if (decompressDefinition(dictOffset, dictSize, definition)) {
+          const bool loaded = useUncompressed
+                                  ? readDefinitionDirect(dictOffset, dictSize, definition)
+                                  : decompressDefinition(dictOffset, dictSize, definition);
+          if (loaded) {
             result.word = synWord;
             result.definition = definition;
             result.found = true;
@@ -513,10 +605,9 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
           idxFile2.close();
         }
         break; // Found a match, stop searching
-      } else if (cmp < 0) {
-        // Passed where it would be (file is sorted)
-        break;
       }
+      // Note: Cannot use early-break optimization here because prefix index
+      // may not land exactly at target position
     }
     synFile.close();
   }
StarDict.h

@@ -6,7 +6,7 @@
 #include <string>
 
 // StarDict dictionary lookup library
-// Supports .ifo/.idx/.dict.dz format with linear scan lookup
+// Supports .ifo/.idx/.dict (uncompressed) and .ifo/.idx/.dict.dz (compressed) formats
 class StarDict {
  public:
  struct DictInfo {
@@ -38,16 +38,22 @@ class StarDict {
  };
  DictzipInfo dzInfo;
 
+  // Whether to use uncompressed .dict file (preferred) or compressed .dict.dz
+  bool useUncompressed = false;
+
  // Parse .ifo file
  bool loadInfo();
 
-  // Load dictzip header for random access
+  // Load dictzip header for random access (only if using compressed)
  bool loadDictzipHeader();
 
  // Read word at given index file position, returns word and advances position
  bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
                          uint32_t& dictSize);
 
+  // Read definition directly from uncompressed .dict file (no decompression needed)
+  bool readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition);
+
  // Decompress a portion of the .dict.dz file
  bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);
 
scripts/recompress_dictzip.py (new file, 335 lines)

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
"""
Recompress a dictzip file with a custom chunk size.

Dictzip is a gzip-compatible format that allows random access by compressing
data in independent chunks. The standard dictzip uses ~58KB chunks, but this
can cause memory issues on embedded devices like ESP32.

This script recompresses dictionary files with smaller chunks (default 16KB)
to reduce memory requirements during decompression.

Usage:
    # From uncompressed .dict file:
    python recompress_dictzip.py reader.dict reader.dict.dz --chunk-size 16384

    # From existing .dict.dz file (will decompress first):
    python recompress_dictzip.py reader.dict.dz reader_small.dict.dz --chunk-size 16384
"""

import argparse
import gzip
import struct
import sys
import time
import zlib
from pathlib import Path


def read_input_file(input_path: Path) -> bytes:
    """Read input file, decompressing if it's a .dz or .gz file."""
    suffix = input_path.suffix.lower()

    if suffix in ('.dz', '.gz'):
        print(f"Decompressing {input_path}...")
        with gzip.open(input_path, 'rb') as f:
            data = f.read()
        print(f"  Decompressed size: {len(data):,} bytes")
        return data
    else:
        print(f"Reading {input_path}...")
        with open(input_path, 'rb') as f:
            data = f.read()
        print(f"  Size: {len(data):,} bytes")
        return data


def compress_chunk(data: bytes, level: int = 9) -> bytes:
    """Compress a single chunk using raw deflate (no zlib header)."""
    # Use raw deflate (-15 for raw, 15 for window size)
    compressor = zlib.compressobj(level, zlib.DEFLATED, -15)
    compressed = compressor.compress(data)
    compressed += compressor.flush()
    return compressed


def create_dictzip(data: bytes, output_path: Path, chunk_size: int = 16384,
                   compression_level: int = 9) -> None:
    """
    Create a dictzip file from uncompressed data.

    Dictzip format:
    - Standard gzip header with FEXTRA flag
    - Extra field containing 'RA' subfield with chunk info
    - Compressed chunks (raw deflate, no headers)
    - Standard gzip trailer (CRC32 + ISIZE)
    """
    # Validate chunk size (must fit in 16-bit field)
    if chunk_size > 65535:
        raise ValueError(f"Chunk size {chunk_size} exceeds maximum of 65535")
    if chunk_size < 1024:
        raise ValueError(f"Chunk size {chunk_size} is too small (minimum 1024)")

    # Calculate number of chunks
    num_chunks = (len(data) + chunk_size - 1) // chunk_size

    # Check if we can fit all chunk sizes in the extra field
    # Extra field max is 65535 bytes, each chunk size takes 2 bytes, plus 6 bytes header
    max_chunks = (65535 - 6) // 2
    if num_chunks > max_chunks:
        raise ValueError(f"Too many chunks ({num_chunks}) for dictzip format (max {max_chunks})")

    print(f"Compressing into {num_chunks} chunks of {chunk_size} bytes...")

    # Compress each chunk and collect sizes
    compressed_chunks = []
    chunk_sizes = []

    for i in range(num_chunks):
        start = i * chunk_size
        end = min(start + chunk_size, len(data))
        chunk_data = data[start:end]

        compressed = compress_chunk(chunk_data, compression_level)
        compressed_chunks.append(compressed)
        chunk_sizes.append(len(compressed))

        if (i + 1) % 500 == 0 or i == num_chunks - 1:
            print(f"  Compressed chunk {i + 1}/{num_chunks}")

    # Calculate CRC32 and size for gzip trailer
    crc32 = zlib.crc32(data) & 0xffffffff
    isize = len(data) & 0xffffffff

    # Build the extra field
    # RA subfield: VER(2) + CHLEN(2) + CHCNT(2) + sizes[CHCNT](2 each)
    ra_subfield_len = 6 + 2 * num_chunks
    extra_field = bytearray()
    extra_field.extend(b'RA')  # SI1, SI2
    extra_field.extend(struct.pack('<H', ra_subfield_len))  # LEN
    extra_field.extend(struct.pack('<H', 1))  # VER
    extra_field.extend(struct.pack('<H', chunk_size))  # CHLEN
    extra_field.extend(struct.pack('<H', num_chunks))  # CHCNT
    for size in chunk_sizes:
        if size > 65535:
            raise ValueError(f"Compressed chunk size {size} exceeds 65535 bytes")
        extra_field.extend(struct.pack('<H', size))

    xlen = len(extra_field)

    # Build gzip header
    # Flags: FEXTRA (0x04)
    timestamp = int(time.time())
    xfl = 2 if compression_level == 9 else (4 if compression_level == 1 else 0)

    header = bytearray()
    header.extend(b'\x1f\x8b')  # Magic number
    header.append(0x08)  # Compression method (deflate)
    header.append(0x04)  # Flags: FEXTRA
    header.extend(struct.pack('<I', timestamp))  # MTIME
    header.append(xfl)  # XFL
    header.append(0xff)  # OS (unknown)
    header.extend(struct.pack('<H', xlen))  # XLEN
    header.extend(extra_field)

    # Write output file
    print(f"Writing {output_path}...")
    with open(output_path, 'wb') as f:
        f.write(header)
        for chunk in compressed_chunks:
            f.write(chunk)
        f.write(struct.pack('<I', crc32))
        f.write(struct.pack('<I', isize))

    # Report stats
    output_size = output_path.stat().st_size
    ratio = (1 - output_size / len(data)) * 100
    print(f"  Output size: {output_size:,} bytes ({ratio:.1f}% compression)")
    print(f"  Chunk size: {chunk_size} bytes")
    print(f"  Number of chunks: {num_chunks}")


def verify_dictzip(path: Path) -> bool:
    """Verify a dictzip file by reading its header and decompressing chunk by chunk."""
    print(f"Verifying {path}...")

    with open(path, 'rb') as f:
        # Read gzip header
        magic = f.read(2)
        if magic != b'\x1f\x8b':
            print(f"  ERROR: Invalid gzip magic number")
            return False

        method = f.read(1)[0]
        if method != 8:
            print(f"  ERROR: Unknown compression method: {method}")
            return False

        flags = f.read(1)[0]
        if not (flags & 0x04):
            print(f"  ERROR: FEXTRA flag not set - not a dictzip file")
            return False

        f.read(4)  # MTIME
        f.read(1)  # XFL
        f.read(1)  # OS

        # Read extra field
        xlen = struct.unpack('<H', f.read(2))[0]
        extra = f.read(xlen)

        # Parse extra field for RA subfield
        pos = 0
        found_ra = False
        chlen = 0
        chcnt = 0
        chunk_sizes = []

        while pos < len(extra):
            si1 = extra[pos]
            si2 = extra[pos + 1]
            slen = struct.unpack('<H', extra[pos + 2:pos + 4])[0]

            if si1 == ord('R') and si2 == ord('A'):
                found_ra = True
                ra_data = extra[pos + 4:pos + 4 + slen]

                ver = struct.unpack('<H', ra_data[0:2])[0]
                chlen = struct.unpack('<H', ra_data[2:4])[0]
                chcnt = struct.unpack('<H', ra_data[4:6])[0]

                print(f"  Version: {ver}")
                print(f"  Chunk size: {chlen} bytes")
                print(f"  Chunk count: {chcnt}")

                # Verify chunk sizes array
                if len(ra_data) != 6 + 2 * chcnt:
                    print(f"  ERROR: Chunk sizes array length mismatch")
                    return False

                for i in range(chcnt):
                    size = struct.unpack('<H', ra_data[6 + 2*i:8 + 2*i])[0]
                    chunk_sizes.append(size)

                print(f"  Total compressed data: {sum(chunk_sizes):,} bytes")
                break

            pos += 4 + slen

        if not found_ra:
            print(f"  ERROR: RA subfield not found - not a dictzip file")
            return False

        # Decompress chunk by chunk (like the firmware does)
        data_start = f.tell()
        decompressed_data = bytearray()

        try:
            for i, comp_size in enumerate(chunk_sizes):
                f.seek(data_start + sum(chunk_sizes[:i]))
                compressed_chunk = f.read(comp_size)

                # Decompress using raw inflate (no zlib header)
                decompressor = zlib.decompressobj(-15)
                decompressed_chunk = decompressor.decompress(compressed_chunk)
                decompressed_chunk += decompressor.flush()
                decompressed_data.extend(decompressed_chunk)

            print(f"  Decompressed size: {len(decompressed_data):,} bytes")

            # Verify CRC32 from trailer
            f.seek(-8, 2)  # Seek to 8 bytes before end
            expected_crc = struct.unpack('<I', f.read(4))[0]
            expected_size = struct.unpack('<I', f.read(4))[0]

            actual_crc = zlib.crc32(bytes(decompressed_data)) & 0xffffffff
            actual_size = len(decompressed_data) & 0xffffffff

            if actual_crc != expected_crc:
                print(f"  ERROR: CRC mismatch: expected {expected_crc:08x}, got {actual_crc:08x}")
                return False

            if actual_size != expected_size:
                print(f"  ERROR: Size mismatch: expected {expected_size}, got {actual_size}")
                return False

            print(f"  CRC32: {actual_crc:08x} (verified)")
            print(f"  Verification: PASSED")
            return True

        except Exception as e:
            print(f"  ERROR: Decompression failed: {e}")
            return False


def main():
    parser = argparse.ArgumentParser(
        description='Recompress a dictzip file with a custom chunk size.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Recompress with 16KB chunks (recommended for ESP32):
  %(prog)s reader.dict reader.dict.dz --chunk-size 16384

  # Recompress from existing .dz file:
  %(prog)s reader.dict.dz reader_small.dict.dz --chunk-size 16384

  # Verify a dictzip file:
  %(prog)s --verify reader.dict.dz
""")

    parser.add_argument('input', nargs='?', help='Input .dict or .dict.dz file')
    parser.add_argument('output', nargs='?', help='Output .dict.dz file')
    parser.add_argument('--chunk-size', '-c', type=int, default=16384,
                        help='Chunk size in bytes (default: 16384, i.e., 16KB)')
    parser.add_argument('--compression-level', '-l', type=int, default=9,
                        choices=range(1, 10), metavar='1-9',
                        help='Compression level 1-9 (default: 9)')
    parser.add_argument('--verify', '-v', action='store_true',
                        help='Verify a dictzip file instead of compressing')

    args = parser.parse_args()

    if args.verify:
        if not args.input:
            parser.error("Input file required for verification")
        input_path = Path(args.input)
        if not input_path.exists():
            print(f"Error: File not found: {input_path}")
            sys.exit(1)
        success = verify_dictzip(input_path)
        sys.exit(0 if success else 1)

    if not args.input or not args.output:
        parser.error("Both input and output files are required")

    input_path = Path(args.input)
    output_path = Path(args.output)

    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}")
        sys.exit(1)

    if output_path.exists():
        response = input(f"Output file {output_path} exists. Overwrite? [y/N] ")
        if response.lower() != 'y':
            print("Aborted.")
            sys.exit(1)

    # Read and decompress input if needed
    data = read_input_file(input_path)

    # Create new dictzip with specified chunk size
    create_dictzip(data, output_path, args.chunk_size, args.compression_level)

    # Verify the output
    print()
    if verify_dictzip(output_path):
        print(f"\nSuccess! Created {output_path} with {args.chunk_size}-byte chunks.")
    else:
        print(f"\nError: Verification failed!")
        sys.exit(1)


if __name__ == '__main__':
    main()
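Aside on the RA extra field the script writes: it is the table that gives the firmware random access, since each 16-bit entry is one chunk's compressed size. Below is a rough, hypothetical sketch (not the firmware's actual loadDictzipHeader) of how a reader could parse the RA payload back into chunk metadata, assuming the same little-endian layout the script emits.

#include <cstddef>
#include <cstdint>
#include <vector>

struct RaHeader {
  uint16_t version;
  uint16_t chunkLength;              // CHLEN: uncompressed bytes per chunk
  std::vector<uint16_t> chunkSizes;  // CHCNT entries of compressed chunk sizes
};

// Parse the 'RA' subfield payload (the bytes after SI1/SI2/LEN) into chunk metadata.
// Returns false if the payload is shorter than its own chunk-count claims.
inline bool parseRaPayload(const uint8_t* p, size_t len, RaHeader& out) {
  if (len < 6) return false;
  auto rd16 = [&](size_t i) { return static_cast<uint16_t>(p[i] | (p[i + 1] << 8)); };  // little-endian
  out.version = rd16(0);
  out.chunkLength = rd16(2);
  const uint16_t chunkCount = rd16(4);
  if (len < 6u + 2u * chunkCount) return false;
  out.chunkSizes.clear();
  for (uint16_t i = 0; i < chunkCount; i++) out.chunkSizes.push_back(rd16(6 + 2 * i));
  return true;
}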
DictionaryResultActivity.cpp

@@ -3,6 +3,10 @@
 #include <DictHtmlParser.h>
 #include <GfxRenderer.h>
 
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+
 #include "DictionaryMargins.h"
 #include "MappedInputManager.h"
 #include "fontIds.h"
@@ -15,22 +19,28 @@ void DictionaryResultActivity::taskTrampoline(void* param) {
 void DictionaryResultActivity::onEnter() {
   Activity::onEnter();
 
+  Serial.printf("[DICT-DBG] DictionaryResult onEnter, defLen=%u\n", rawDefinition.length());
+
   renderingMutex = xSemaphoreCreateMutex();
   currentPage = 0;
 
   // Process definition for display
   if (!notFound) {
+    Serial.printf("[DICT-DBG] Starting paginateDefinition...\n");
     paginateDefinition();
+    Serial.printf("[DICT-DBG] Pagination done, %u pages\n", pages.size());
   }
 
   updateRequired = true;
 
+  Serial.printf("[DICT-DBG] Creating display task...\n");
   xTaskCreate(&DictionaryResultActivity::taskTrampoline, "DictResultTask",
               4096,               // Stack size
               this,               // Parameters
               1,                  // Priority
               &displayTaskHandle  // Task handle
   );
+  Serial.printf("[DICT-DBG] Task created\n");
 }
 
 void DictionaryResultActivity::onExit() {
@@ -61,24 +71,51 @@ void DictionaryResultActivity::loop() {
   }
 
   // Handle page navigation - use orientation-aware PageBack/PageForward buttons
-  if (!notFound && pages.size() > 1) {
+  if (!notFound && !pages.empty()) {
     const bool prevPressed = mappedInput.wasPressed(MappedInputManager::Button::PageBack) ||
                              mappedInput.wasPressed(MappedInputManager::Button::Left);
     const bool nextPressed = mappedInput.wasPressed(MappedInputManager::Button::PageForward) ||
                              mappedInput.wasPressed(MappedInputManager::Button::Right);
 
-    if (prevPressed && currentPage > 0) {
-      currentPage--;
-      updateRequired = true;
-    } else if (nextPressed && currentPage < static_cast<int>(pages.size()) - 1) {
-      currentPage++;
-      updateRequired = true;
+    if (prevPressed) {
+      if (currentPage > 0) {
+        // Navigate within cached pages
+        currentPage--;
+        updateRequired = true;
+      } else if (firstPageNumber > 1) {
+        // At first cached page but earlier pages exist - re-parse to get them
+        const int targetPage = firstPageNumber - 1; // Go to the page before current first
+        Serial.printf("[DICT-DBG] Re-parsing to reach page %d\n", targetPage);
+        reparseToPage(targetPage);
+        updateRequired = true;
+      }
+    } else if (nextPressed) {
+      // Check if we can navigate to existing cached page
+      if (currentPage < static_cast<int>(pages.size()) - 1) {
+        currentPage++;
+        updateRequired = true;
+      } else if (hasMoreContent) {
+        // At end of cached pages but more content available - parse next chunk
+        Serial.printf("[DICT-DBG] Parsing next chunk on navigation (page %d)\n", currentPage);
+        const size_t pagesBefore = pages.size();
+        parseNextChunk();
+
+        // If new pages were added, navigate to the next one
+        if (pages.size() > pagesBefore) {
+          currentPage++;
+          updateRequired = true;
+        }
+      }
+      // else: at true end of content, do nothing
     }
   }
 }
 
 void DictionaryResultActivity::paginateDefinition() {
   pages.clear();
+  parsePosition = 0;
+  hasMoreContent = false;
+  firstPageNumber = 1;
 
   if (rawDefinition.empty()) {
     notFound = true;
@@ -99,14 +136,55 @@ void DictionaryResultActivity::paginateDefinition() {
   const int textWidth = pageWidth - textMargin - marginRight - 10;
   const int textHeight = pageHeight - marginTop - marginBottom - headerHeight - footerHeight;
   const int lineHeight = renderer.getLineHeight(UI_10_FONT_ID);
+  const int linesPerPage = textHeight / lineHeight;
 
-  // Collect all TextBlocks from the HTML parser
+  // For chunked parsing, we estimate how much HTML to parse at a time
+  // Roughly: each line is ~40-60 chars, so one page ≈ linesPerPage * 60 bytes of text
+  // With HTML overhead, multiply by ~2, plus buffer for finding break points
+  constexpr size_t CHUNK_SIZE_BASE = 1500; // Base chunk size
+  const size_t chunkSize = std::max(CHUNK_SIZE_BASE, static_cast<size_t>(linesPerPage * 120));
+
+  Serial.printf("[DICT-DBG] Chunked parsing: defLen=%u, chunkSize=%u, linesPerPage=%d\n",
+                rawDefinition.length(), chunkSize, linesPerPage);
+
+  // Determine how much to parse for first page
+  size_t parseEnd;
+  if (rawDefinition.length() <= chunkSize) {
+    // Small definition - parse it all
+    parseEnd = rawDefinition.length();
+    hasMoreContent = false;
+  } else {
+    // Large definition - find a good break point
+    parseEnd = findHtmlBreakPoint(rawDefinition, chunkSize / 2, chunkSize);
+    hasMoreContent = (parseEnd < rawDefinition.length());
+  }
+
+  // Extract the chunk to parse
+  std::string chunk = rawDefinition.substr(0, parseEnd);
+  parsePosition = parseEnd;
+
+  Serial.printf("[DICT-DBG] Parsing first chunk: 0-%u of %u, hasMore=%d\n",
+                parseEnd, rawDefinition.length(), hasMoreContent);
+
+  // Parse this chunk into TextBlocks
   std::vector<std::shared_ptr<TextBlock>> allBlocks;
-  DictHtmlParser::parse(rawDefinition, UI_10_FONT_ID, renderer, textWidth,
-                        [&allBlocks](std::shared_ptr<TextBlock> block) { allBlocks.push_back(block); });
+  DictHtmlParser::parse(chunk, UI_10_FONT_ID, renderer, textWidth,
+                        [&allBlocks](std::shared_ptr<TextBlock> block) {
+                          allBlocks.push_back(block);
+                        });
+  Serial.printf("[DICT-DBG] First chunk parsed, %u TextBlocks\n", allBlocks.size());
 
   if (allBlocks.empty()) {
-    notFound = true;
+    // Check if there's more to parse - maybe first chunk had no displayable content
+    if (hasMoreContent) {
+      // Try parsing more
+      parseNextChunk();
+      if (pages.empty()) {
+        notFound = true;
+      }
+    } else {
+      notFound = true;
+    }
     return;
   }
 
@@ -131,6 +209,189 @@ void DictionaryResultActivity::paginateDefinition() {
   if (!currentPageBlocks.empty()) {
     pages.push_back(currentPageBlocks);
   }
+
+  Serial.printf("[DICT-DBG] Initial pagination: %u pages\n", pages.size());
+}
+
+size_t DictionaryResultActivity::findHtmlBreakPoint(const std::string& html, size_t searchStart, size_t maxPos) {
+  // Search backwards from maxPos for good HTML break points
+  // Priority: </li>, </p>, </ol>, </ul>, </div> then any '>' then whitespace
+
+  if (maxPos >= html.length()) {
+    return html.length();
+  }
+
+  // Clamp searchStart to not exceed maxPos
+  if (searchStart > maxPos) {
+    searchStart = maxPos;
+  }
+
+  // Search for closing block tags (best break points)
+  const char* closingTags[] = {"</li>", "</p>", "</ol>", "</ul>", "</div>", "</dd>", "</dt>"};
+  size_t bestBreak = std::string::npos;
+
+  for (const char* tag : closingTags) {
+    size_t pos = html.rfind(tag, maxPos);
+    if (pos != std::string::npos && pos >= searchStart) {
+      // Found a closing tag - break after it
+      size_t breakAfter = pos + strlen(tag);
+      if (bestBreak == std::string::npos || breakAfter > bestBreak) {
+        bestBreak = breakAfter;
+      }
+    }
+  }
+
+  if (bestBreak != std::string::npos) {
+    return bestBreak;
+  }
+
+  // Fallback: search for any '>' (end of tag)
+  size_t tagEnd = html.rfind('>', maxPos);
+  if (tagEnd != std::string::npos && tagEnd >= searchStart) {
+    return tagEnd + 1;
+  }
+
+  // Last resort: search for whitespace
+  for (size_t i = maxPos; i >= searchStart && i != std::string::npos; i--) {
+    if (std::isspace(static_cast<unsigned char>(html[i]))) {
+      return i + 1;
+    }
+    if (i == 0) break;
+  }
+
+  // No good break point found - use maxPos
+  return maxPos;
+}
+
+void DictionaryResultActivity::parseNextChunk() {
+  if (!hasMoreContent || parsePosition >= rawDefinition.length()) {
+    hasMoreContent = false;
+    return;
+  }
+
+  Serial.printf("[DICT-DBG] parseNextChunk starting at position %u of %u\n",
+                parsePosition, rawDefinition.length());
+
+  // Get margins for calculating page dimensions
+  int marginTop, marginRight, marginBottom, marginLeft;
+  getDictionaryContentMargins(renderer, &marginTop, &marginRight, &marginBottom, &marginLeft);
+
+  const auto pageWidth = renderer.getScreenWidth();
+  const auto pageHeight = renderer.getScreenHeight();
+
+  // Calculate text area dimensions (must match paginateDefinition and render)
+  constexpr int headerHeight = 80;
+  constexpr int footerHeight = 30;
+  const int textMargin = marginLeft + 10;
+  const int textWidth = pageWidth - textMargin - marginRight - 10;
+  const int textHeight = pageHeight - marginTop - marginBottom - headerHeight - footerHeight;
+  const int lineHeight = renderer.getLineHeight(UI_10_FONT_ID);
+  const int linesPerPage = textHeight / lineHeight;
+
+  // Chunk size estimation (same as paginateDefinition)
+  constexpr size_t CHUNK_SIZE_BASE = 1500;
+  const size_t chunkSize = std::max(CHUNK_SIZE_BASE, static_cast<size_t>(linesPerPage * 120));
+
+  // Determine parse range for this chunk
+  size_t parseStart = parsePosition;
+  size_t parseEnd;
+
+  if (parsePosition + chunkSize >= rawDefinition.length()) {
+    // This will be the last chunk
+    parseEnd = rawDefinition.length();
+    hasMoreContent = false;
+  } else {
+    // Find a good break point
+    parseEnd = findHtmlBreakPoint(rawDefinition, parsePosition + chunkSize / 2, parsePosition + chunkSize);
+    hasMoreContent = (parseEnd < rawDefinition.length());
+  }
+
+  // Extract the chunk to parse
+  std::string chunk = rawDefinition.substr(parseStart, parseEnd - parseStart);
+  parsePosition = parseEnd;
+
+  Serial.printf("[DICT-DBG] Parsing chunk %u-%u, hasMore=%d\n", parseStart, parseEnd, hasMoreContent);
+
+  // Parse this chunk into TextBlocks
+  std::vector<std::shared_ptr<TextBlock>> allBlocks;
+  DictHtmlParser::parse(chunk, UI_10_FONT_ID, renderer, textWidth,
+                        [&allBlocks](std::shared_ptr<TextBlock> block) {
+                          allBlocks.push_back(block);
+                        });
+
+  Serial.printf("[DICT-DBG] Chunk parsed, %u TextBlocks\n", allBlocks.size());
+
+  if (allBlocks.empty()) {
+    // No content in this chunk - try parsing more if available
+    if (hasMoreContent) {
+      parseNextChunk();
+    }
+    return;
+  }
+
+  // Paginate: group TextBlocks into pages based on available height
+  std::vector<std::shared_ptr<TextBlock>> currentPageBlocks;
+  int currentY = 0;
+
+  for (const auto& block : allBlocks) {
+    if (currentY + lineHeight > textHeight && !currentPageBlocks.empty()) {
+      // Page is full, start new page
+      pages.push_back(currentPageBlocks);
+      currentPageBlocks.clear();
+      currentY = 0;
+    }
+
+    currentPageBlocks.push_back(block);
+    currentY += lineHeight;
+  }
+
+  // Add remaining blocks as last page
+  if (!currentPageBlocks.empty()) {
+    pages.push_back(currentPageBlocks);
+  }
+
+  // Trim old pages if we exceed the limit to prevent memory exhaustion
+  while (static_cast<int>(pages.size()) > MAX_CACHED_PAGES && currentPage > 0) {
+    // Remove the oldest page and adjust indices
+    pages.erase(pages.begin());
+    currentPage--;
+    firstPageNumber++;
+    Serial.printf("[DICT-DBG] Trimmed old page, firstPageNumber now %d\n", firstPageNumber);
+  }
+
+  Serial.printf("[DICT-DBG] After chunk: %u cached pages (pages %d-%d)\n",
+                pages.size(), firstPageNumber, firstPageNumber + static_cast<int>(pages.size()) - 1);
+}
+
+void DictionaryResultActivity::reparseToPage(int targetPageNumber) {
+  // Re-parse from the beginning to reach an earlier page that was trimmed
+  // This allows backward navigation through the entire definition
+
+  Serial.printf("[DICT-DBG] reparseToPage: target=%d, clearing and re-parsing\n", targetPageNumber);
+
+  // Clear current state and start fresh
+  pages.clear();
+  parsePosition = 0;
+  firstPageNumber = 1;
+  hasMoreContent = !rawDefinition.empty();
+
+  // Parse chunks until we have the target page
+  while (hasMoreContent && firstPageNumber + static_cast<int>(pages.size()) - 1 < targetPageNumber) {
+    parseNextChunk();
+  }
+
+  // Now position currentPage to show the target page
+  if (targetPageNumber >= firstPageNumber &&
+      targetPageNumber < firstPageNumber + static_cast<int>(pages.size())) {
+    currentPage = targetPageNumber - firstPageNumber;
+  } else {
+    // Target page doesn't exist (definition is shorter than expected)
+    currentPage = static_cast<int>(pages.size()) - 1;
+    if (currentPage < 0) currentPage = 0;
+  }
+
+  Serial.printf("[DICT-DBG] reparseToPage done: currentPage=%d, firstPageNumber=%d, pages=%u\n",
+                currentPage, firstPageNumber, pages.size());
 }
 
 void DictionaryResultActivity::displayTaskLoop() {
@@ -181,17 +442,29 @@ void DictionaryResultActivity::render() const {
      y += lineHeight;
    }
 
-    // Draw page indicator if multiple pages
-    if (pages.size() > 1) {
-      char pageIndicator[32];
-      snprintf(pageIndicator, sizeof(pageIndicator), "Page %d of %d", currentPage + 1, static_cast<int>(pages.size()));
+    // Draw page indicator if multiple pages or more content available
+    const bool hasMultiplePages = pages.size() > 1 || hasMoreContent || firstPageNumber > 1;
+    if (hasMultiplePages) {
+      char pageIndicator[48];
+      const int displayPageNum = firstPageNumber + currentPage;
+      const int lastKnownPage = firstPageNumber + static_cast<int>(pages.size()) - 1;
+      if (hasMoreContent) {
+        // More content to load - show "Page X of Y+" to indicate more pages coming
+        snprintf(pageIndicator, sizeof(pageIndicator), "Page %d of %d+", displayPageNum, lastKnownPage);
+      } else {
+        snprintf(pageIndicator, sizeof(pageIndicator), "Page %d of %d", displayPageNum, lastKnownPage);
      }
      renderer.drawCenteredText(SMALL_FONT_ID, pageHeight - marginBottom - 5, pageIndicator);
    }
  }
 
   // Draw button hints
-  const char* leftHint = (pages.size() > 1 && currentPage > 0) ? "< Prev" : "";
-  const char* rightHint = (pages.size() > 1 && currentPage < static_cast<int>(pages.size()) - 1) ? "Next >" : "";
+  // Show navigation hints when there are multiple pages or more content to load
+  // canGoBack is true if we have previous cached pages OR if earlier pages were trimmed
+  const bool canGoBack = currentPage > 0 || firstPageNumber > 1;
+  const bool canGoForward = currentPage < static_cast<int>(pages.size()) - 1 || hasMoreContent;
+  const char* leftHint = canGoBack ? "< Prev" : "";
+  const char* rightHint = canGoForward ? "Next >" : "";
   const auto labels = mappedInput.mapLabels("\xc2\xab Back", "Search", leftHint, rightHint);
   renderer.drawButtonHints(UI_10_FONT_ID, labels.btn1, labels.btn2, labels.btn3, labels.btn4);
 
DictionaryResultActivity.h

@@ -26,14 +26,24 @@ class DictionaryResultActivity final : public Activity {
  const std::function<void()> onSearchAnother;
 
  // Pagination - each page contains TextBlocks with styled text
+  // We limit cached pages to prevent memory exhaustion on long definitions
+  static constexpr int MAX_CACHED_PAGES = 4;
  std::vector<std::vector<std::shared_ptr<TextBlock>>> pages;
-  int currentPage = 0;
+  int currentPage = 0; // Index into pages vector
+  int firstPageNumber = 1; // The page number of pages[0] (1-based for display)
  bool notFound = false;
 
+  // Chunked parsing state - parse definition on-demand as user navigates
+  size_t parsePosition = 0; // Current position in rawDefinition HTML
+  bool hasMoreContent = false; // True if more HTML remains to parse
+
  static void taskTrampoline(void* param);
  [[noreturn]] void displayTaskLoop();
  void render() const;
  void paginateDefinition();
+  void parseNextChunk();
+  void reparseToPage(int targetPageNumber); // Re-parse from beginning to reach earlier page
+  static size_t findHtmlBreakPoint(const std::string& html, size_t searchStart, size_t maxPos);
 
  public:
  /**