From 62643ae93390ad74537207d6686dfb5a7592d865 Mon Sep 17 00:00:00 2001
From: cottongin
Date: Thu, 29 Jan 2026 09:33:40 -0500
Subject: [PATCH] checkpoint: pre list-to-vector refactor, fixes dictionary crash, mostly

- Add uncompressed dictionary (.dict) file support to avoid decompression memory issues
- Implement chunked on-demand parsing for large definitions
- Add backward navigation with re-parse capability
- Limit cached pages to MAX_CACHED_PAGES (4) to prevent memory exhaustion
- Add helper script for extracting/recompressing dictzip files
---
 lib/StarDict/StarDict.cpp                     | 161 +++++++--
 lib/StarDict/StarDict.h                       |  10 +-
 scripts/recompress_dictzip.py                 | 335 ++++++++++++++++++
 .../dictionary/DictionaryResultActivity.cpp   | 307 +++++++++++++++-
 .../dictionary/DictionaryResultActivity.h     |  12 +-
 5 files changed, 770 insertions(+), 55 deletions(-)
 create mode 100644 scripts/recompress_dictzip.py

diff --git a/lib/StarDict/StarDict.cpp b/lib/StarDict/StarDict.cpp
index 3c49456..29ccb76 100644
--- a/lib/StarDict/StarDict.cpp
+++ b/lib/StarDict/StarDict.cpp
@@ -205,6 +205,19 @@ bool StarDict::loadDictzipHeader() {
 
 bool StarDict::begin() {
   if (!loadInfo()) return false;
+
+  // Try uncompressed .dict file first (preferred - no memory overhead)
+  const std::string dictPath = basePath + ".dict";
+  FsFile testFile;
+  if (SdMan.openFileForRead("DICT", dictPath, testFile)) {
+    testFile.close();
+    useUncompressed = true;
+    Serial.printf("[%lu] [DICT] Using uncompressed .dict file (no decompression needed)\n", millis());
+    return true;
+  }
+
+  // Fall back to compressed .dict.dz
+  useUncompressed = false;
   if (!loadDictzipHeader()) return false;
   return true;
 }
@@ -238,12 +251,46 @@ bool StarDict::readWordAtPosition(FsFile& idxFile, uint32_t& position, std::stri
   return true;
 }
 
+bool StarDict::readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition) {
+  // Read directly from uncompressed .dict file - no decompression needed!
+  const std::string dictPath = basePath + ".dict";
+  FsFile file;
+  if (!SdMan.openFileForRead("DICT", dictPath, file)) {
+    Serial.printf("[DICT-DBG] Failed to open .dict file\n");
+    return false;
+  }
+
+  // Seek to the definition offset
+  if (!file.seek(offset)) {
+    Serial.printf("[DICT-DBG] Failed to seek to offset %lu\n", offset);
+    file.close();
+    return false;
+  }
+
+  // Read the definition directly into the string
+  definition.resize(size);
+  const int bytesRead = file.read(&definition[0], size);
+  file.close();
+
+  if (bytesRead != static_cast<int>(size)) {
+    Serial.printf("[DICT-DBG] Read %d bytes, expected %lu\n", bytesRead, size);
+    definition.clear();
+    return false;
+  }
+
+  return true;
+}
+
 bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string& definition) {
-  if (!dzInfo.loaded) return false;
+  if (!dzInfo.loaded) {
+    Serial.printf("[DICT-DBG] dzInfo not loaded!\n");
+    return false;
+  }
 
   const std::string dzPath = basePath + ".dict.dz";
   FsFile file;
   if (!SdMan.openFileForRead("DICT", dzPath, file)) {
+    Serial.printf("[DICT-DBG] Failed to open dict.dz file\n");
     return false;
   }
 
@@ -252,7 +299,11 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
   const uint32_t endChunk = (offset + size - 1) / dzInfo.chunkLength;
   const uint32_t startOffsetInChunk = offset % dzInfo.chunkLength;
 
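+  // Example: with chunkLength=16384, a definition at offset=40000, size=5000
+  // spans startChunk=endChunk=2 (40000/16384, 44999/16384) and begins
+  // 40000%16384=7232 bytes into that chunk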
+  Serial.printf("[DICT-DBG] Chunks: start=%lu, end=%lu, total=%u\n",
+                startChunk, endChunk, dzInfo.chunkCount);
+
   if (endChunk >= dzInfo.chunkCount) {
+    Serial.printf("[DICT-DBG] endChunk %lu >= chunkCount %u\n", endChunk, dzInfo.chunkCount);
     file.close();
     return false;
   }
@@ -263,13 +314,38 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     fileOffset += dzInfo.chunkSizes[i];
   }
 
-  // Allocate buffers
-  const uint32_t maxCompressedSize = 65536;  // Max compressed chunk size
+  // Calculate actual max compressed size needed for the chunks we'll process
+  uint32_t maxCompressedSize = 0;
+  for (uint32_t i = startChunk; i <= endChunk; i++) {
+    if (dzInfo.chunkSizes[i] > maxCompressedSize) {
+      maxCompressedSize = dzInfo.chunkSizes[i];
+    }
+  }
+
+  // Allocate buffers - allocate inflator FIRST (smallest) to reduce fragmentation impact
+  // tinfl_decompressor is ~11KB, so total allocations are ~85KB
+  Serial.printf("[DICT-DBG] Allocating inflator=%u, comp=%lu, decomp=%u bytes\n",
+                sizeof(tinfl_decompressor), maxCompressedSize, dzInfo.chunkLength);
+
+  auto* inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
+  if (!inflator) {
+    Serial.printf("[DICT-DBG] inflator alloc failed! (need %u bytes)\n", sizeof(tinfl_decompressor));
+    file.close();
+    return false;
+  }
+  auto* compressedBuf = static_cast<uint8_t*>(malloc(maxCompressedSize));
+  if (!compressedBuf) {
+    Serial.printf("[DICT-DBG] compressedBuf alloc failed!\n");
+    free(inflator);
+    file.close();
+    return false;
+  }
   auto* decompressedBuf = static_cast<uint8_t*>(malloc(dzInfo.chunkLength));
-  if (!compressedBuf || !decompressedBuf) {
+  if (!decompressedBuf) {
+    Serial.printf("[DICT-DBG] decompressedBuf alloc failed!\n");
+    free(inflator);
     free(compressedBuf);
-    free(decompressedBuf);
     file.close();
     return false;
   }
@@ -277,13 +353,15 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
   definition.clear();
   definition.reserve(size);
 
-  // Process each needed chunk
+  // Process each needed chunk (reusing inflator allocation)
   for (uint32_t chunk = startChunk; chunk <= endChunk; chunk++) {
     const uint16_t compressedSize = dzInfo.chunkSizes[chunk];
 
     // Seek and read compressed data
     file.seek(fileOffset);
     if (file.read(compressedBuf, compressedSize) != compressedSize) {
+      Serial.printf("[DICT-DBG] File read failed at offset %lu, size %u\n", fileOffset, compressedSize);
+      free(inflator);
       free(compressedBuf);
       free(decompressedBuf);
       file.close();
@@ -291,13 +369,6 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     }
 
     // Decompress using raw inflate (no zlib header)
-    auto* inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
-    if (!inflator) {
-      free(compressedBuf);
-      free(decompressedBuf);
-      file.close();
-      return false;
-    }
     tinfl_init(inflator);
 
     size_t inBytes = compressedSize;
@@ -306,19 +377,13 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
         tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
                          TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | TINFL_FLAG_PARSE_ZLIB_HEADER);
 
-    free(inflator);
-
     if (status != TINFL_STATUS_DONE && status != TINFL_STATUS_HAS_MORE_OUTPUT) {
       // Try without zlib header flag
-      inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
-      if (inflator) {
-        tinfl_init(inflator);
-        inBytes = compressedSize;
-        outBytes = dzInfo.chunkLength;
-        tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
-                         TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
-      }
+      tinfl_init(inflator);
+      inBytes = compressedSize;
+      outBytes = dzInfo.chunkLength;
+      tinfl_decompress(inflator, compressedBuf, &inBytes, decompressedBuf, decompressedBuf, &outBytes,
+                       TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
     }
 
     // Extract the portion we need from this chunk
@@ -342,6 +407,7 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
     fileOffset += compressedSize;
   }
 
+  free(inflator);
   free(compressedBuf);
   free(decompressedBuf);
   file.close();
@@ -349,9 +415,9 @@ bool StarDict::decompressDefinition(uint32_t offset, uint32_t size, std::string&
   return true;
 }
 
-// StarDict comparison function: case-insensitive first, then case-sensitive as tiebreaker
+// StarDict comparison function: case-insensitive matching
 int StarDict::stardictStrcmp(const std::string& a, const std::string& b) {
-  // First: case-insensitive comparison (like g_ascii_strcasecmp)
+  // Case-insensitive comparison (like g_ascii_strcasecmp)
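+  // e.g. stardictStrcmp("Apple", "apple") == 0 (match);
+  //      stardictStrcmp("app", "apple") < 0 (shorter word sorts first)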
   size_t i = 0;
   while (i < a.length() && i < b.length()) {
     const int ca = std::tolower(static_cast<unsigned char>(a[i]));
@@ -362,8 +428,8 @@ int StarDict::stardictStrcmp(const std::string& a, const std::string& b) {
   if (a.length() != b.length()) {
     return static_cast<int>(a.length()) - static_cast<int>(b.length());
   }
-  // If case-insensitive equal, use case-sensitive as tiebreaker
-  return a.compare(b);
+  // Case-insensitive match found
+  return 0;
 }
 
 std::string StarDict::normalizeWord(const std::string& word) {
@@ -403,6 +469,9 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
     return result;
   }
 
+  Serial.printf("[DICT-DBG] Searching for: '%s' (normalized: '%s')\n",
+                word.c_str(), normalizedSearch.c_str());
+
   // First try .idx (main entries) - use prefix jump table for fast lookup
   const std::string idxPath = basePath + ".idx";
   FsFile idxFile;
@@ -418,7 +487,10 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
     const uint16_t prefixIdx = DictPrefixIndex::prefixToIndex(normalizedSearch[0], normalizedSearch[1]);
     position = DictPrefixIndex::dictPrefixOffsets[prefixIdx];
   }
+  Serial.printf("[DICT-DBG] Starting at position %lu (prefix: %c%c)\n",
+                position, normalizedSearch[0], normalizedSearch[1]);
   bool found = false;
+  uint32_t wordCount = 0;
 
   while (position < info.idxfilesize) {
     std::string currentWord;
@@ -427,13 +499,24 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
     if (!readWordAtPosition(idxFile, position, currentWord, dictOffset, dictSize)) {
       break;
     }
+    wordCount++;
+    if (wordCount % 50000 == 0) {
+      Serial.printf("[DICT-DBG] Progress: %lu words scanned, pos=%lu, current='%s'\n",
+                    wordCount, position, currentWord.c_str());
+    }
 
     // Use stardictStrcmp for case-insensitive matching
     const int cmp = stardictStrcmp(normalizedSearch, currentWord);
     if (cmp == 0) {
+      Serial.printf("[DICT-DBG] MATCH: '%s' == '%s' (offset=%lu, size=%lu)\n",
+                    normalizedSearch.c_str(), currentWord.c_str(), dictOffset, dictSize);
       std::string definition;
-      if (decompressDefinition(dictOffset, dictSize, definition)) {
+      const bool loaded = useUncompressed
+                              ? readDefinitionDirect(dictOffset, dictSize, definition)
+                              : decompressDefinition(dictOffset, dictSize, definition);
+      if (loaded) {
+        Serial.printf("[DICT-DBG] Definition loaded, %u bytes\n", definition.length());
         if (!found) {
           result.word = currentWord;
           result.definition = definition;
@@ -442,14 +525,20 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
         } else {
           result.definition += "" + definition;
         }
+      } else {
+        Serial.printf("[DICT-DBG] Definition load FAILED!\n");
       }
       // Continue scanning for additional matches (same word, different case)
-    } else if (cmp < 0) {
-      // Passed where target would be (file is sorted)
+    } else if (found) {
+      // We had matches but now moved past them - safe to stop
      break;
    }
+    // Note: Cannot use early-break before first match because prefix index
+    // may not land exactly at target position
   }
 
+  Serial.printf("[DICT-DBG] Search complete: %lu words scanned, found=%s\n",
+                wordCount, found ? "YES" : "NO");
   idxFile.close();
 
   // If not found in main index, try synonym file with prefix jump
@@ -502,7 +591,10 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
         uint32_t dictOffset, dictSize;
         if (readWordAtPosition(idxFile2, pos, mainWord, dictOffset, dictSize)) {
           std::string definition;
-          if (decompressDefinition(dictOffset, dictSize, definition)) {
+          const bool loaded = useUncompressed
+                                  ? readDefinitionDirect(dictOffset, dictSize, definition)
+                                  : decompressDefinition(dictOffset, dictSize, definition);
+          if (loaded) {
             result.word = synWord;
             result.definition = definition;
             result.found = true;
@@ -513,10 +605,9 @@ StarDict::LookupResult StarDict::lookup(const std::string& word) {
           idxFile2.close();
         }
         break;  // Found a match, stop searching
-      } else if (cmp < 0) {
-        // Passed where it would be (file is sorted)
-        break;
       }
+      // Note: Cannot use early-break optimization here because prefix index
+      // may not land exactly at target position
     }
     synFile.close();
   }
diff --git a/lib/StarDict/StarDict.h b/lib/StarDict/StarDict.h
index d3358c4..b6e3fb6 100644
--- a/lib/StarDict/StarDict.h
+++ b/lib/StarDict/StarDict.h
@@ -6,7 +6,7 @@
 #include <string>
 
 // StarDict dictionary lookup library
-// Supports .ifo/.idx/.dict.dz format with linear scan lookup
+// Supports .ifo/.idx/.dict (uncompressed) and .ifo/.idx/.dict.dz (compressed) formats
 class StarDict {
  public:
   struct DictInfo {
@@ -38,16 +38,22 @@ class StarDict {
   };
   DictzipInfo dzInfo;
 
+  // Whether to use uncompressed .dict file (preferred) or compressed .dict.dz
+  bool useUncompressed = false;
+
   // Parse .ifo file
   bool loadInfo();
 
-  // Load dictzip header for random access
+  // Load dictzip header for random access (only if using compressed)
   bool loadDictzipHeader();
 
   // Read word at given index file position, returns word and advances position
   bool readWordAtPosition(FsFile& idxFile, uint32_t& position, std::string& word, uint32_t& dictOffset,
                           uint32_t& dictSize);
 
+  // Read definition directly from uncompressed .dict file (no decompression needed)
+  bool readDefinitionDirect(uint32_t offset, uint32_t size, std::string& definition);
+
   // Decompress a portion of the .dict.dz file
   bool decompressDefinition(uint32_t offset, uint32_t size, std::string& definition);
 
diff --git a/scripts/recompress_dictzip.py b/scripts/recompress_dictzip.py
new file mode 100644
index 0000000..8c72ba5
--- /dev/null
+++ b/scripts/recompress_dictzip.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python3
+"""
+Recompress a dictzip file with a custom chunk size.
+
+Dictzip is a gzip-compatible format that allows random access by compressing
+data in independent chunks. The standard dictzip uses ~58KB chunks, but this
+can cause memory issues on embedded devices like ESP32.
+
+This script recompresses dictionary files with smaller chunks (default 16KB)
+to reduce memory requirements during decompression.
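+
+Since dictzip output is still a valid gzip stream, the plain uncompressed
+.dict file preferred by the firmware's direct-read path can also be produced
+with standard tools, e.g. `gzip -dc reader.dict.dz > reader.dict`.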
+
+Usage:
+    # From uncompressed .dict file:
+    python recompress_dictzip.py reader.dict reader.dict.dz --chunk-size 16384
+
+    # From existing .dict.dz file (will decompress first):
+    python recompress_dictzip.py reader.dict.dz reader_small.dict.dz --chunk-size 16384
+"""
+
+import argparse
+import gzip
+import struct
+import sys
+import time
+import zlib
+from pathlib import Path
+
+
+def read_input_file(input_path: Path) -> bytes:
+    """Read input file, decompressing if it's a .dz or .gz file."""
+    suffix = input_path.suffix.lower()
+
+    if suffix in ('.dz', '.gz'):
+        print(f"Decompressing {input_path}...")
+        with gzip.open(input_path, 'rb') as f:
+            data = f.read()
+        print(f"  Decompressed size: {len(data):,} bytes")
+        return data
+    else:
+        print(f"Reading {input_path}...")
+        with open(input_path, 'rb') as f:
+            data = f.read()
+        print(f"  Size: {len(data):,} bytes")
+        return data
+
+
+def compress_chunk(data: bytes, level: int = 9) -> bytes:
+    """Compress a single chunk using raw deflate (no zlib header)."""
+    # Use raw deflate (-15 for raw, 15 for window size)
+    compressor = zlib.compressobj(level, zlib.DEFLATED, -15)
+    compressed = compressor.compress(data)
+    compressed += compressor.flush()
+    return compressed
+
+
+def create_dictzip(data: bytes, output_path: Path, chunk_size: int = 16384,
+                   compression_level: int = 9) -> None:
+    """
+    Create a dictzip file from uncompressed data.
+
+    Dictzip format:
+    - Standard gzip header with FEXTRA flag
+    - Extra field containing 'RA' subfield with chunk info
+    - Compressed chunks (raw deflate, no headers)
+    - Standard gzip trailer (CRC32 + ISIZE)
+    """
+    # Validate chunk size (must fit in 16-bit field)
+    if chunk_size > 65535:
+        raise ValueError(f"Chunk size {chunk_size} exceeds maximum of 65535")
+    if chunk_size < 1024:
+        raise ValueError(f"Chunk size {chunk_size} is too small (minimum 1024)")
+
+    # Calculate number of chunks
+    num_chunks = (len(data) + chunk_size - 1) // chunk_size
+
+    # Check if we can fit all chunk sizes in the extra field
+    # Extra field max is 65535 bytes, each chunk size takes 2 bytes, plus 6 bytes header
+    max_chunks = (65535 - 6) // 2
+    if num_chunks > max_chunks:
+        raise ValueError(f"Too many chunks ({num_chunks}) for dictzip format (max {max_chunks})")
+
+    print(f"Compressing into {num_chunks} chunks of {chunk_size} bytes...")
+
+    # Compress each chunk and collect sizes
+    compressed_chunks = []
+    chunk_sizes = []
+
+    for i in range(num_chunks):
+        start = i * chunk_size
+        end = min(start + chunk_size, len(data))
+        chunk_data = data[start:end]
+
+        compressed = compress_chunk(chunk_data, compression_level)
+        compressed_chunks.append(compressed)
+        chunk_sizes.append(len(compressed))
+
+        if (i + 1) % 500 == 0 or i == num_chunks - 1:
+            print(f"  Compressed chunk {i + 1}/{num_chunks}")
+
+    # Calculate CRC32 and size for gzip trailer
+    crc32 = zlib.crc32(data) & 0xffffffff
+    isize = len(data) & 0xffffffff
+
+    # Build the extra field
+    # RA subfield: VER(2) + CHLEN(2) + CHCNT(2) + sizes[CHCNT](2 each)
+    ra_subfield_len = 6 + 2 * num_chunks
+    extra_field = bytearray()
+    extra_field.extend(b'RA')                               # SI1, SI2
+    extra_field.extend(struct.pack('<H', ra_subfield_len))  # LEN
+    extra_field.extend(struct.pack('<H', 1))                # VER
+    extra_field.extend(struct.pack('<H', chunk_size))       # CHLEN
+    extra_field.extend(struct.pack('<H', num_chunks))       # CHCNT
+    for size in chunk_sizes:
+        if size > 65535:
+            raise ValueError(f"Compressed chunk size {size} exceeds 65535 bytes")
+        extra_field.extend(struct.pack('<H', size))
+
+    # Build gzip header with FEXTRA flag
+    header = bytearray()
+    header.extend(b'\x1f\x8b')                           # magic
+    header.append(8)                                     # CM = deflate
+    header.append(0x04)                                  # FLG = FEXTRA
+    header.extend(struct.pack('<I', int(time.time())))   # MTIME
+    header.append(0)                                     # XFL
+    header.append(3)                                     # OS = Unix
+    header.extend(struct.pack('<H', len(extra_field)))   # XLEN
+    header.extend(extra_field)
+
+    # Write header, compressed chunks, and gzip trailer
+    print(f"Writing {output_path}...")
+    with open(output_path, 'wb') as f:
+        f.write(header)
+        for compressed in compressed_chunks:
+            f.write(compressed)
+        f.write(struct.pack('<I', crc32))   # CRC32
+        f.write(struct.pack('<I', isize))   # ISIZE
+
+    output_size = output_path.stat().st_size
+    print(f"  Output size: {output_size:,} bytes "
+          f"({output_size * 100 // max(1, len(data))}% of original)")
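+
+# Resulting file layout (all multi-byte fields little-endian):
+#   [gzip header: magic, CM=8, FLG=FEXTRA, MTIME, XFL, OS] [XLEN]
+#   [b'RA'] [LEN] [VER=1] [CHLEN] [CHCNT] [CHCNT x 2-byte compressed sizes]
+#   [raw-deflate chunks...] [CRC32] [ISIZE]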
+
+
+def verify_dictzip(path: Path) -> bool:
+    """Verify a dictzip file by reading its header and decompressing chunk by chunk."""
+    print(f"Verifying {path}...")
+
+    with open(path, 'rb') as f:
+        # Read gzip header
+        magic = f.read(2)
+        if magic != b'\x1f\x8b':
+            print("  ERROR: Invalid gzip magic number")
+            return False
+
+        method = f.read(1)[0]
+        if method != 8:
+            print(f"  ERROR: Unknown compression method: {method}")
+            return False
+
+        flags = f.read(1)[0]
+        if not (flags & 0x04):
+            print("  ERROR: FEXTRA flag not set - not a dictzip file")
+            return False
+
+        f.read(4)  # MTIME
+        f.read(1)  # XFL
+        f.read(1)  # OS
+
+        # Read extra field
+        xlen = struct.unpack('<H', f.read(2))[0]
+        extra = f.read(xlen)
+
+        # Locate the 'RA' subfield and read the chunk table
+        pos = 0
+        chunk_length = None
+        chunk_sizes = []
+        while pos + 4 <= xlen:
+            si = extra[pos:pos + 2]
+            sublen = struct.unpack('<H', extra[pos + 2:pos + 4])[0]
+            if si == b'RA':
+                ver, chlen, chcnt = struct.unpack('<HHH', extra[pos + 4:pos + 10])
+                chunk_length = chlen
+                for i in range(chcnt):
+                    off = pos + 10 + 2 * i
+                    chunk_sizes.append(struct.unpack('<H', extra[off:off + 2])[0])
+                break
+            pos += 4 + sublen
+
+        if chunk_length is None:
+            print("  ERROR: No 'RA' subfield found")
+            return False
+        print(f"  Chunk length: {chunk_length}, chunk count: {len(chunk_sizes)}")
+
+        # Decompress every chunk and accumulate the CRC
+        # (assumes no FNAME/FCOMMENT members, as written by create_dictzip above)
+        crc = 0
+        total = 0
+        for size in chunk_sizes:
+            decompressed = zlib.decompress(f.read(size), -15)
+            crc = zlib.crc32(decompressed, crc)
+            total += len(decompressed)
+
+        # Check gzip trailer
+        expected_crc, expected_size = struct.unpack('<II', f.read(8))
+        if (crc & 0xffffffff) != expected_crc:
+            print("  ERROR: CRC32 mismatch")
+            return False
+        if total != expected_size:
+            print(f"  ERROR: Size mismatch ({total} != {expected_size})")
+            return False
+
+    print(f"  OK: {total:,} bytes in {len(chunk_sizes)} chunks")
+    return True
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('input', type=Path, help='input .dict or .dict.dz file')
+    parser.add_argument('output', type=Path, help='output .dict.dz file')
+    parser.add_argument('--chunk-size', type=int, default=16384,
+                        help='uncompressed chunk size in bytes (default: 16384)')
+    args = parser.parse_args()
+
+    start = time.time()
+    data = read_input_file(args.input)
+    create_dictzip(data, args.output, args.chunk_size)
+    ok = verify_dictzip(args.output)
+    print(f"Done in {time.time() - start:.1f}s")
+    return 0 if ok else 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/src/activities/dictionary/DictionaryResultActivity.cpp b/src/activities/dictionary/DictionaryResultActivity.cpp
--- a/src/activities/dictionary/DictionaryResultActivity.cpp
+++ b/src/activities/dictionary/DictionaryResultActivity.cpp
@@ -1,6 +1,10 @@
 #include "DictionaryResultActivity.h"
 
 #include <Arduino.h>
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+
 #include "DictionaryMargins.h"
 #include "MappedInputManager.h"
 #include "fontIds.h"
@@ -15,22 +19,28 @@ void DictionaryResultActivity::taskTrampoline(void* param) {
 
 void DictionaryResultActivity::onEnter() {
   Activity::onEnter();
+  Serial.printf("[DICT-DBG] DictionaryResult onEnter, defLen=%u\n", rawDefinition.length());
+
   renderingMutex = xSemaphoreCreateMutex();
   currentPage = 0;
 
   // Process definition for display
   if (!notFound) {
+    Serial.printf("[DICT-DBG] Starting paginateDefinition...\n");
     paginateDefinition();
+    Serial.printf("[DICT-DBG] Pagination done, %u pages\n", pages.size());
   }
 
   updateRequired = true;
+  Serial.printf("[DICT-DBG] Creating display task...\n");
   xTaskCreate(&DictionaryResultActivity::taskTrampoline, "DictResultTask",
               4096,               // Stack size
               this,               // Parameters
               1,                  // Priority
               &displayTaskHandle  // Task handle
   );
+  Serial.printf("[DICT-DBG] Task created\n");
 }
 
 void DictionaryResultActivity::onExit() {
@@ -61,24 +71,51 @@ void DictionaryResultActivity::loop() {
   }
 
   // Handle page navigation - use orientation-aware PageBack/PageForward buttons
-  if (!notFound && pages.size() > 1) {
+  if (!notFound && !pages.empty()) {
     const bool prevPressed = mappedInput.wasPressed(MappedInputManager::Button::PageBack) ||
                              mappedInput.wasPressed(MappedInputManager::Button::Left);
     const bool nextPressed = mappedInput.wasPressed(MappedInputManager::Button::PageForward) ||
                              mappedInput.wasPressed(MappedInputManager::Button::Right);
 
-    if (prevPressed && currentPage > 0) {
-      currentPage--;
-      updateRequired = true;
-    } else if (nextPressed && currentPage < static_cast<int>(pages.size()) - 1) {
-      currentPage++;
-      updateRequired = true;
+    if (prevPressed) {
+      if (currentPage > 0) {
+        // Navigate within cached pages
+        currentPage--;
+        updateRequired = true;
+      } else if (firstPageNumber > 1) {
+        // At first cached page but earlier pages exist - re-parse to get them
+        const int targetPage = firstPageNumber - 1;  // Go to the page before current first
+        Serial.printf("[DICT-DBG] Re-parsing to reach page %d\n", targetPage);
+        reparseToPage(targetPage);
+        updateRequired = true;
+      }
+    } else if (nextPressed) {
+      // Check if we can navigate to existing cached page
+      if (currentPage < static_cast<int>(pages.size()) - 1) {
+        currentPage++;
+        updateRequired = true;
+      } else if (hasMoreContent) {
+        // At end of cached pages but more content available - parse next chunk
+        Serial.printf("[DICT-DBG] Parsing next chunk on navigation (page %d)\n", currentPage);
+        const size_t pagesBefore = pages.size();
+        parseNextChunk();
+
+        // If new pages were added, navigate to the next one
+        if (pages.size() > pagesBefore) {
+          currentPage++;
+          updateRequired = true;
+        }
+      }
+      // else: at true end of content, do nothing
     }
   }
 }
 
 void DictionaryResultActivity::paginateDefinition() {
   pages.clear();
+  parsePosition = 0;
+  hasMoreContent = false;
+  firstPageNumber = 1;
 
   if (rawDefinition.empty()) {
     notFound = true;
@@ -99,14 +136,55 @@ void DictionaryResultActivity::paginateDefinition() {
   const int textWidth = pageWidth - textMargin - marginRight - 10;
   const int textHeight = pageHeight - marginTop - marginBottom - headerHeight - footerHeight;
   const int lineHeight = renderer.getLineHeight(UI_10_FONT_ID);
+  const int linesPerPage = textHeight / lineHeight;
 
-  // Collect all TextBlocks from the HTML parser
+  // For chunked parsing, we estimate how much HTML to parse at a time
+  // Roughly: each line is ~40-60 chars, so one page ≈ linesPerPage * 60 bytes of text
+  // With HTML overhead, multiply by ~2, plus buffer for finding break points
+  constexpr size_t CHUNK_SIZE_BASE = 1500;  // Base chunk size
+  const size_t chunkSize = std::max(CHUNK_SIZE_BASE, static_cast<size_t>(linesPerPage * 120));
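+  // e.g. a 20-line page gives chunkSize = max(1500, 20 * 120) = 2400 bytes of HTML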
+
+  Serial.printf("[DICT-DBG] Chunked parsing: defLen=%u, chunkSize=%u, linesPerPage=%d\n",
+                rawDefinition.length(), chunkSize, linesPerPage);
+
+  // Determine how much to parse for first page
+  size_t parseEnd;
+  if (rawDefinition.length() <= chunkSize) {
+    // Small definition - parse it all
+    parseEnd = rawDefinition.length();
+    hasMoreContent = false;
+  } else {
+    // Large definition - find a good break point
+    parseEnd = findHtmlBreakPoint(rawDefinition, chunkSize / 2, chunkSize);
+    hasMoreContent = (parseEnd < rawDefinition.length());
+  }
+
+  // Extract the chunk to parse
+  std::string chunk = rawDefinition.substr(0, parseEnd);
+  parsePosition = parseEnd;
+
+  Serial.printf("[DICT-DBG] Parsing first chunk: 0-%u of %u, hasMore=%d\n",
+                parseEnd, rawDefinition.length(), hasMoreContent);
+
+  // Parse this chunk into TextBlocks
   std::vector<std::shared_ptr<TextBlock>> allBlocks;
-  DictHtmlParser::parse(rawDefinition, UI_10_FONT_ID, renderer, textWidth,
-                        [&allBlocks](std::shared_ptr<TextBlock> block) { allBlocks.push_back(block); });
+  DictHtmlParser::parse(chunk, UI_10_FONT_ID, renderer, textWidth,
+                        [&allBlocks](std::shared_ptr<TextBlock> block) {
+                          allBlocks.push_back(block);
+                        });
+  Serial.printf("[DICT-DBG] First chunk parsed, %u TextBlocks\n", allBlocks.size());
 
   if (allBlocks.empty()) {
-    notFound = true;
+    // Check if there's more to parse - maybe first chunk had no displayable content
+    if (hasMoreContent) {
+      // Try parsing more
+      parseNextChunk();
+      if (pages.empty()) {
+        notFound = true;
+      }
+    } else {
+      notFound = true;
+    }
     return;
   }
 
@@ -131,6 +209,189 @@ void DictionaryResultActivity::paginateDefinition() {
   if (!currentPageBlocks.empty()) {
     pages.push_back(currentPageBlocks);
   }
+
+  Serial.printf("[DICT-DBG] Initial pagination: %u pages\n", pages.size());
 }
+
+size_t DictionaryResultActivity::findHtmlBreakPoint(const std::string& html, size_t searchStart, size_t maxPos) {
+  // Search backwards from maxPos for good HTML break points
+  // Priority: </p>, </div>, </li>, </blockquote>, then any '>', then whitespace
+
+  if (maxPos >= html.length()) {
+    return html.length();
+  }
+
+  // Clamp searchStart to not exceed maxPos
+  if (searchStart > maxPos) {
+    searchStart = maxPos;
+  }
+
+  // Search for closing block tags (best break points)
+  const char* closingTags[] = {"</p>", "</div>", "</li>", "</blockquote>", "</ul>", "</ol>", "</dl>"};
+  size_t bestBreak = std::string::npos;
+
+  for (const char* tag : closingTags) {
+    size_t pos = html.rfind(tag, maxPos);
+    if (pos != std::string::npos && pos >= searchStart) {
+      // Found a closing tag - break after it
+      size_t breakAfter = pos + strlen(tag);
+      if (bestBreak == std::string::npos || breakAfter > bestBreak) {
+        bestBreak = breakAfter;
+      }
+    }
+  }
+
+  if (bestBreak != std::string::npos) {
+    return bestBreak;
+  }
+
+  // Fallback: search for any '>' (end of tag)
+  size_t tagEnd = html.rfind('>', maxPos);
+  if (tagEnd != std::string::npos && tagEnd >= searchStart) {
+    return tagEnd + 1;
+  }
+
+  // Last resort: search for whitespace
+  for (size_t i = maxPos; i >= searchStart && i != std::string::npos; i--) {
+    if (std::isspace(static_cast<unsigned char>(html[i]))) {
+      return i + 1;
+    }
+    if (i == 0) break;
+  }
+
+  // No good break point found - use maxPos
+  return maxPos;
+}
+
+void DictionaryResultActivity::parseNextChunk() {
+  if (!hasMoreContent || parsePosition >= rawDefinition.length()) {
+    hasMoreContent = false;
+    return;
+  }
+
+  Serial.printf("[DICT-DBG] parseNextChunk starting at position %u of %u\n",
+                parsePosition, rawDefinition.length());
+
+  // Get margins for calculating page dimensions
+  int marginTop, marginRight, marginBottom, marginLeft;
+  getDictionaryContentMargins(renderer, &marginTop, &marginRight, &marginBottom, &marginLeft);
+
+  const auto pageWidth = renderer.getScreenWidth();
+  const auto pageHeight = renderer.getScreenHeight();
+
+  // Calculate text area dimensions (must match paginateDefinition and render)
+  constexpr int headerHeight = 80;
+  constexpr int footerHeight = 30;
+  const int textMargin = marginLeft + 10;
+  const int textWidth = pageWidth - textMargin - marginRight - 10;
+  const int textHeight = pageHeight - marginTop - marginBottom - headerHeight - footerHeight;
+  const int lineHeight = renderer.getLineHeight(UI_10_FONT_ID);
+  const int linesPerPage = textHeight / lineHeight;
+
+  // Chunk size estimation (same as paginateDefinition)
+  constexpr size_t CHUNK_SIZE_BASE = 1500;
+  const size_t chunkSize = std::max(CHUNK_SIZE_BASE, static_cast<size_t>(linesPerPage * 120));
+
+  // Determine parse range for this chunk
+  size_t parseStart = parsePosition;
+  size_t parseEnd;
+
+  if (parsePosition + chunkSize >= rawDefinition.length()) {
+    // This will be the last chunk
+    parseEnd = rawDefinition.length();
+    hasMoreContent = false;
+  } else {
+    // Find a good break point
+    parseEnd = findHtmlBreakPoint(rawDefinition, parsePosition + chunkSize / 2, parsePosition + chunkSize);
+    hasMoreContent = (parseEnd < rawDefinition.length());
+  }
+
+  // Extract the chunk to parse
+  std::string chunk = rawDefinition.substr(parseStart, parseEnd - parseStart);
+  parsePosition = parseEnd;
+
+  Serial.printf("[DICT-DBG] Parsing chunk %u-%u, hasMore=%d\n", parseStart, parseEnd, hasMoreContent);
+
+  // Parse this chunk into TextBlocks
+  std::vector<std::shared_ptr<TextBlock>> allBlocks;
+  DictHtmlParser::parse(chunk, UI_10_FONT_ID, renderer, textWidth,
+                        [&allBlocks](std::shared_ptr<TextBlock> block) {
+                          allBlocks.push_back(block);
+                        });
+
+  Serial.printf("[DICT-DBG] Chunk parsed, %u TextBlocks\n", allBlocks.size());
+
+  if (allBlocks.empty()) {
+    // No content in this chunk - try parsing more if available
+    if (hasMoreContent) {
+      parseNextChunk();
+    }
+    return;
+  }
+
+  // Paginate: group TextBlocks into pages based on available height
+  std::vector<std::shared_ptr<TextBlock>> currentPageBlocks;
+  int currentY = 0;
+
+  for (const auto& block : allBlocks) {
+    if (currentY + lineHeight > textHeight && !currentPageBlocks.empty()) {
+      // Page is full, start new page
+      pages.push_back(currentPageBlocks);
+      currentPageBlocks.clear();
+      currentY = 0;
+    }
+
+    currentPageBlocks.push_back(block);
+    currentY += lineHeight;
+  }
+
+  // Add remaining blocks as last page
+  if (!currentPageBlocks.empty()) {
+    pages.push_back(currentPageBlocks);
+  }
+
+  // Trim old pages if we exceed the limit to prevent memory exhaustion
+  while (static_cast<int>(pages.size()) > MAX_CACHED_PAGES && currentPage > 0) {
+    // Remove the oldest page and adjust indices
+    pages.erase(pages.begin());
+    currentPage--;
+    firstPageNumber++;
+    Serial.printf("[DICT-DBG] Trimmed old page, firstPageNumber now %d\n", firstPageNumber);
+  }
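+  // e.g. with MAX_CACHED_PAGES=4, after parsing through page 6 the cache holds
+  // pages 3-6 and firstPageNumber == 3; pages 1-2 can only be shown again by
+  // re-parsing from the start via reparseToPage()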
"Next >" : ""; + // Show navigation hints when there are multiple pages or more content to load + // canGoBack is true if we have previous cached pages OR if earlier pages were trimmed + const bool canGoBack = currentPage > 0 || firstPageNumber > 1; + const bool canGoForward = currentPage < static_cast(pages.size()) - 1 || hasMoreContent; + const char* leftHint = canGoBack ? "< Prev" : ""; + const char* rightHint = canGoForward ? "Next >" : ""; const auto labels = mappedInput.mapLabels("\xc2\xab Back", "Search", leftHint, rightHint); renderer.drawButtonHints(UI_10_FONT_ID, labels.btn1, labels.btn2, labels.btn3, labels.btn4); diff --git a/src/activities/dictionary/DictionaryResultActivity.h b/src/activities/dictionary/DictionaryResultActivity.h index e8bee0a..31023ce 100644 --- a/src/activities/dictionary/DictionaryResultActivity.h +++ b/src/activities/dictionary/DictionaryResultActivity.h @@ -26,14 +26,24 @@ class DictionaryResultActivity final : public Activity { const std::function onSearchAnother; // Pagination - each page contains TextBlocks with styled text + // We limit cached pages to prevent memory exhaustion on long definitions + static constexpr int MAX_CACHED_PAGES = 4; std::vector>> pages; - int currentPage = 0; + int currentPage = 0; // Index into pages vector + int firstPageNumber = 1; // The page number of pages[0] (1-based for display) bool notFound = false; + // Chunked parsing state - parse definition on-demand as user navigates + size_t parsePosition = 0; // Current position in rawDefinition HTML + bool hasMoreContent = false; // True if more HTML remains to parse + static void taskTrampoline(void* param); [[noreturn]] void displayTaskLoop(); void render() const; void paginateDefinition(); + void parseNextChunk(); + void reparseToPage(int targetPageNumber); // Re-parse from beginning to reach earlier page + static size_t findHtmlBreakPoint(const std::string& html, size_t searchStart, size_t maxPos); public: /**