Files
crosspoint-reader-mod/src/activities/reader/DictionaryDefinitionActivity.cpp
cottongin 5dc9d21bdb feat: Integrate PR #857 dictionary intelligence and sub-activity refactor
Pull in the full feature update from PR #857 while preserving fork
advantages (HTML parsing, custom drawHints, PageForward/PageBack,
cache management, stardictCmp, /.dictionary/ paths).

- Add morphological stemming (getStemVariants), Levenshtein edit
  distance, and fuzzy matching (findSimilar) to Dictionary
- Create DictionarySuggestionsActivity for "Did you mean?" flow
- Add onDone callback to DictionaryDefinitionActivity for direct
  exit-to-reader via "Done" button
- Refactor DictionaryWordSelectActivity to ActivityWithSubactivity
  with cascading lookup (exact → stems → suggestions → not found),
  en-dash/em-dash splitting, and cross-page hyphenation
- Refactor LookedUpWordsActivity with reverse-chronological order,
  inline cascading lookup, UITheme-aware rendering, and sub-activities
- Simplify EpubReaderActivity LOOKUP/LOOKED_UP_WORDS handlers

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-14 20:50:03 -05:00

546 lines
20 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "DictionaryDefinitionActivity.h"
#include <GfxRenderer.h>
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include "CrossPointSettings.h"
#include "MappedInputManager.h"
#include "components/UITheme.h"
#include "fontIds.h"
void DictionaryDefinitionActivity::taskTrampoline(void* param) {
auto* self = static_cast<DictionaryDefinitionActivity*>(param);
self->displayTaskLoop();
}
void DictionaryDefinitionActivity::displayTaskLoop() {
while (true) {
if (updateRequired) {
updateRequired = false;
xSemaphoreTake(renderingMutex, portMAX_DELAY);
renderScreen();
xSemaphoreGive(renderingMutex);
}
vTaskDelay(10 / portTICK_PERIOD_MS);
}
}
void DictionaryDefinitionActivity::onEnter() {
Activity::onEnter();
renderingMutex = xSemaphoreCreateMutex();
wrapText();
updateRequired = true;
xTaskCreate(&DictionaryDefinitionActivity::taskTrampoline, "DictDefTask", 4096, this, 1, &displayTaskHandle);
}
void DictionaryDefinitionActivity::onExit() {
Activity::onExit();
xSemaphoreTake(renderingMutex, portMAX_DELAY);
if (displayTaskHandle) {
vTaskDelete(displayTaskHandle);
displayTaskHandle = nullptr;
}
vSemaphoreDelete(renderingMutex);
renderingMutex = nullptr;
}
// ---------------------------------------------------------------------------
// Check if a Unicode codepoint is likely renderable by the e-ink bitmap font.
// Keeps Latin text, combining marks, common punctuation, currency, and letterlike symbols.
// Skips IPA extensions, Greek, Cyrillic, Arabic, CJK, and other non-Latin scripts.
// ---------------------------------------------------------------------------
bool DictionaryDefinitionActivity::isRenderableCodepoint(uint32_t cp) {
if (cp <= 0x024F) return true; // Basic Latin + Latin Extended-A/B
if (cp >= 0x0300 && cp <= 0x036F) return true; // Combining Diacritical Marks
if (cp >= 0x2000 && cp <= 0x206F) return true; // General Punctuation
if (cp >= 0x20A0 && cp <= 0x20CF) return true; // Currency Symbols
if (cp >= 0x2100 && cp <= 0x214F) return true; // Letterlike Symbols
if (cp >= 0x2190 && cp <= 0x21FF) return true; // Arrows
return false;
}
// ---------------------------------------------------------------------------
// HTML entity decoder
// ---------------------------------------------------------------------------
std::string DictionaryDefinitionActivity::decodeEntity(const std::string& entity) {
// Named entities
if (entity == "amp") return "&";
if (entity == "lt") return "<";
if (entity == "gt") return ">";
if (entity == "quot") return "\"";
if (entity == "apos") return "'";
if (entity == "nbsp" || entity == "thinsp" || entity == "ensp" || entity == "emsp") return " ";
if (entity == "ndash") return "\xE2\x80\x93"; // U+2013
if (entity == "mdash") return "\xE2\x80\x94"; // U+2014
if (entity == "lsquo") return "\xE2\x80\x98";
if (entity == "rsquo") return "\xE2\x80\x99";
if (entity == "ldquo") return "\xE2\x80\x9C";
if (entity == "rdquo") return "\xE2\x80\x9D";
if (entity == "hellip") return "\xE2\x80\xA6";
if (entity == "lrm" || entity == "rlm" || entity == "zwj" || entity == "zwnj") return "";
// Numeric entities: &#123; or &#x1F;
if (!entity.empty() && entity[0] == '#') {
unsigned long cp = 0;
if (entity.size() > 1 && (entity[1] == 'x' || entity[1] == 'X')) {
cp = std::strtoul(entity.c_str() + 2, nullptr, 16);
} else {
cp = std::strtoul(entity.c_str() + 1, nullptr, 10);
}
if (cp > 0 && cp < 0x80) {
return std::string(1, static_cast<char>(cp));
}
if (cp >= 0x80 && cp < 0x800) {
char buf[3] = {static_cast<char>(0xC0 | (cp >> 6)), static_cast<char>(0x80 | (cp & 0x3F)), '\0'};
return std::string(buf, 2);
}
if (cp >= 0x800 && cp < 0x10000) {
char buf[4] = {static_cast<char>(0xE0 | (cp >> 12)), static_cast<char>(0x80 | ((cp >> 6) & 0x3F)),
static_cast<char>(0x80 | (cp & 0x3F)), '\0'};
return std::string(buf, 3);
}
if (cp >= 0x10000 && cp < 0x110000) {
char buf[5] = {static_cast<char>(0xF0 | (cp >> 18)), static_cast<char>(0x80 | ((cp >> 12) & 0x3F)),
static_cast<char>(0x80 | ((cp >> 6) & 0x3F)), static_cast<char>(0x80 | (cp & 0x3F)), '\0'};
return std::string(buf, 4);
}
}
return ""; // unknown entity — drop it
}
// ---------------------------------------------------------------------------
// HTML → TextAtom list
// ---------------------------------------------------------------------------
std::vector<DictionaryDefinitionActivity::TextAtom> DictionaryDefinitionActivity::parseHtml(const std::string& html) {
std::vector<TextAtom> atoms;
bool isBold = false;
bool isItalic = false;
bool inSvg = false;
int svgDepth = 0;
std::vector<ListState> listStack;
std::string currentWord;
auto currentStyle = [&]() -> EpdFontFamily::Style {
if (isBold && isItalic) return EpdFontFamily::BOLD_ITALIC;
if (isBold) return EpdFontFamily::BOLD;
if (isItalic) return EpdFontFamily::ITALIC;
return EpdFontFamily::REGULAR;
};
auto flushWord = [&]() {
if (!currentWord.empty() && !inSvg) {
atoms.push_back({currentWord, currentStyle(), false, 0});
currentWord.clear();
}
};
auto indentPx = [&]() -> int {
// 15 pixels per nesting level (the first level has no extra indent)
int depth = static_cast<int>(listStack.size());
return (depth > 1) ? (depth - 1) * 15 : 0;
};
// Skip any leading non-HTML text (e.g. pronunciation guides like "/ˈsɪm.pəl/, /ˈsɪmpəl/")
// that appears before the first tag in sametypesequence=h entries.
size_t i = 0;
{
size_t firstTag = html.find('<');
if (firstTag != std::string::npos) i = firstTag;
}
while (i < html.size()) {
// ------- HTML tag -------
if (html[i] == '<') {
flushWord();
size_t tagEnd = html.find('>', i);
if (tagEnd == std::string::npos) break;
std::string tagContent = html.substr(i + 1, tagEnd - i - 1);
// Extract tag name: first token, lowercased, trailing '/' stripped.
size_t space = tagContent.find(' ');
std::string tagName = (space != std::string::npos) ? tagContent.substr(0, space) : tagContent;
for (auto& c : tagName) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
if (!tagName.empty() && tagName.back() == '/') tagName.pop_back();
// --- SVG handling (skip all content inside <svg>…</svg>) ---
if (tagName == "svg") {
inSvg = true;
svgDepth = 1;
} else if (inSvg) {
if (tagName == "svg") {
svgDepth++;
} else if (tagName == "/svg") {
svgDepth--;
if (svgDepth <= 0) inSvg = false;
}
}
if (!inSvg) {
// --- Inline style tags ---
if (tagName == "b" || tagName == "strong") {
isBold = true;
} else if (tagName == "/b" || tagName == "/strong") {
isBold = false;
} else if (tagName == "i" || tagName == "em") {
isItalic = true;
} else if (tagName == "/i" || tagName == "/em") {
isItalic = false;
// --- Block-level tags → newlines ---
} else if (tagName == "p" || tagName == "h1" || tagName == "h2" || tagName == "h3" || tagName == "h4") {
atoms.push_back({"", EpdFontFamily::REGULAR, true, indentPx()});
// Headings get bold style applied to following text
if (tagName != "p") isBold = true;
} else if (tagName == "/p" || tagName == "/h1" || tagName == "/h2" || tagName == "/h3" || tagName == "/h4") {
atoms.push_back({"", EpdFontFamily::REGULAR, true, indentPx()});
isBold = false;
} else if (tagName == "br") {
atoms.push_back({"", EpdFontFamily::REGULAR, true, indentPx()});
// --- Separator between definition entries ---
} else if (tagName == "/html") {
atoms.push_back({"", EpdFontFamily::REGULAR, true, 0});
atoms.push_back({"", EpdFontFamily::REGULAR, true, 0}); // extra blank line
isBold = false;
isItalic = false;
// Skip any raw text between </html> and the next tag — this is where
// pronunciation guides (e.g. /ˈsɪmpəl/, /ksɛpt/) live in this dictionary.
size_t nextTag = html.find('<', tagEnd + 1);
i = (nextTag != std::string::npos) ? nextTag : html.size();
continue;
// --- Lists ---
} else if (tagName == "ol") {
bool alpha = tagContent.find("lower-alpha") != std::string::npos;
listStack.push_back({0, alpha});
} else if (tagName == "ul") {
listStack.push_back({0, false});
} else if (tagName == "/ol" || tagName == "/ul") {
if (!listStack.empty()) listStack.pop_back();
} else if (tagName == "li") {
atoms.push_back({"", EpdFontFamily::REGULAR, true, indentPx()});
if (!listStack.empty()) {
auto& ls = listStack.back();
ls.counter++;
std::string marker;
if (ls.isAlpha && ls.counter >= 1 && ls.counter <= 26) {
marker = std::string(1, static_cast<char>('a' + ls.counter - 1)) + ". ";
} else if (ls.isAlpha) {
marker = std::to_string(ls.counter) + ". ";
} else {
marker = std::to_string(ls.counter) + ". ";
}
atoms.push_back({marker, EpdFontFamily::REGULAR, false, 0});
} else {
// Unordered list or bare <li>
atoms.push_back({"\xE2\x80\xA2 ", EpdFontFamily::REGULAR, false, 0});
}
}
// All other tags (span, div, code, sup, sub, table, etc.) are silently ignored;
// their text content will still be emitted.
}
i = tagEnd + 1;
continue;
}
// Skip content inside SVG
if (inSvg) {
i++;
continue;
}
// ------- HTML entity -------
if (html[i] == '&') {
size_t semicolon = html.find(';', i);
if (semicolon != std::string::npos && semicolon - i < 16) {
std::string entity = html.substr(i + 1, semicolon - i - 1);
std::string decoded = decodeEntity(entity);
if (!decoded.empty()) {
// Treat decoded chars like normal text (could be space etc.)
for (char dc : decoded) {
if (dc == ' ') {
flushWord();
} else {
currentWord += dc;
}
}
}
i = semicolon + 1;
continue;
}
// Not a valid entity — emit '&' literally
currentWord += '&';
i++;
continue;
}
// ------- IPA pronunciation (skip /…/ and […] containing non-ASCII) -------
if (html[i] == '/' || html[i] == '[') {
char closeDelim = (html[i] == '/') ? '/' : ']';
size_t end = html.find(closeDelim, i + 1);
if (end != std::string::npos && end - i < 80) {
bool hasNonAscii = false;
for (size_t j = i + 1; j < end; j++) {
if (static_cast<unsigned char>(html[j]) > 127) {
hasNonAscii = true;
break;
}
}
if (hasNonAscii) {
flushWord();
i = end + 1; // skip entire IPA section including delimiters
continue;
}
}
// Not IPA — fall through to treat as regular character
}
// ------- Whitespace -------
if (html[i] == ' ' || html[i] == '\t' || html[i] == '\n' || html[i] == '\r') {
flushWord();
i++;
continue;
}
// ------- Regular character (with non-renderable character filter) -------
{
unsigned char byte = static_cast<unsigned char>(html[i]);
if (byte < 0x80) {
// ASCII — always renderable
currentWord += html[i];
i++;
} else {
// Multi-byte UTF-8: decode codepoint and check if renderable
int seqLen = 1;
uint32_t cp = 0;
if ((byte & 0xE0) == 0xC0) {
seqLen = 2;
cp = byte & 0x1F;
} else if ((byte & 0xF0) == 0xE0) {
seqLen = 3;
cp = byte & 0x0F;
} else if ((byte & 0xF8) == 0xF0) {
seqLen = 4;
cp = byte & 0x07;
} else {
i++;
continue;
} // invalid start byte
if (i + static_cast<size_t>(seqLen) > html.size()) {
i++;
continue;
}
bool valid = true;
for (int j = 1; j < seqLen; j++) {
unsigned char cb = static_cast<unsigned char>(html[i + j]);
if ((cb & 0xC0) != 0x80) {
valid = false;
break;
}
cp = (cp << 6) | (cb & 0x3F);
}
if (valid && isRenderableCodepoint(cp)) {
for (int j = 0; j < seqLen; j++) {
currentWord += html[i + j];
}
}
// else: silently skip non-renderable character
i += valid ? seqLen : 1;
}
}
}
flushWord();
return atoms;
}
// ---------------------------------------------------------------------------
// Word-wrap the parsed HTML atoms into positioned line segments
// ---------------------------------------------------------------------------
void DictionaryDefinitionActivity::wrapText() {
wrappedLines.clear();
const bool landscape = orientation == CrossPointSettings::ORIENTATION::LANDSCAPE_CW ||
orientation == CrossPointSettings::ORIENTATION::LANDSCAPE_CCW;
const int screenWidth = renderer.getScreenWidth();
const int lineHeight = renderer.getLineHeight(readerFontId);
const int sidePadding = landscape ? 50 : 20;
constexpr int topArea = 50;
constexpr int bottomArea = 50;
const int maxWidth = screenWidth - 2 * sidePadding;
const int spaceWidth = renderer.getSpaceWidth(readerFontId);
linesPerPage = (renderer.getScreenHeight() - topArea - bottomArea) / lineHeight;
if (linesPerPage < 1) linesPerPage = 1;
auto atoms = parseHtml(definition);
std::vector<Segment> currentLine;
int currentX = 0;
int baseIndent = 0; // indent for continuation lines within the same block
for (const auto& atom : atoms) {
// ---- Newline directive ----
if (atom.isNewline) {
// Collapse multiple consecutive blank lines
if (currentLine.empty() && !wrappedLines.empty() && wrappedLines.back().empty()) {
// Already have a blank line; update indent but don't push another
baseIndent = atom.indent;
currentX = baseIndent;
continue;
}
wrappedLines.push_back(std::move(currentLine));
currentLine.clear();
baseIndent = atom.indent;
currentX = baseIndent;
continue;
}
// ---- Text word ----
int wordWidth = renderer.getTextWidth(readerFontId, atom.text.c_str(), atom.style);
int gap = (currentX > baseIndent) ? spaceWidth : 0;
// Wrap if this word won't fit
if (currentX + gap + wordWidth > maxWidth && currentX > baseIndent) {
wrappedLines.push_back(std::move(currentLine));
currentLine.clear();
currentX = baseIndent;
gap = 0;
}
int16_t x = static_cast<int16_t>(currentX + gap);
currentLine.push_back({atom.text, x, atom.style});
currentX = x + wordWidth;
}
// Flush last line
if (!currentLine.empty()) {
wrappedLines.push_back(std::move(currentLine));
}
totalPages = (static_cast<int>(wrappedLines.size()) + linesPerPage - 1) / linesPerPage;
if (totalPages < 1) totalPages = 1;
}
void DictionaryDefinitionActivity::loop() {
const bool prevPage = mappedInput.wasReleased(MappedInputManager::Button::PageBack) ||
mappedInput.wasReleased(MappedInputManager::Button::Left);
const bool nextPage = mappedInput.wasReleased(MappedInputManager::Button::PageForward) ||
mappedInput.wasReleased(MappedInputManager::Button::Right);
if (prevPage && currentPage > 0) {
currentPage--;
updateRequired = true;
}
if (nextPage && currentPage < totalPages - 1) {
currentPage++;
updateRequired = true;
}
if (mappedInput.wasReleased(MappedInputManager::Button::Confirm)) {
if (onDone) {
onDone();
} else {
onBack();
}
return;
}
if (mappedInput.wasReleased(MappedInputManager::Button::Back)) {
onBack();
return;
}
}
void DictionaryDefinitionActivity::renderScreen() {
renderer.clearScreen();
const bool landscape = orientation == CrossPointSettings::ORIENTATION::LANDSCAPE_CW ||
orientation == CrossPointSettings::ORIENTATION::LANDSCAPE_CCW;
const int sidePadding = landscape ? 50 : 20;
constexpr int titleY = 10;
const int lineHeight = renderer.getLineHeight(readerFontId);
constexpr int bodyStartY = 50;
// Title: the word in bold (UI font)
renderer.drawText(UI_12_FONT_ID, sidePadding, titleY, headword.c_str(), true, EpdFontFamily::BOLD);
// Separator line
renderer.drawLine(sidePadding, 40, renderer.getScreenWidth() - sidePadding, 40);
// Body: styled definition lines
int startLine = currentPage * linesPerPage;
for (int i = 0; i < linesPerPage && (startLine + i) < static_cast<int>(wrappedLines.size()); i++) {
int y = bodyStartY + i * lineHeight;
const auto& line = wrappedLines[startLine + i];
for (const auto& seg : line) {
renderer.drawText(readerFontId, sidePadding + seg.x, y, seg.text.c_str(), true, seg.style);
}
}
// Pagination indicator (bottom right)
if (totalPages > 1) {
std::string pageInfo = std::to_string(currentPage + 1) + "/" + std::to_string(totalPages);
int textWidth = renderer.getTextWidth(SMALL_FONT_ID, pageInfo.c_str());
renderer.drawText(SMALL_FONT_ID, renderer.getScreenWidth() - sidePadding - textWidth,
renderer.getScreenHeight() - 50, pageInfo.c_str());
}
// Button hints (bottom face buttons)
const auto labels = mappedInput.mapLabels("\xC2\xAB Back", onDone ? "Done" : "", "\xC2\xAB Page", "Page \xC2\xBB");
GUI.drawButtonHints(renderer, labels.btn1, labels.btn2, labels.btn3, labels.btn4);
// Side button hints (drawn in portrait coordinates for correct placement)
{
const auto origOrientation = renderer.getOrientation();
renderer.setOrientation(GfxRenderer::Orientation::Portrait);
const int portW = renderer.getScreenWidth();
constexpr int sideButtonWidth = 30;
constexpr int sideButtonHeight = 78;
constexpr int sideButtonGap = 5;
constexpr int sideTopY = 345;
constexpr int cornerRadius = 6;
const int sideX = portW - sideButtonWidth;
const int sideButtonY[2] = {sideTopY, sideTopY + sideButtonHeight + sideButtonGap};
const char* sideLabels[2] = {"\xC2\xAB Page", "Page \xC2\xBB"};
const bool useCCW = (orientation == CrossPointSettings::ORIENTATION::LANDSCAPE_CCW);
for (int i = 0; i < 2; i++) {
renderer.fillRect(sideX, sideButtonY[i], sideButtonWidth, sideButtonHeight, false);
renderer.drawRoundedRect(sideX, sideButtonY[i], sideButtonWidth, sideButtonHeight, 1, cornerRadius, true, false,
true, false, true);
const std::string truncated = renderer.truncatedText(SMALL_FONT_ID, sideLabels[i], sideButtonHeight);
const int tw = renderer.getTextWidth(SMALL_FONT_ID, truncated.c_str());
if (useCCW) {
renderer.drawTextRotated90CCW(SMALL_FONT_ID, sideX,
sideButtonY[i] + (sideButtonHeight - tw) / 2, truncated.c_str());
} else {
renderer.drawTextRotated90CW(SMALL_FONT_ID, sideX,
sideButtonY[i] + (sideButtonHeight + tw) / 2, truncated.c_str());
}
}
renderer.setOrientation(origOrientation);
}
// Use half refresh when entering the screen for cleaner transition; fast refresh for page turns.
renderer.displayBuffer(firstRender ? HalDisplay::HALF_REFRESH : HalDisplay::FAST_REFRESH);
firstRender = false;
}