crosspoint-reader/lib/Epub/Epub/ParsedText.cpp

393 lines
14 KiB
C++
Raw Normal View History

#include "ParsedText.h"
#include <GfxRenderer.h>
#include <algorithm>
#include <cmath>
#include <functional>
#include <iterator>
#include <limits>
#include <vector>
2026-01-07 02:14:57 +05:00
#include "hyphenation/Hyphenator.h"
constexpr int MAX_COST = std::numeric_limits<int>::max();
2026-01-07 22:44:26 +05:00
namespace {
// Soft hyphen byte pattern used throughout EPUBs (UTF-8 for U+00AD).
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
constexpr size_t SOFT_HYPHEN_BYTES = 2;
bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
// Removes every soft hyphen in-place so rendered glyphs match measured widths.
void stripSoftHyphensInPlace(std::string& word) {
size_t pos = 0;
while ((pos = word.find(SOFT_HYPHEN_UTF8, pos)) != std::string::npos) {
word.erase(pos, SOFT_HYPHEN_BYTES);
}
}
// Returns the rendered width for a word while ignoring soft hyphen glyphs and optionally appending a visible hyphen.
uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
const EpdFontFamily::Style style, const bool appendHyphen = false) {
const bool hasSoftHyphen = containsSoftHyphen(word);
if (!hasSoftHyphen && !appendHyphen) {
return renderer.getTextWidth(fontId, word.c_str(), style);
}
std::string sanitized = word;
if (hasSoftHyphen) {
stripSoftHyphensInPlace(sanitized);
}
if (appendHyphen) {
sanitized.push_back('-');
}
return renderer.getTextWidth(fontId, sanitized.c_str(), style);
}
} // namespace
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) {
if (word.empty()) return;
words.push_back(std::move(word));
wordStyles.push_back(fontStyle);
}
// Consumes data to minimize memory usage
void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const uint16_t viewportWidth,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
const bool includeLastLine) {
if (words.empty()) {
return;
}
// Apply fixed transforms before any per-line layout work.
applyParagraphIndent();
Rotation Support (#77) • What is the goal of this PR? Implement a horizontal EPUB reading mode so books can be read in landscape orientation (both 90° and 270°), while keeping the rest of the UI in portrait. • What changes are included? ◦ Rendering / Display ▪ Added an orientation model to GfxRenderer (Portrait, LandscapeNormal, LandscapeFlipped) and made: ▪ drawPixel, drawImage, displayWindow map logical coordinates differently depending on orientation. ▪ getScreenWidth() / getScreenHeight() return orientation‑aware logical dimensions (480×800 in portrait, 800×480 in landscape). ◦ Settings / Configuration ▪ Extended CrossPointSettings with: ▪ landscapeReading (toggle for portrait vs. landscape EPUB reading). ▪ landscapeFlipped (toggle to flip landscape 180° so both horizontal holding directions are supported). ▪ Updated settings serialization/deserialization to persist these fields while remaining backward‑compatible with existing settings files. ▪ Updated SettingsActivity to expose two new toggles: ▪ “Landscape Reading” ▪ “Flip Landscape (swap top/bottom)” ◦ EPUB Reader ▪ In EpubReaderActivity: ▪ On onEnter, set GfxRenderer orientation based on the new settings (Portrait, LandscapeNormal, or LandscapeFlipped). ▪ On onExit, reset orientation back to Portrait so Home, WiFi, Settings, etc. continue to render as before. ▪ Adjusted renderStatusBar to position the status bar and battery indicator relative to GfxRenderer::getScreenHeight() instead of hard‑coded Y coordinates, so it stays correctly at the bottom in both portrait and landscape. ◦ EPUB Caching / Layout ▪ Extended Section cache metadata (section.bin) to include the logical screenWidth and screenHeight used when pages were generated; bumped SECTION_FILE_VERSION. ▪ Updated loadCacheMetadata to compare: ▪ font/margins/line compression/extraParagraphSpacing and screen dimensions; mismatches now invalidate and clear the cache. ▪ Updated persistPageDataToSD and all call sites in EpubReaderActivity to pass the current GfxRenderer::getScreenWidth() / getScreenHeight() so portrait and landscape caches are kept separate and correctly sized. Additional Context • Cache behavior / migration ◦ Existing section.bin files (old SECTION_FILE_VERSION) will be detected as incompatible and their caches cleared and rebuilt once per chapter when first opened after this change. ◦ Within a given orientation, caches will be reused as before. Switching orientation (portrait ↔ landscape) will cause a one‑time re‑index of each chapter in the new orientation. • Scope and risks ◦ Orientation changes are scoped to the EPUB reader; the Home screen, Settings, WiFi selection, sleep screens, and web server UI continue to assume portrait orientation. ◦ The renderer’s orientation is a static/global setting; if future code uses GfxRenderer outside the reader while a reader instance is active, it should be aware that orientation is no longer implicitly fixed. ◦ All drawing primitives now go through orientation‑aware coordinate transforms; any code that previously relied on edge‑case behavior or out‑of‑bounds writes might surface as logged “Outside range” warnings instead. • Testing suggestions / areas to focus on ◦ Verify in hardware: ▪ Portrait mode still renders correctly (boot, home, settings, WiFi, reader). ▪ Landscape reading in both directions: ▪ Landscape Reading = ON, Flip Landscape = OFF. ▪ Landscape Reading = ON, Flip Landscape = ON. ▪ Status bar (page X/Y, % progress, battery icon) is fully visible and aligned at the bottom in all three combinations. ◦ Open the same book: ▪ In portrait first, then switch to landscape and reopen it. ▪ Confirm that: ▪ Old portrait caches are rebuilt once for landscape (you should see the “Indexing…” page). ▪ Progress save/restore still works (resume opens to the correct page in the current orientation). ◦ Ensure grayscale rendering (the secondary pass in EpubReaderActivity::renderContents) still looks correct in both orientations. --------- Co-authored-by: Dave Allie <dave@daveallie.com>
2025-12-28 05:33:20 -05:00
const int pageWidth = viewportWidth;
const int spaceWidth = renderer.getSpaceWidth(fontId);
auto wordWidths = calculateWordWidths(renderer, fontId);
std::vector<size_t> lineBreakIndices;
if (hyphenationEnabled) {
// Use greedy layout that can split words mid-loop when a hyphenated prefix fits.
lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
} else {
lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
}
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
for (size_t i = 0; i < lineCount; ++i) {
extractLine(i, pageWidth, spaceWidth, wordWidths, lineBreakIndices, processLine);
}
}
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
const size_t totalWordCount = words.size();
std::vector<uint16_t> wordWidths;
wordWidths.reserve(totalWordCount);
auto wordsIt = words.begin();
auto wordStylesIt = wordStyles.begin();
while (wordsIt != words.end()) {
2026-01-07 22:44:26 +05:00
wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt));
std::advance(wordsIt, 1);
std::advance(wordStylesIt, 1);
}
return wordWidths;
}
std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
const int spaceWidth, std::vector<uint16_t>& wordWidths) {
if (words.empty()) {
return {};
}
// Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation.
for (size_t i = 0; i < wordWidths.size(); ++i) {
while (wordWidths[i] > pageWidth) {
// Try language-aware hyphenation first; only fall back to heuristics when no dictionary break fits.
if (hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/false)) {
continue;
}
if (!hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) {
break;
}
}
}
const size_t totalWordCount = words.size();
// DP table to store the minimum badness (cost) of lines starting at index i
std::vector<int> dp(totalWordCount);
// 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
std::vector<size_t> ans(totalWordCount);
// Base Case
dp[totalWordCount - 1] = 0;
ans[totalWordCount - 1] = totalWordCount - 1;
for (int i = totalWordCount - 2; i >= 0; --i) {
int currlen = -spaceWidth;
dp[i] = MAX_COST;
for (size_t j = i; j < totalWordCount; ++j) {
// Current line length: previous width + space + current word width
currlen += wordWidths[j] + spaceWidth;
if (currlen > pageWidth) {
break;
}
int cost;
if (j == totalWordCount - 1) {
cost = 0; // Last line
} else {
const int remainingSpace = pageWidth - currlen;
// Use long long for the square to prevent overflow
const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
if (cost_ll > MAX_COST) {
cost = MAX_COST;
} else {
cost = static_cast<int>(cost_ll);
}
}
if (cost < dp[i]) {
dp[i] = cost;
ans[i] = j; // j is the index of the last word in this optimal line
}
}
// Handle oversized word: if no valid configuration found, force single-word line
// This prevents cascade failure where one oversized word breaks all preceding words
if (dp[i] == MAX_COST) {
ans[i] = i; // Just this word on its own line
// Inherit cost from next word to allow subsequent words to find valid configurations
if (i + 1 < static_cast<int>(totalWordCount)) {
dp[i] = dp[i + 1];
} else {
dp[i] = 0;
}
}
}
// Stores the index of the word that starts the next line (last_word_index + 1)
std::vector<size_t> lineBreakIndices;
size_t currentWordIndex = 0;
while (currentWordIndex < totalWordCount) {
size_t nextBreakIndex = ans[currentWordIndex] + 1;
// Safety check: prevent infinite loop if nextBreakIndex doesn't advance
if (nextBreakIndex <= currentWordIndex) {
// Force advance by at least one word to avoid infinite loop
nextBreakIndex = currentWordIndex + 1;
}
lineBreakIndices.push_back(nextBreakIndex);
currentWordIndex = nextBreakIndex;
}
return lineBreakIndices;
}
void ParsedText::applyParagraphIndent() {
if (extraParagraphSpacing || words.empty()) {
return;
}
words.front().insert(0, "\xe2\x80\x83");
}
// Builds break indices while opportunistically splitting the word that would overflow the current line.
std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId,
const int pageWidth, const int spaceWidth,
std::vector<uint16_t>& wordWidths) {
std::vector<size_t> lineBreakIndices;
size_t currentIndex = 0;
while (currentIndex < wordWidths.size()) {
const size_t lineStart = currentIndex;
int lineWidth = 0;
2026-01-07 20:25:34 +05:00
// Consume as many words as possible for current line, splitting when prefixes fit
while (currentIndex < wordWidths.size()) {
const bool isFirstWord = currentIndex == lineStart;
const int spacing = isFirstWord ? 0 : spaceWidth;
const int candidateWidth = spacing + wordWidths[currentIndex];
2026-01-08 03:01:36 +05:00
// Word fits on current line
if (lineWidth + candidateWidth <= pageWidth) {
lineWidth += candidateWidth;
2026-01-08 03:01:36 +05:00
++currentIndex;
continue;
}
2026-01-07 20:25:34 +05:00
// Word would overflow — try to split based on hyphenation points
const int availableWidth = pageWidth - lineWidth - spacing;
2026-01-08 03:01:36 +05:00
const bool allowFallbackBreaks = isFirstWord; // Only for first word on line
if (availableWidth > 0 &&
hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) {
2026-01-08 03:01:36 +05:00
// Prefix now fits; append it to this line and move to next line
2026-01-07 20:36:06 +05:00
lineWidth += spacing + wordWidths[currentIndex];
2026-01-08 03:01:36 +05:00
++currentIndex;
2026-01-07 20:36:06 +05:00
break;
}
2026-01-07 20:25:34 +05:00
// Could not split: force at least one word per line to avoid infinite loop
if (currentIndex == lineStart) {
lineWidth += candidateWidth;
2026-01-08 03:01:36 +05:00
++currentIndex;
}
break;
}
lineBreakIndices.push_back(currentIndex);
}
return lineBreakIndices;
}
// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint fits the
// available width.
bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
const int fontId, std::vector<uint16_t>& wordWidths,
const bool allowFallbackBreaks) {
2026-01-07 20:25:34 +05:00
// Guard against invalid indices or zero available width before attempting to split.
if (availableWidth <= 0 || wordIndex >= words.size()) {
return false;
}
2026-01-08 03:01:36 +05:00
// Get iterators to target word and style.
auto wordIt = words.begin();
auto styleIt = wordStyles.begin();
std::advance(wordIt, wordIndex);
std::advance(styleIt, wordIndex);
2026-01-08 03:01:36 +05:00
const std::string& word = *wordIt;
const auto style = *styleIt;
2026-01-07 20:25:34 +05:00
// Collect candidate breakpoints (byte offsets and hyphen requirements).
auto breakInfos = Hyphenator::breakOffsets(word, /*allowFallback=*/false);
if (breakInfos.empty() && allowFallbackBreaks) {
breakInfos = Hyphenator::breakOffsets(word, /*allowFallback=*/true);
}
if (breakInfos.empty()) {
return false;
}
size_t chosenOffset = 0;
int chosenWidth = -1;
bool chosenNeedsHyphen = true;
2026-01-07 20:25:34 +05:00
// Iterate over each legal breakpoint and retain the widest prefix that still fits.
for (const auto& info : breakInfos) {
const size_t offset = info.byteOffset;
2026-01-08 03:01:36 +05:00
if (offset == 0 || offset >= word.size()) {
continue;
}
const bool needsHyphen = info.requiresInsertedHyphen;
2026-01-08 03:01:36 +05:00
const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen);
if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) {
continue; // Skip if too wide or not an improvement
}
2026-01-08 03:01:36 +05:00
chosenWidth = prefixWidth;
chosenOffset = offset;
chosenNeedsHyphen = needsHyphen;
}
if (chosenWidth < 0) {
2026-01-07 20:25:34 +05:00
// No hyphenation point produced a prefix that fits in the remaining space.
return false;
}
2026-01-07 20:25:34 +05:00
// Split the word at the selected breakpoint and append a hyphen if required.
2026-01-08 03:01:36 +05:00
std::string remainder = word.substr(chosenOffset);
wordIt->resize(chosenOffset);
if (chosenNeedsHyphen) {
wordIt->push_back('-');
}
2026-01-07 20:25:34 +05:00
// Insert the remainder word (with matching style) directly after the prefix.
auto insertWordIt = std::next(wordIt);
auto insertStyleIt = std::next(styleIt);
words.insert(insertWordIt, remainder);
wordStyles.insert(insertStyleIt, style);
2026-01-07 20:25:34 +05:00
// Update cached widths to reflect the new prefix/remainder pairing.
wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
2026-01-07 22:44:26 +05:00
const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style);
wordWidths.insert(wordWidths.begin() + wordIndex + 1, remainderWidth);
return true;
}
void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
const std::vector<uint16_t>& wordWidths, const std::vector<size_t>& lineBreakIndices,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine) {
const size_t lineBreak = lineBreakIndices[breakIndex];
const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
const size_t lineWordCount = lineBreak - lastBreakAt;
// Calculate total word width for this line
int lineWordWidthSum = 0;
for (size_t i = lastBreakAt; i < lineBreak; i++) {
lineWordWidthSum += wordWidths[i];
}
// Calculate spacing
const int spareSpace = pageWidth - lineWordWidthSum;
int spacing = spaceWidth;
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
spacing = spareSpace / (lineWordCount - 1);
}
// Calculate initial x position
uint16_t xpos = 0;
if (style == TextBlock::RIGHT_ALIGN) {
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
} else if (style == TextBlock::CENTER_ALIGN) {
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
}
// Pre-calculate X positions for words
std::list<uint16_t> lineXPos;
for (size_t i = lastBreakAt; i < lineBreak; i++) {
const uint16_t currentWordWidth = wordWidths[i];
lineXPos.push_back(xpos);
xpos += currentWordWidth + spacing;
}
// Iterators always start at the beginning as we are moving content with splice below
auto wordEndIt = words.begin();
auto wordStyleEndIt = wordStyles.begin();
std::advance(wordEndIt, lineWordCount);
std::advance(wordStyleEndIt, lineWordCount);
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
std::list<std::string> lineWords;
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
std::list<EpdFontFamily::Style> lineWordStyles;
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
2026-01-07 22:44:26 +05:00
for (auto& word : lineWords) {
if (containsSoftHyphen(word)) {
stripSoftHyphensInPlace(word);
}
}
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
}