Files
crosspoint-reader-mod/lib/Epub/Epub/ParsedText.cpp
cottongin 4cf395aee9 port: upstream PR #1342 - Book Info screen, richer metadata, safer controls
Ports upstream PR #1342 (feat: Add Book Info screen, richer metadata,
and safer file-browser controls) with mod-specific adaptations:

- Parse and cache series, seriesIndex, description from EPUB OPF
- Bump book.bin cache version to 6 for new metadata fields
- Add BookInfoActivity (new screen) accessible via Right button in FileBrowser
- Add ManageBook menu via Left button in FileBrowser (replaces upstream hidden delete)
- Guard all delete/archive actions with ConfirmationActivity (10 call sites)
- Add inputArmed gating to ConfirmationActivity to prevent accidental confirmation
- Safe deserialization: readString now returns bool with MAX_STRING_LENGTH guard
- Add series field to RecentBooksStore with JSON and binary serialization
- Add i18n keys: STR_BOOK_INFO, STR_AUTHOR, STR_SERIES, STR_FILE_SIZE, etc.

Made-with: Cursor
2026-03-09 00:39:32 -04:00

624 lines
26 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "ParsedText.h"
#include <GfxRenderer.h>
#include <Utf8.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <functional>
#include <limits>
#include <vector>
#include "hyphenation/Hyphenator.h"
// Upper bound for line-breaking costs: DP slots start at this value and any computed cost is clamped to it.
constexpr int MAX_COST{std::numeric_limits<int>::max()};
namespace {
// UTF-8 encoding of U+00AD (soft hyphen), the byte pattern searched for in EPUB words.
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
// Encoded length in bytes, taken from the literal itself (terminator excluded) so the two stay in sync.
constexpr size_t SOFT_HYPHEN_BYTES = sizeof(SOFT_HYPHEN_UTF8) - 1;
// Decodes codepoints from the start of `word` and returns the first one that is not a soft hyphen (U+00AD).
// Returns 0 when the decoder yields 0 before any other codepoint (e.g. an empty word or one made only of
// soft hyphens).
uint32_t firstCodepoint(const std::string& word) {
  const auto* cursor = reinterpret_cast<const unsigned char*>(word.c_str());
  uint32_t codepoint;
  do {
    codepoint = utf8NextCodepoint(&cursor);
  } while (codepoint == 0x00AD);  // soft hyphens are skipped; 0 or any other value ends the scan
  return codepoint;
}
// Decodes the final codepoint of `word`; returns 0 for an empty word.
// The lead byte of the last UTF-8 sequence is located by stepping backward over continuation bytes
// (bit pattern 10xxxxxx). Unlike firstCodepoint(), a trailing soft hyphen is not skipped here.
uint32_t lastCodepoint(const std::string& word) {
  if (word.empty()) {
    return 0;
  }

  const auto isContinuationByte = [](const char byte) { return (static_cast<uint8_t>(byte) & 0xC0) == 0x80; };

  size_t leadIndex = word.size() - 1;
  while (leadIndex > 0 && isContinuationByte(word[leadIndex])) {
    --leadIndex;
  }

  const auto* cursor = reinterpret_cast<const unsigned char*>(word.c_str() + leadIndex);
  return utf8NextCodepoint(&cursor);
}
// True when `word` contains at least one UTF-8 encoded soft hyphen.
bool containsSoftHyphen(const std::string& word) {
  const size_t firstHit = word.find(SOFT_HYPHEN_UTF8);
  return firstHit != std::string::npos;
}
// Erases every soft hyphen from `word` in place so the rendered glyphs match the measured widths.
// After each erase the search resumes at the same offset, so back-to-back soft hyphens are all removed.
void stripSoftHyphensInPlace(std::string& word) {
  for (size_t hit = word.find(SOFT_HYPHEN_UTF8); hit != std::string::npos; hit = word.find(SOFT_HYPHEN_UTF8, hit)) {
    word.erase(hit, SOFT_HYPHEN_BYTES);
  }
}
// Measures the advance width of `word` for layout purposes.
//  - A lone space (without appendHyphen) is answered by renderer.getSpaceWidth().
//  - Soft hyphens are removed from a private copy before measuring, so they never contribute width.
//  - With appendHyphen set, a visible '-' is appended to the measured text (used for hyphenated prefixes).
// Advance width (sum of glyph advances + kerning) is used rather than bounding-box width so that italic glyph
// overhangs don't inflate inter-word spacing.
uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
                          const EpdFontFamily::Style style, const bool appendHyphen = false) {
  if (!appendHyphen) {
    const bool isLoneSpace = word.size() == 1 && word[0] == ' ';
    if (isLoneSpace) {
      return renderer.getSpaceWidth(fontId, style);
    }
    if (!containsSoftHyphen(word)) {
      // Fast path: nothing to strip or append, so measure the caller's buffer without copying.
      return renderer.getTextAdvanceX(fontId, word.c_str(), style);
    }
  }

  // Slow path: measure a modified copy of the word.
  std::string measured = word;
  stripSoftHyphensInPlace(measured);  // no-op when the word has no soft hyphen
  if (appendHyphen) {
    measured.push_back('-');
  }
  return renderer.getTextAdvanceX(fontId, measured.c_str(), style);
}
// ---------------------------------------------------------------------------
// Direct-mapped word-width cache
//
// Avoids redundant getTextAdvanceX calls when the same (word, style, fontId)
// triple appears across paragraphs. A fixed-size static array is used so
// that heap allocation and fragmentation are both zero.
//
// Eviction policy: hash-direct mapping - a word always occupies the single
// slot determined by its hash; a collision simply overwrites that slot.
// This gives O(1) lookup (one hash + one memcmp) regardless of how full the
// cache is, avoiding the O(n) linear-scan overhead that causes a regression
// on corpora with many unique words (e.g. German compound-heavy text).
//
// Words longer than 23 bytes bypass the cache entirely - they are uncommon,
// unlikely to repeat verbatim, and exceed the fixed-width key buffer.
// ---------------------------------------------------------------------------
struct WordWidthCacheEntry {
  char word[24];   // Key text, NUL-terminated; 23 usable bytes + terminator
  int fontId;      // Key: font the width was measured with
  uint16_t width;  // Cached measurement for (word, fontId, style)
  uint8_t style;   // Key: EpdFontFamily::Style narrowed to one byte
  bool valid;      // false = slot empty (static storage is zero-initialised)
};

// Slot selection is `hash & WORD_WIDTH_CACHE_MASK`, which only addresses every slot when the size is a
// power of two. 128 entries x 32 bytes = 4 KB of static storage (assuming the usual 32-byte entry layout).
static constexpr uint32_t WORD_WIDTH_CACHE_SIZE = 128;
static constexpr uint32_t WORD_WIDTH_CACHE_MASK = WORD_WIDTH_CACHE_SIZE - 1;
static_assert(WORD_WIDTH_CACHE_SIZE != 0 && (WORD_WIDTH_CACHE_SIZE & WORD_WIDTH_CACHE_MASK) == 0,
              "WORD_WIDTH_CACHE_SIZE must be a power of two: slots are selected with hash & WORD_WIDTH_CACHE_MASK");
static WordWidthCacheEntry s_wordWidthCache[WORD_WIDTH_CACHE_SIZE];
// Hashes the cache key (word bytes, fontId, style) to a 32-bit value.
// The first `len` bytes of `str` go through FNV-1a; fontId is then mixed in with one more xor/multiply round and
// the style byte is xor-ed into the result last.
static uint32_t wordWidthCacheHash(const char* str, const size_t len, const int fontId, const uint8_t style) {
  constexpr uint32_t FNV_OFFSET_BASIS = 2166136261u;
  constexpr uint32_t FNV_PRIME = 16777619u;

  uint32_t hash = FNV_OFFSET_BASIS;
  for (const char* cursor = str; cursor != str + len; ++cursor) {
    hash = (hash ^ static_cast<uint8_t>(*cursor)) * FNV_PRIME;
  }
  hash = (hash ^ static_cast<uint32_t>(fontId)) * FNV_PRIME;
  return hash ^ style;
}
// Looks up the width of (word, style, fontId) in the direct-mapped cache; on a miss the word is measured with
// measureWordWidth() and the slot is overwritten with the new entry.
// Words that do not fit the entry's key buffer (including the NUL terminator) are measured directly and never
// cached. Appending a hyphen is not supported - those measurements are word-fragment lookups that will not
// repeat and must not pollute the cache.
static uint16_t cachedMeasureWordWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
                                       const EpdFontFamily::Style style) {
  const size_t keyBytes = word.size() + 1;  // key text plus its NUL terminator
  if (keyBytes > sizeof(WordWidthCacheEntry::word)) {
    return measureWordWidth(renderer, fontId, word, style);
  }

  const auto styleKey = static_cast<uint8_t>(style);
  const char* const key = word.c_str();
  WordWidthCacheEntry& entry =
      s_wordWidthCache[wordWidthCacheHash(key, word.size(), fontId, styleKey) & WORD_WIDTH_CACHE_MASK];

  // Comparing keyBytes bytes includes the terminator, so a cached longer word sharing this prefix is not a hit.
  const bool isHit =
      entry.valid && entry.fontId == fontId && entry.style == styleKey && memcmp(entry.word, key, keyBytes) == 0;
  if (!isHit) {
    entry.width = measureWordWidth(renderer, fontId, word, style);
    memcpy(entry.word, key, keyBytes);
    entry.fontId = fontId;
    entry.style = styleKey;
    entry.valid = true;
  }
  return entry.width;
}
} // namespace
// Appends one word to the paragraph together with its style and continuation flag.
// Empty words are ignored. When `underline` is set, the UNDERLINE bit is OR-ed into the stored style.
// `attachToPrevious` is recorded in wordContinues; the layout code treats such words as attached to the
// preceding word (no inter-word space, no line break in between).
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline,
                         const bool attachToPrevious) {
  if (word.empty()) {
    return;
  }

  const EpdFontFamily::Style storedStyle =
      underline ? static_cast<EpdFontFamily::Style>(fontStyle | EpdFontFamily::UNDERLINE) : fontStyle;

  words.push_back(std::move(word));
  wordStyles.push_back(storedStyle);
  wordContinues.push_back(attachToPrevious);
}
// Lays out the buffered words into lines of at most `viewportWidth` and hands each finished line to
// `processLine`. Consumes data to minimize memory usage: words that were emitted are erased afterwards, so the
// container only holds what is still pending.
//
// includeLastLine = false keeps the final (possibly still growing) line in the buffer instead of emitting it.
//
// NOTE(review): applyParagraphIndent() runs on every call. After a call with includeLastLine = false the
// retained words begin mid-paragraph, yet a later call would treat them as a paragraph start again (EmSpace
// prefix / first-line indent) - confirm that callers account for this.
void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const uint16_t viewportWidth,
                                       const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
                                       const bool includeLastLine) {
  if (words.empty()) {
    return;
  }

  // Apply fixed transforms before any per-line layout work.
  applyParagraphIndent();

  const int pageWidth = viewportWidth;
  auto wordWidths = calculateWordWidths(renderer, fontId);

  std::vector<size_t> lineBreakIndices;
  if (hyphenationEnabled) {
    lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, wordWidths, wordContinues);
  } else {
    lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, wordWidths, wordContinues);
  }

  // Defensive: with no break indices `size() - 1` below would wrap around to SIZE_MAX and the extraction loop
  // would index far out of bounds.
  if (lineBreakIndices.empty()) {
    return;
  }

  const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
  for (size_t i = 0; i < lineCount; ++i) {
    extractLine(i, pageWidth, wordWidths, wordContinues, lineBreakIndices, processLine, renderer, fontId);
  }

  // Remove consumed words so size() reflects only remaining words.
  if (lineCount > 0) {
    const size_t consumed = lineBreakIndices[lineCount - 1];
    words.erase(words.begin(), words.begin() + consumed);
    wordStyles.erase(wordStyles.begin(), wordStyles.begin() + consumed);
    wordContinues.erase(wordContinues.begin(), wordContinues.begin() + consumed);
  }
}
// Measures every buffered word with its own style (through the word-width cache) and returns the widths in a
// vector that is index-aligned with `words`.
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
  const size_t wordCount = words.size();

  std::vector<uint16_t> measured;
  measured.reserve(wordCount);

  size_t index = 0;
  while (index < wordCount) {
    measured.push_back(cachedMeasureWordWidth(renderer, fontId, words[index], wordStyles[index]));
    ++index;
  }
  return measured;
}
// Computes line breaks for the non-hyphenating layout path using a minimum-raggedness dynamic program.
//
// Each candidate line is charged (unused width)^2, the final line is free, and the break sequence with the
// smallest total cost wins. The returned vector holds, for every line, the index of the word that starts the
// NEXT line (i.e. an exclusive end index into `words`).
//
// Side effects: words wider than a full line are split up front via hyphenateWordAtIndex() with fallback
// breaks enabled, which inserts entries into `words`, `wordStyles`, `wordContinues` and `wordWidths`.
// `continuesVec` marks words that attach to their predecessor; no break is placed in front of such a word
// unless the oversized-word fallback below forces it.
std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
std::vector<uint16_t>& wordWidths, std::vector<bool>& continuesVec) {
if (words.empty()) {
return {};
}
// Calculate first line indent (only for left/justified text).
// Positive text-indent (paragraph indent) is suppressed when extraParagraphSpacing is on.
// Negative text-indent (hanging indent, e.g. margin-left:3em; text-indent:-1em) always applies —
// it is structural (positions the bullet/marker), not decorative.
const int firstLineIndent =
blockStyle.textIndentDefined && (blockStyle.textIndent < 0 || !extraParagraphSpacing) &&
(blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left)
? blockStyle.textIndent
: 0;
// Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation.
// wordWidths.size() is re-read every iteration because a successful split inserts the remainder at i + 1,
// which is then examined (and split again if necessary) on the next pass of the outer loop.
for (size_t i = 0; i < wordWidths.size(); ++i) {
// First word needs to fit in reduced width if there's an indent
const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
while (wordWidths[i] > effectiveWidth) {
if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) {
break;
}
}
}
// Taken after the pre-split loop so it includes any remainders inserted above.
const size_t totalWordCount = words.size();
// DP table to store the minimum badness (cost) of lines starting at index i
std::vector<int> dp(totalWordCount);
// 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
std::vector<size_t> ans(totalWordCount);
// Base Case
dp[totalWordCount - 1] = 0;
ans[totalWordCount - 1] = totalWordCount - 1;
// Fill the table back to front: dp[i] depends only on dp[j + 1] for j >= i.
for (int i = totalWordCount - 2; i >= 0; --i) {
int currlen = 0;
dp[i] = MAX_COST;
// First line has reduced width due to text-indent
const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
for (size_t j = i; j < totalWordCount; ++j) {
// Add space before word j, unless it's the first word on the line or a continuation
int gap = 0;
if (j > static_cast<size_t>(i) && !continuesVec[j]) {
gap =
renderer.getSpaceAdvance(fontId, lastCodepoint(words[j - 1]), firstCodepoint(words[j]), wordStyles[j - 1]);
} else if (j > static_cast<size_t>(i) && continuesVec[j]) {
// Cross-boundary kerning for continuation words (e.g. nonbreaking spaces, attached punctuation)
gap = renderer.getKerning(fontId, lastCodepoint(words[j - 1]), firstCodepoint(words[j]), wordStyles[j - 1]);
}
currlen += wordWidths[j] + gap;
// Words i..j no longer fit; adding further words can only make the line longer.
if (currlen > effectivePageWidth) {
break;
}
// Cannot break after word j if the next word attaches to it (continuation group)
if (j + 1 < totalWordCount && continuesVec[j + 1]) {
continue;
}
int cost;
if (j == totalWordCount - 1) {
cost = 0; // Last line
} else {
const int remainingSpace = effectivePageWidth - currlen;
// Use long long for the square to prevent overflow
const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
if (cost_ll > MAX_COST) {
cost = MAX_COST;
} else {
cost = static_cast<int>(cost_ll);
}
}
if (cost < dp[i]) {
dp[i] = cost;
ans[i] = j; // j is the index of the last word in this optimal line
}
}
// Handle oversized word: if no valid configuration found, force single-word line
// This prevents cascade failure where one oversized word breaks all preceding words
// (Note: this forced break is placed after word i even when word i + 1 is a continuation.)
if (dp[i] == MAX_COST) {
ans[i] = i; // Just this word on its own line
// Inherit cost from next word to allow subsequent words to find valid configurations
if (i + 1 < static_cast<int>(totalWordCount)) {
dp[i] = dp[i + 1];
} else {
dp[i] = 0;
}
}
}
// Stores the index of the word that starts the next line (last_word_index + 1)
std::vector<size_t> lineBreakIndices;
// Initial capacity guess of roughly one line per eight words; the vector still grows if more are needed.
lineBreakIndices.reserve(totalWordCount / 8 + 1);
// Walk the chosen breaks forward from the first word to turn ans[] into the list of line ends.
size_t currentWordIndex = 0;
while (currentWordIndex < totalWordCount) {
size_t nextBreakIndex = ans[currentWordIndex] + 1;
// Safety check: prevent infinite loop if nextBreakIndex doesn't advance
if (nextBreakIndex <= currentWordIndex) {
// Force advance by at least one word to avoid infinite loop
nextBreakIndex = currentWordIndex + 1;
}
lineBreakIndices.push_back(nextBreakIndex);
currentWordIndex = nextBreakIndex;
}
return lineBreakIndices;
}
// Prefixes the first word with an EM SPACE (U+2003) as a fallback paragraph indent.
// Nothing happens when extraParagraphSpacing is on, when there are no words, when the CSS defines a
// text-indent (even 0 - that indent is positioned in extractLine() instead), or when the block is neither
// left-aligned nor justified.
void ParsedText::applyParagraphIndent() {
  if (extraParagraphSpacing || words.empty()) {
    return;
  }
  if (blockStyle.textIndentDefined) {
    // Explicit CSS text-indent wins over the EmSpace fallback.
    return;
  }

  const bool usesFallbackIndent =
      blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left;
  if (usesFallbackIndent) {
    words.front().insert(0, "\xe2\x80\x83");
  }
}
// Builds break indices while opportunistically splitting the word that would overflow the current line.
//
// Greedy first-fit: words are appended to the current line while they fit. When one overflows, an attempt is
// made to hyphenate it so that its prefix fills the remaining width; the remainder then starts the next line.
// The returned vector holds, for every line, the index of the word that starts the NEXT line.
//
// Side effects: a successful split inserts entries into `words`, `wordStyles`, `wordContinues` and
// `wordWidths` (see hyphenateWordAtIndex), which is why both loops re-read wordWidths.size().
std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId,
const int pageWidth, std::vector<uint16_t>& wordWidths,
std::vector<bool>& continuesVec) {
// Calculate first line indent (only for left/justified text).
// Positive text-indent (paragraph indent) is suppressed when extraParagraphSpacing is on.
// Negative text-indent (hanging indent, e.g. margin-left:3em; text-indent:-1em) always applies —
// it is structural (positions the bullet/marker), not decorative.
const int firstLineIndent =
blockStyle.textIndentDefined && (blockStyle.textIndent < 0 || !extraParagraphSpacing) &&
(blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left)
? blockStyle.textIndent
: 0;
std::vector<size_t> lineBreakIndices;
size_t currentIndex = 0;
bool isFirstLine = true;
// One iteration of this outer loop produces exactly one line.
while (currentIndex < wordWidths.size()) {
const size_t lineStart = currentIndex;
int lineWidth = 0;
// First line has reduced width due to text-indent
const int effectivePageWidth = isFirstLine ? pageWidth - firstLineIndent : pageWidth;
// Consume as many words as possible for current line, splitting when prefixes fit
while (currentIndex < wordWidths.size()) {
const bool isFirstWord = currentIndex == lineStart;
int spacing = 0;
if (!isFirstWord && !continuesVec[currentIndex]) {
spacing = renderer.getSpaceAdvance(fontId, lastCodepoint(words[currentIndex - 1]),
firstCodepoint(words[currentIndex]), wordStyles[currentIndex - 1]);
} else if (!isFirstWord && continuesVec[currentIndex]) {
// Cross-boundary kerning for continuation words (e.g. nonbreaking spaces, attached punctuation)
spacing = renderer.getKerning(fontId, lastCodepoint(words[currentIndex - 1]),
firstCodepoint(words[currentIndex]), wordStyles[currentIndex - 1]);
}
const int candidateWidth = spacing + wordWidths[currentIndex];
// Word fits on current line
if (lineWidth + candidateWidth <= effectivePageWidth) {
lineWidth += candidateWidth;
++currentIndex;
continue;
}
// Word would overflow — try to split based on hyphenation points
// The prefix must fit in what is left of the line after the inter-word spacing.
const int availableWidth = effectivePageWidth - lineWidth - spacing;
const bool allowFallbackBreaks = isFirstWord; // Only for first word on line
if (availableWidth > 0 &&
hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) {
// Prefix now fits; append it to this line and move to next line
// (wordWidths[currentIndex] now holds the prefix width; the remainder sits at currentIndex + 1.)
lineWidth += spacing + wordWidths[currentIndex];
++currentIndex;
break;
}
// Could not split: force at least one word per line to avoid infinite loop
if (currentIndex == lineStart) {
lineWidth += candidateWidth;
++currentIndex;
}
break;
}
// Don't break before a continuation word (e.g., orphaned "?" after "question").
// Backtrack to the start of the continuation group so the whole group moves to the next line.
// The `> lineStart + 1` bound keeps at least one word on the line, so the outer loop always advances.
while (currentIndex > lineStart + 1 && currentIndex < wordWidths.size() && continuesVec[currentIndex]) {
--currentIndex;
}
lineBreakIndices.push_back(currentIndex);
isFirstLine = false;
}
return lineBreakIndices;
}
// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint
// fits the available width.
//
// Breakpoints come from Hyphenator::breakOffsets(); `allowFallbackBreaks` is forwarded to it. Among the
// breakpoints whose prefix fits into `availableWidth`, the widest prefix is chosen.
//
// On success (returns true) the word is replaced in place by the prefix and the remainder is inserted directly
// after it; `wordStyles`, `wordContinues` and `wordWidths` receive a matching new entry so that all four
// vectors stay index-aligned. On failure (returns false) nothing is modified.
bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
                                      const int fontId, std::vector<uint16_t>& wordWidths,
                                      const bool allowFallbackBreaks) {
  // Guard against invalid indices or zero available width before attempting to split.
  // wordWidths is checked too because it is written by index further down.
  if (availableWidth <= 0 || wordIndex >= words.size() || wordIndex >= wordWidths.size()) {
    return false;
  }

  const std::string& word = words[wordIndex];
  const auto style = wordStyles[wordIndex];

  // Collect candidate breakpoints (byte offsets and hyphen requirements).
  auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
  if (breakInfos.empty()) {
    return false;
  }

  size_t chosenOffset = 0;
  int chosenWidth = -1;
  bool chosenNeedsHyphen = true;

  // Scratch buffer reused for every candidate prefix to avoid one allocation per breakpoint.
  std::string prefix;
  prefix.reserve(word.size());

  // Iterate over each legal breakpoint and retain the widest prefix that still fits.
  // Breakpoints are in ascending order, so once a prefix is too wide, all subsequent ones will be too.
  for (const auto& info : breakInfos) {
    const size_t offset = info.byteOffset;
    if (offset == 0 || offset >= word.size()) {
      continue;  // would leave an empty prefix or an empty remainder
    }

    const bool needsHyphen = info.requiresInsertedHyphen;
    prefix.assign(word, 0, offset);
    const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen);
    if (prefixWidth > availableWidth) {
      break;  // Ascending order: all subsequent breakpoints yield wider prefixes
    }
    if (prefixWidth <= chosenWidth) {
      continue;  // Not an improvement
    }

    chosenWidth = prefixWidth;
    chosenOffset = offset;
    chosenNeedsHyphen = needsHyphen;
  }

  if (chosenWidth < 0) {
    // No hyphenation point produced a prefix that fits in the remaining space.
    return false;
  }

  // Split the word at the selected breakpoint. The remainder is copied out and measured first so that it can
  // be moved into `words` below instead of being copied a second time.
  std::string remainder = word.substr(chosenOffset);
  const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style);

  words[wordIndex].resize(chosenOffset);
  if (chosenNeedsHyphen) {
    words[wordIndex].push_back('-');
  }

  // Insert the remainder word (with matching style and continuation flag) directly after the prefix.
  // `word` must not be used past this point: inserting into `words` may invalidate the reference.
  words.insert(words.begin() + wordIndex + 1, std::move(remainder));
  wordStyles.insert(wordStyles.begin() + wordIndex + 1, style);

  // Continuation flag handling after splitting a word into prefix + remainder.
  //
  // The prefix keeps the original word's continuation flag so that no-break-space groups
  // stay linked. The remainder always gets continues=false because it starts on the next
  // line and is not attached to the prefix.
  //
  // Example: "200&#xA0;Quadratkilometer" produces tokens:
  //   [0] "200"               continues=false
  //   [1] " "                 continues=true
  //   [2] "Quadratkilometer"  continues=true   <-- the word being split
  //
  // After splitting "Quadratkilometer" at "Quadrat-" / "kilometer":
  //   [0] "200"        continues=false
  //   [1] " "          continues=true
  //   [2] "Quadrat-"   continues=true   (KEPT - still attached to the no-break group)
  //   [3] "kilometer"  continues=false  (NEW - starts fresh on the next line)
  //
  // This lets the backtracking loop keep the entire prefix group ("200 Quadrat-") on one
  // line, while "kilometer" moves to the next line.
  // wordContinues[wordIndex] is intentionally left unchanged - the prefix keeps its original attachment.
  wordContinues.insert(wordContinues.begin() + wordIndex + 1, false);

  // Update cached widths to reflect the new prefix/remainder pairing.
  wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
  wordWidths.insert(wordWidths.begin() + wordIndex + 1, remainderWidth);
  return true;
}
// Builds the TextBlock for line `breakIndex` and passes it to `processLine`.
//
// The line covers the words in [previous break, lineBreakIndices[breakIndex]). For each of them an x position
// is computed from the word widths, the natural gaps between words (space advance, or plain kerning in front
// of a continuation word) and the block alignment:
//   - Left / Justify: start at the first-line indent (may be negative for hanging indents); Justify also
//     spreads the spare width evenly over the real gaps, except on the last line.
//   - Right / Center: start at the spare width, respectively half of it.
// The words of the line are moved out of `words` (leaving moved-from strings behind) and stripped of soft
// hyphens before being handed over.
void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const std::vector<uint16_t>& wordWidths,
                             const std::vector<bool>& continuesVec, const std::vector<size_t>& lineBreakIndices,
                             const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
                             const GfxRenderer& renderer, const int fontId) {
  // Half-open range [lineBegin, lineEnd) of word indices that make up this line.
  const size_t lineEnd = lineBreakIndices[breakIndex];
  const size_t lineBegin = breakIndex == 0 ? 0 : lineBreakIndices[breakIndex - 1];
  const size_t wordsOnLine = lineEnd - lineBegin;

  const bool isFirstLine = breakIndex == 0;
  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
  const bool isJustified = blockStyle.alignment == CssTextAlign::Justify;

  // First-line indent applies to left/justified text only. A positive text-indent (paragraph indent) is
  // suppressed when extraParagraphSpacing is on; a negative one (hanging indent, e.g. margin-left:3em;
  // text-indent:-1em) always applies because it is structural, not decorative.
  int firstLineIndent = 0;
  if (isFirstLine && blockStyle.textIndentDefined && (blockStyle.textIndent < 0 || !extraParagraphSpacing) &&
      (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left)) {
    firstLineIndent = blockStyle.textIndent;
  }

  // Pass 1: total word width, number of real (stretchable) gaps, and the summed natural gap widths.
  int wordWidthTotal = 0;
  size_t stretchableGaps = 0;
  int naturalGapTotal = 0;
  for (size_t idx = lineBegin; idx < lineEnd; ++idx) {
    wordWidthTotal += wordWidths[idx];
    if (idx == lineBegin) {
      continue;  // nothing precedes the first word of the line
    }
    if (continuesVec[idx]) {
      // Cross-boundary kerning for continuation words (e.g. nonbreaking spaces, attached punctuation)
      naturalGapTotal +=
          renderer.getKerning(fontId, lastCodepoint(words[idx - 1]), firstCodepoint(words[idx]), wordStyles[idx - 1]);
    } else {
      ++stretchableGaps;
      const int naturalGap = renderer.getSpaceAdvance(fontId, lastCodepoint(words[idx - 1]),
                                                      firstCodepoint(words[idx]), wordStyles[idx - 1]);
      naturalGapTotal += naturalGap;
    }
  }

  // The indent reduces the width available on the first line.
  const int effectivePageWidth = pageWidth - firstLineIndent;
  const int spareSpace = effectivePageWidth - wordWidthTotal - naturalGapTotal;

  // Justified text distributes the spare width evenly over the real gaps (never on the last line).
  int justifyExtra = 0;
  if (isJustified && !isLastLine && stretchableGaps >= 1) {
    justifyExtra = spareSpace / static_cast<int>(stretchableGaps);
  }

  // Starting x: the indent for left/justified text, the spare width (or half of it) for right/centered text.
  auto xpos = static_cast<int16_t>(firstLineIndent);
  if (blockStyle.alignment == CssTextAlign::Right) {
    xpos = spareSpace;
  } else if (blockStyle.alignment == CssTextAlign::Center) {
    xpos = spareSpace / 2;
  }

  // Pass 2: x position of every word. A word followed by a continuation advances by its width plus kerning
  // only; otherwise a space advance (plus the justification share) follows it.
  std::vector<int16_t> lineXPos;
  lineXPos.reserve(wordsOnLine);
  for (size_t idx = lineBegin; idx < lineEnd; ++idx) {
    lineXPos.push_back(xpos);

    const bool hasNext = idx + 1 < lineEnd;
    if (hasNext && continuesVec[idx + 1]) {
      int advance = wordWidths[idx];
      // Cross-boundary kerning for continuation words (e.g. nonbreaking spaces, attached punctuation)
      advance +=
          renderer.getKerning(fontId, lastCodepoint(words[idx]), firstCodepoint(words[idx + 1]), wordStyles[idx]);
      xpos += advance;
      continue;
    }

    int gap = hasNext ? renderer.getSpaceAdvance(fontId, lastCodepoint(words[idx]), firstCodepoint(words[idx + 1]),
                                                 wordStyles[idx])
                      : renderer.getSpaceWidth(fontId, wordStyles[idx]);
    if (isJustified && !isLastLine) {
      gap += justifyExtra;
    }
    xpos += wordWidths[idx] + gap;
  }

  // Move the line's words out of the paragraph buffer; styles are cheap and simply copied.
  std::vector<std::string> lineWords(std::make_move_iterator(words.begin() + lineBegin),
                                     std::make_move_iterator(words.begin() + lineEnd));
  std::vector<EpdFontFamily::Style> lineWordStyles(wordStyles.begin() + lineBegin, wordStyles.begin() + lineEnd);

  // Soft hyphens were only needed for break computation; they must not reach the renderer.
  for (std::string& lineWord : lineWords) {
    stripSoftHyphensInPlace(lineWord);  // no-op for words without soft hyphens
  }

  processLine(
      std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle));
}