Refactor ParsedText to remove PrefixWidthCache and simplify word width calculations for improved clarity and performance

This commit is contained in:
Arthur Tazhitdinov 2025-12-26 01:47:40 +05:00
parent 4acdad44ff
commit 73c8b17737
2 changed files with 33 additions and 75 deletions

View File

@ -8,15 +8,12 @@
#include <functional> #include <functional>
#include <iterator> #include <iterator>
#include <limits> #include <limits>
#include <unordered_map>
#include <vector> #include <vector>
constexpr int MAX_COST = std::numeric_limits<int>::max(); constexpr int MAX_COST = std::numeric_limits<int>::max();
namespace { namespace {
using PrefixWidthCache = ParsedText::PrefixWidthCache;
struct HyphenSplitDecision { struct HyphenSplitDecision {
size_t byteOffset; size_t byteOffset;
uint16_t prefixWidth; uint16_t prefixWidth;
@ -27,24 +24,9 @@ struct HyphenationGuard {
size_t tailIndex; size_t tailIndex;
}; };
uint16_t cachedPrefixWidth(PrefixWidthCache& cache, const GfxRenderer& renderer, const int fontId,
const std::string& word, const EpdFontStyle style, const size_t prefixBytes) {
const void* wordKey = static_cast<const void*>(&word);
auto& offsetMap = cache[wordKey];
const auto it = offsetMap.find(prefixBytes);
if (it != offsetMap.end()) {
return it->second;
}
const std::string prefix = word.substr(0, prefixBytes);
const uint16_t width = renderer.getTextWidth(fontId, prefix.c_str(), style);
offsetMap.emplace(prefixBytes, width);
return width;
}
bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word, bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
const EpdFontStyle style, const int availableWidth, const bool includeFallback, const EpdFontStyle style, const int availableWidth, const bool includeFallback,
PrefixWidthCache& cache, HyphenSplitDecision* decision) { HyphenSplitDecision* decision) {
if (!decision || availableWidth <= 0) { if (!decision || availableWidth <= 0) {
return false; return false;
} }
@ -64,7 +46,8 @@ bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const st
uint16_t chosenWidth = 0; uint16_t chosenWidth = 0;
for (const size_t offset : offsets) { for (const size_t offset : offsets) {
const int prefixWidth = cachedPrefixWidth(cache, renderer, fontId, word, style, offset); const std::string prefix = word.substr(0, offset);
const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style);
if (prefixWidth <= adjustedWidth) { if (prefixWidth <= adjustedWidth) {
chosenOffset = offset; chosenOffset = offset;
chosenWidth = static_cast<uint16_t>(prefixWidth + hyphenWidth); chosenWidth = static_cast<uint16_t>(prefixWidth + hyphenWidth);
@ -101,9 +84,9 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
const int pageWidth = renderer.getScreenWidth() - horizontalMargin; const int pageWidth = renderer.getScreenWidth() - horizontalMargin;
const int spaceWidth = renderer.getSpaceWidth(fontId); const int spaceWidth = renderer.getSpaceWidth(fontId);
PrefixWidthCache prefixWidthCache; // Pre-split oversized tokens so the DP step always has feasible line candidates.
auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth, prefixWidthCache); auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth);
auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, prefixWidthCache); auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1; const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
for (size_t i = 0; i < lineCount; ++i) { for (size_t i = 0; i < lineCount; ++i) {
@ -112,7 +95,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
} }
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId, std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId,
const int pageWidth, PrefixWidthCache& cache) { const int pageWidth) {
const size_t totalWordCount = words.size(); const size_t totalWordCount = words.size();
std::vector<uint16_t> wordWidths; std::vector<uint16_t> wordWidths;
@ -131,9 +114,8 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt); uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt);
if (width > pageWidth) { if (width > pageWidth) {
// Pre-split oversized tokens so the DP step always has feasible line candidates.
HyphenSplitDecision decision; HyphenSplitDecision decision;
if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, cache, &decision)) { if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, &decision)) {
const std::string originalWord = *wordsIt; const std::string originalWord = *wordsIt;
const std::string tail = originalWord.substr(decision.byteOffset); const std::string tail = originalWord.substr(decision.byteOffset);
if (tail.empty()) { if (tail.empty()) {
@ -164,14 +146,12 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
} }
std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth, std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
const int spaceWidth, std::vector<uint16_t>& wordWidths, const int spaceWidth, std::vector<uint16_t>& wordWidths) {
PrefixWidthCache& cache) {
if (words.empty()) { if (words.empty()) {
return {}; return {};
} }
std::vector<HyphenationGuard> guards; std::vector<HyphenationGuard> guards;
std::vector<int> lineWordWidthSums;
auto shiftGuardIndices = [&](size_t insertPos) { auto shiftGuardIndices = [&](size_t insertPos) {
for (auto& guard : guards) { for (auto& guard : guards) {
@ -187,17 +167,8 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
auto runDp = [&](std::vector<size_t>& lineBreaks) { auto runDp = [&](std::vector<size_t>& lineBreaks) {
const size_t totalWordCount = wordWidths.size(); const size_t totalWordCount = wordWidths.size();
// DP table to store the minimum badness (cost) of lines starting at index i
std::vector<int> dp(totalWordCount); std::vector<int> dp(totalWordCount);
// 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
std::vector<size_t> ans(totalWordCount); std::vector<size_t> ans(totalWordCount);
lineWordWidthSums.assign(totalWordCount, 0);
if (!wordWidths.empty()) {
lineWordWidthSums.back() = wordWidths.back();
}
// Base Case
dp[totalWordCount - 1] = 0; dp[totalWordCount - 1] = 0;
ans[totalWordCount - 1] = totalWordCount - 1; ans[totalWordCount - 1] = totalWordCount - 1;
@ -207,7 +178,6 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
dp[i] = MAX_COST; dp[i] = MAX_COST;
for (size_t j = i; j < totalWordCount; ++j) { for (size_t j = i; j < totalWordCount; ++j) {
// Current line length: previous width + space + current word width
currlen += wordWidths[j] + spaceWidth; currlen += wordWidths[j] + spaceWidth;
if (currlen > pageWidth) { if (currlen > pageWidth) {
@ -230,17 +200,13 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
cost = 0; cost = 0;
} else { } else {
const int remainingSpace = pageWidth - currlen; const int remainingSpace = pageWidth - currlen;
// Use long long for the square to prevent overflow
const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1]; const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
cost = cost_ll > MAX_COST ? MAX_COST : static_cast<int>(cost_ll); cost = cost_ll > MAX_COST ? MAX_COST : static_cast<int>(cost_ll);
} }
if (cost < dp[i]) { if (cost < dp[i]) {
dp[i] = cost; dp[i] = cost;
ans[i] = j; // j is the index of the last word in this optimal line ans[i] = j;
const size_t wordsInLine = j - i + 1;
const int spacesWidth = wordsInLine > 1 ? static_cast<int>(wordsInLine - 1) * spaceWidth : 0;
lineWordWidthSums[i] = currlen - spacesWidth;
} }
} }
} }
@ -273,7 +239,10 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
const bool isLastLine = lineIdx == lineBreakIndices.size() - 1; const bool isLastLine = lineIdx == lineBreakIndices.size() - 1;
const size_t lineWordCount = lineBreak - lastBreakAt; const size_t lineWordCount = lineBreak - lastBreakAt;
const int lineWordWidthSum = (lastBreakAt < lineWordWidthSums.size()) ? lineWordWidthSums[lastBreakAt] : 0; int lineWordWidthSum = 0;
for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
lineWordWidthSum += wordWidths[idx];
}
lastBreakAt = lineBreak; lastBreakAt = lineBreak;
if (isLastLine || lineBreak >= wordWidths.size()) { if (isLastLine || lineBreak >= wordWidths.size()) {
@ -299,7 +268,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
} }
HyphenSplitDecision decision; HyphenSplitDecision decision;
if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, cache, &decision)) { if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, &decision)) {
continue; continue;
} }
@ -338,23 +307,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
const size_t lineWordCount = lineBreak - lastBreakAt; const size_t lineWordCount = lineBreak - lastBreakAt;
// Calculate total word width for this line
int lineWordWidthSum = 0; int lineWordWidthSum = 0;
for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) { for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
lineWordWidthSum += wordWidths[idx]; lineWordWidthSum += wordWidths[idx];
} }
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
// Calculate spacing
const int spareSpace = pageWidth - lineWordWidthSum; const int spareSpace = pageWidth - lineWordWidthSum;
int spacing = spaceWidth; int spacing = spaceWidth;
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
spacing = spareSpace / (lineWordCount - 1); spacing = spareSpace / (lineWordCount - 1);
} }
// Calculate initial x position
uint16_t xpos = 0; uint16_t xpos = 0;
if (style == TextBlock::RIGHT_ALIGN) { if (style == TextBlock::RIGHT_ALIGN) {
xpos = spareSpace - (lineWordCount - 1) * spaceWidth; xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
@ -362,7 +327,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
} }
// Pre-calculate X positions for words
std::list<uint16_t> lineXPos; std::list<uint16_t> lineXPos;
for (size_t i = lastBreakAt; i < lineBreak; i++) { for (size_t i = lastBreakAt; i < lineBreak; i++) {
const uint16_t currentWordWidth = wordWidths[i]; const uint16_t currentWordWidth = wordWidths[i];
@ -370,13 +334,11 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
xpos += currentWordWidth + spacing; xpos += currentWordWidth + spacing;
} }
// Iterators always start at the beginning as we are moving content with splice below
auto wordEndIt = words.begin(); auto wordEndIt = words.begin();
auto wordStyleEndIt = wordStyles.begin(); auto wordStyleEndIt = wordStyles.begin();
std::advance(wordEndIt, lineWordCount); std::advance(wordEndIt, lineWordCount);
std::advance(wordStyleEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount);
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
std::list<std::string> lineWords; std::list<std::string> lineWords;
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
std::list<EpdFontStyle> lineWordStyles; std::list<EpdFontStyle> lineWordStyles;

View File

@ -6,7 +6,6 @@
#include <list> #include <list>
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "blocks/TextBlock.h" #include "blocks/TextBlock.h"
@ -14,23 +13,6 @@
class GfxRenderer; class GfxRenderer;
class ParsedText { class ParsedText {
public:
using PrefixWidthCache = std::unordered_map<const void*, std::unordered_map<size_t, uint16_t>>;
explicit ParsedText(const TextBlock::BLOCK_STYLE style, bool extraParagraphSpacing, bool hyphenationEnabled)
: style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {}
~ParsedText() = default;
void addWord(std::string word, EpdFontStyle fontStyle);
void setStyle(TextBlock::BLOCK_STYLE style) { this->style = style; }
TextBlock::BLOCK_STYLE getStyle() const { return style; }
size_t size() const { return words.size(); }
bool isEmpty() const { return words.empty(); }
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
bool includeLastLine = true);
private:
std::list<std::string> words; std::list<std::string> words;
std::list<EpdFontStyle> wordStyles; std::list<EpdFontStyle> wordStyles;
TextBlock::BLOCK_STYLE style; TextBlock::BLOCK_STYLE style;
@ -38,10 +20,24 @@ class ParsedText {
bool hyphenationEnabled; bool hyphenationEnabled;
std::vector<size_t> computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth, int spaceWidth, std::vector<size_t> computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth, int spaceWidth,
std::vector<uint16_t>& wordWidths, PrefixWidthCache& cache); std::vector<uint16_t>& wordWidths);
void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths, void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
const std::vector<size_t>& lineBreakIndices, const std::vector<size_t>& lineBreakIndices,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine); const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth, std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth);
PrefixWidthCache& cache);
public:
explicit ParsedText(const TextBlock::BLOCK_STYLE style, const bool extraParagraphSpacing,
const bool hyphenationEnabled)
: style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {}
~ParsedText() = default;
void addWord(std::string word, EpdFontStyle fontStyle);
void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; }
TextBlock::BLOCK_STYLE getStyle() const { return style; }
size_t size() const { return words.size(); }
bool isEmpty() const { return words.empty(); }
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
bool includeLastLine = true);
}; };