perf: Replace std::list with std::vector in text layout (#1038)
## Summary _Revision to @blindbat's #802. Description comes from the original PR._ - Replace `std::list` with `std::vector` for word storage in `TextBlock` and `ParsedText` - Use index-based access (`words[i]`) instead of iterator advancement (`std::advance(it, n)`) - Remove the separate `continuesVec` copy that was built from `wordContinues` for O(1) access — now unnecessary since `std::vector<bool>` already provides O(1) indexing ## Why `std::list` allocates each node individually on the heap with 16 bytes of prev/next pointer overhead per node. For text layout with many small words, this means: - Scattered heap allocations instead of contiguous memory - Poor cache locality during iteration (each node can be anywhere in memory) - Per-node malloc/free overhead during construction and destruction `std::vector` stores elements contiguously, giving better cache performance during the tight rendering and layout loops. The `extractLine` function also benefits: list splice was O(1) but required maintaining three parallel iterators, while vector range construction with move iterators is simpler and still efficient for the small line-sized chunks involved. ## Files changed - `lib/Epub/Epub/blocks/TextBlock.h` / `.cpp` - `lib/Epub/Epub/ParsedText.h` / `.cpp` ## AI Usage YES ## Test plan - [ ] Open an EPUB with mixed formatting (bold, italic, underline) — verify text renders correctly - [ ] Open a book with justified text — verify word spacing is correct - [ ] Open a book with hyphenation enabled — verify words break correctly at hyphens - [ ] Navigate through pages rapidly — verify no rendering glitches or crashes - [ ] Open a book with long paragraphs — verify text layout matches pre-change behavior --------- Co-authored-by: Kuanysh Bekkulov <kbekkulov@gmail.com>
This commit is contained in:
@@ -5,7 +5,6 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
@@ -82,37 +81,34 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
|
||||
const int spaceWidth = renderer.getSpaceWidth(fontId);
|
||||
auto wordWidths = calculateWordWidths(renderer, fontId);
|
||||
|
||||
// Build indexed continues vector from the parallel list for O(1) access during layout
|
||||
std::vector<bool> continuesVec(wordContinues.begin(), wordContinues.end());
|
||||
|
||||
std::vector<size_t> lineBreakIndices;
|
||||
if (hyphenationEnabled) {
|
||||
// Use greedy layout that can split words mid-loop when a hyphenated prefix fits.
|
||||
lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, continuesVec);
|
||||
lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, wordContinues);
|
||||
} else {
|
||||
lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, continuesVec);
|
||||
lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, wordContinues);
|
||||
}
|
||||
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
|
||||
|
||||
for (size_t i = 0; i < lineCount; ++i) {
|
||||
extractLine(i, pageWidth, spaceWidth, wordWidths, continuesVec, lineBreakIndices, processLine);
|
||||
extractLine(i, pageWidth, spaceWidth, wordWidths, wordContinues, lineBreakIndices, processLine);
|
||||
}
|
||||
|
||||
// Remove consumed words so size() reflects only remaining words
|
||||
if (lineCount > 0) {
|
||||
const size_t consumed = lineBreakIndices[lineCount - 1];
|
||||
words.erase(words.begin(), words.begin() + consumed);
|
||||
wordStyles.erase(wordStyles.begin(), wordStyles.begin() + consumed);
|
||||
wordContinues.erase(wordContinues.begin(), wordContinues.begin() + consumed);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
|
||||
const size_t totalWordCount = words.size();
|
||||
|
||||
std::vector<uint16_t> wordWidths;
|
||||
wordWidths.reserve(totalWordCount);
|
||||
wordWidths.reserve(words.size());
|
||||
|
||||
auto wordsIt = words.begin();
|
||||
auto wordStylesIt = wordStyles.begin();
|
||||
|
||||
while (wordsIt != words.end()) {
|
||||
wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt));
|
||||
|
||||
std::advance(wordsIt, 1);
|
||||
std::advance(wordStylesIt, 1);
|
||||
for (size_t i = 0; i < words.size(); ++i) {
|
||||
wordWidths.push_back(measureWordWidth(renderer, fontId, words[i], wordStyles[i]));
|
||||
}
|
||||
|
||||
return wordWidths;
|
||||
@@ -137,8 +133,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
|
||||
// First word needs to fit in reduced width if there's an indent
|
||||
const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
|
||||
while (wordWidths[i] > effectiveWidth) {
|
||||
if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true,
|
||||
&continuesVec)) {
|
||||
if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -284,8 +279,8 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
|
||||
const int availableWidth = effectivePageWidth - lineWidth - spacing;
|
||||
const bool allowFallbackBreaks = isFirstWord; // Only for first word on line
|
||||
|
||||
if (availableWidth > 0 && hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths,
|
||||
allowFallbackBreaks, &continuesVec)) {
|
||||
if (availableWidth > 0 &&
|
||||
hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) {
|
||||
// Prefix now fits; append it to this line and move to next line
|
||||
lineWidth += spacing + wordWidths[currentIndex];
|
||||
++currentIndex;
|
||||
@@ -317,20 +312,14 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
|
||||
// available width.
|
||||
bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
|
||||
const int fontId, std::vector<uint16_t>& wordWidths,
|
||||
const bool allowFallbackBreaks, std::vector<bool>* continuesVec) {
|
||||
const bool allowFallbackBreaks) {
|
||||
// Guard against invalid indices or zero available width before attempting to split.
|
||||
if (availableWidth <= 0 || wordIndex >= words.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get iterators to target word and style.
|
||||
auto wordIt = words.begin();
|
||||
auto styleIt = wordStyles.begin();
|
||||
std::advance(wordIt, wordIndex);
|
||||
std::advance(styleIt, wordIndex);
|
||||
|
||||
const std::string& word = *wordIt;
|
||||
const auto style = *styleIt;
|
||||
const std::string& word = words[wordIndex];
|
||||
const auto style = wordStyles[wordIndex];
|
||||
|
||||
// Collect candidate breakpoints (byte offsets and hyphen requirements).
|
||||
auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
|
||||
@@ -367,16 +356,14 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
||||
|
||||
// Split the word at the selected breakpoint and append a hyphen if required.
|
||||
std::string remainder = word.substr(chosenOffset);
|
||||
wordIt->resize(chosenOffset);
|
||||
words[wordIndex].resize(chosenOffset);
|
||||
if (chosenNeedsHyphen) {
|
||||
wordIt->push_back('-');
|
||||
words[wordIndex].push_back('-');
|
||||
}
|
||||
|
||||
// Insert the remainder word (with matching style and continuation flag) directly after the prefix.
|
||||
auto insertWordIt = std::next(wordIt);
|
||||
auto insertStyleIt = std::next(styleIt);
|
||||
words.insert(insertWordIt, remainder);
|
||||
wordStyles.insert(insertStyleIt, style);
|
||||
words.insert(words.begin() + wordIndex + 1, remainder);
|
||||
wordStyles.insert(wordStyles.begin() + wordIndex + 1, style);
|
||||
|
||||
// Continuation flag handling after splitting a word into prefix + remainder.
|
||||
//
|
||||
@@ -397,17 +384,8 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
||||
//
|
||||
// This lets the backtracking loop keep the entire prefix group ("200 Quadrat-") on one
|
||||
// line, while "kilometer" moves to the next line.
|
||||
auto continuesIt = wordContinues.begin();
|
||||
std::advance(continuesIt, wordIndex);
|
||||
// *continuesIt is intentionally left unchanged — the prefix keeps its original attachment.
|
||||
const auto insertContinuesIt = std::next(continuesIt);
|
||||
wordContinues.insert(insertContinuesIt, false);
|
||||
|
||||
// Keep the indexed vector in sync if provided.
|
||||
if (continuesVec) {
|
||||
// (*continuesVec)[wordIndex] stays unchanged — prefix keeps its attachment.
|
||||
continuesVec->insert(continuesVec->begin() + wordIndex + 1, false);
|
||||
}
|
||||
// wordContinues[wordIndex] is intentionally left unchanged — the prefix keeps its original attachment.
|
||||
wordContinues.insert(wordContinues.begin() + wordIndex + 1, false);
|
||||
|
||||
// Update cached widths to reflect the new prefix/remainder pairing.
|
||||
wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
|
||||
@@ -467,7 +445,8 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
||||
|
||||
// Pre-calculate X positions for words
|
||||
// Continuation words attach to the previous word with no space before them
|
||||
std::list<uint16_t> lineXPos;
|
||||
std::vector<uint16_t> lineXPos;
|
||||
lineXPos.reserve(lineWordCount);
|
||||
|
||||
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
|
||||
const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
|
||||
@@ -480,23 +459,10 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
||||
xpos += currentWordWidth + (nextIsContinuation ? 0 : spacing);
|
||||
}
|
||||
|
||||
// Iterators always start at the beginning as we are moving content with splice below
|
||||
auto wordEndIt = words.begin();
|
||||
auto wordStyleEndIt = wordStyles.begin();
|
||||
auto wordContinuesEndIt = wordContinues.begin();
|
||||
std::advance(wordEndIt, lineWordCount);
|
||||
std::advance(wordStyleEndIt, lineWordCount);
|
||||
std::advance(wordContinuesEndIt, lineWordCount);
|
||||
|
||||
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
|
||||
std::list<std::string> lineWords;
|
||||
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
|
||||
std::list<EpdFontFamily::Style> lineWordStyles;
|
||||
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
|
||||
|
||||
// Consume continues flags (not passed to TextBlock, but must be consumed to stay in sync)
|
||||
std::list<bool> lineContinues;
|
||||
lineContinues.splice(lineContinues.begin(), wordContinues, wordContinues.begin(), wordContinuesEndIt);
|
||||
// Build line data by moving from the original vectors using index range
|
||||
std::vector<std::string> lineWords(std::make_move_iterator(words.begin() + lastBreakAt),
|
||||
std::make_move_iterator(words.begin() + lineBreak));
|
||||
std::vector<EpdFontFamily::Style> lineWordStyles(wordStyles.begin() + lastBreakAt, wordStyles.begin() + lineBreak);
|
||||
|
||||
for (auto& word : lineWords) {
|
||||
if (containsSoftHyphen(word)) {
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
#include <EpdFontFamily.h>
|
||||
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@@ -14,9 +13,9 @@
|
||||
class GfxRenderer;
|
||||
|
||||
class ParsedText {
|
||||
std::list<std::string> words;
|
||||
std::list<EpdFontFamily::Style> wordStyles;
|
||||
std::list<bool> wordContinues; // true = word attaches to previous (no space before it)
|
||||
std::vector<std::string> words;
|
||||
std::vector<EpdFontFamily::Style> wordStyles;
|
||||
std::vector<bool> wordContinues; // true = word attaches to previous (no space before it)
|
||||
BlockStyle blockStyle;
|
||||
bool extraParagraphSpacing;
|
||||
bool hyphenationEnabled;
|
||||
@@ -28,8 +27,7 @@ class ParsedText {
|
||||
int spaceWidth, std::vector<uint16_t>& wordWidths,
|
||||
std::vector<bool>& continuesVec);
|
||||
bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId,
|
||||
std::vector<uint16_t>& wordWidths, bool allowFallbackBreaks,
|
||||
std::vector<bool>* continuesVec = nullptr);
|
||||
std::vector<uint16_t>& wordWidths, bool allowFallbackBreaks);
|
||||
void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
|
||||
const std::vector<bool>& continuesVec, const std::vector<size_t>& lineBreakIndices,
|
||||
const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
|
||||
|
||||
@@ -12,16 +12,13 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
|
||||
return;
|
||||
}
|
||||
|
||||
auto wordIt = words.begin();
|
||||
auto wordStylesIt = wordStyles.begin();
|
||||
auto wordXposIt = wordXpos.begin();
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
const int wordX = *wordXposIt + x;
|
||||
const EpdFontFamily::Style currentStyle = *wordStylesIt;
|
||||
renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle);
|
||||
const int wordX = wordXpos[i] + x;
|
||||
const EpdFontFamily::Style currentStyle = wordStyles[i];
|
||||
renderer.drawText(fontId, wordX, y, words[i].c_str(), true, currentStyle);
|
||||
|
||||
if ((currentStyle & EpdFontFamily::UNDERLINE) != 0) {
|
||||
const std::string& w = *wordIt;
|
||||
const std::string& w = words[i];
|
||||
const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle);
|
||||
// y is the top of the text line; add ascender to reach baseline, then offset 2px below
|
||||
const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2;
|
||||
@@ -41,10 +38,6 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
|
||||
|
||||
renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true);
|
||||
}
|
||||
|
||||
std::advance(wordIt, 1);
|
||||
std::advance(wordStylesIt, 1);
|
||||
std::advance(wordXposIt, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,15 +73,15 @@ bool TextBlock::serialize(FsFile& file) const {
|
||||
|
||||
std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
|
||||
uint16_t wc;
|
||||
std::list<std::string> words;
|
||||
std::list<uint16_t> wordXpos;
|
||||
std::list<EpdFontFamily::Style> wordStyles;
|
||||
std::vector<std::string> words;
|
||||
std::vector<uint16_t> wordXpos;
|
||||
std::vector<EpdFontFamily::Style> wordStyles;
|
||||
BlockStyle blockStyle;
|
||||
|
||||
// Word count
|
||||
serialization::readPod(file, wc);
|
||||
|
||||
// Sanity check: prevent allocation of unreasonably large lists (max 10000 words per block)
|
||||
// Sanity check: prevent allocation of unreasonably large vectors (max 10000 words per block)
|
||||
if (wc > 10000) {
|
||||
LOG_ERR("TXB", "Deserialization failed: word count %u exceeds maximum", wc);
|
||||
return nullptr;
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
#include <EpdFontFamily.h>
|
||||
#include <HalStorage.h>
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Block.h"
|
||||
#include "BlockStyle.h"
|
||||
@@ -12,14 +12,14 @@
|
||||
// Represents a line of text on a page
|
||||
class TextBlock final : public Block {
|
||||
private:
|
||||
std::list<std::string> words;
|
||||
std::list<uint16_t> wordXpos;
|
||||
std::list<EpdFontFamily::Style> wordStyles;
|
||||
std::vector<std::string> words;
|
||||
std::vector<uint16_t> wordXpos;
|
||||
std::vector<EpdFontFamily::Style> wordStyles;
|
||||
BlockStyle blockStyle;
|
||||
|
||||
public:
|
||||
explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos,
|
||||
std::list<EpdFontFamily::Style> word_styles, const BlockStyle& blockStyle = BlockStyle())
|
||||
explicit TextBlock(std::vector<std::string> words, std::vector<uint16_t> word_xpos,
|
||||
std::vector<EpdFontFamily::Style> word_styles, const BlockStyle& blockStyle = BlockStyle())
|
||||
: words(std::move(words)),
|
||||
wordXpos(std::move(word_xpos)),
|
||||
wordStyles(std::move(word_styles)),
|
||||
|
||||
Reference in New Issue
Block a user