Public release

2025-12-03 22:00:29 +11:00
commit 2ccdbeecc8
54 changed files with 33356 additions and 0 deletions
--- a/lib/Epub/Epub/blocks/TextBlock.cpp
+++ b/lib/Epub/Epub/blocks/TextBlock.cpp
@@ -0,0 +1,235 @@
+#include "TextBlock.h"
+
+#include <EpdRenderer.h>
+#include <Serialization.h>
+
+// Returns true for characters that separate words in a span of text: the HTML
+// "ASCII whitespace" set (space, tab, line feed, form feed, carriage return).
+// Tab and form feed are included so they are never kept as part of a word.
+static bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r'; }
+
+// Advance `index` over a run of non-whitespace characters (one word) and
+// return the position just past it, or `length` if the word reaches the end.
+static int skipWord(const std::string& text, int index, const int length) {
+  for (; index < length; ++index) {
+    if (isWhitespace(text[index])) {
+      break;
+    }
+  }
+  return index;
+}
+
+// Advance `index` over a run of whitespace characters and return the position
+// of the first non-whitespace character, or `length` if none remains.
+static int skipWhitespace(const std::string& html, int index, const int length) {
+  for (; index < length; ++index) {
+    if (!isWhitespace(html[index])) {
+      break;
+    }
+  }
+  return index;
+}
+
+// Tokenise `span` into whitespace-separated words and append them to this
+// block. Every word appended from the span carries the same style bits, built
+// from `is_bold` and `is_italic`.
+void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) {
+  const int length = span.length();
+  // the style is a property of the whole span, so compute it once
+  const auto spanStyle = (is_bold ? BOLD_SPAN : 0) | (is_italic ? ITALIC_SPAN : 0);
+
+  int cursor = 0;
+  while (cursor < length) {
+    // a word starts at the first non-whitespace character...
+    const int wordStart = skipWhitespace(span, cursor, length);
+    // ...and ends at the next whitespace character (or the end of the span)
+    cursor = skipWord(span, wordStart, length);
+    if (cursor == wordStart) {
+      // nothing but trailing whitespace was left
+      continue;
+    }
+    words.push_back(span.substr(wordStart, cursor - wordStart));
+    wordStyles.push_back(spanStyle);
+  }
+}
+
+// Break this block's words into lines that fit the renderer's page width and
+// return one newly allocated TextBlock per line, in reading order.
+//
+// Line breaks are chosen by dynamic programming: the cost of a line is the
+// square of its unused width, the final line is free, and the set of breaks
+// with the smallest total cost wins. A word that is wider than the page on its
+// own is placed alone on a line. Words are then positioned according to
+// `style`: right aligned, centred, justified (every line but the last), or
+// otherwise starting at x = 0.
+//
+// The returned blocks are created with `new` and the raw pointers are handed
+// to the caller. An empty list is returned when the block has no words. As a
+// side effect `wordXpos` of this block is resized to the word count and filled
+// with the computed positions.
+std::list<TextBlock*> TextBlock::splitIntoLines(const EpdRenderer* renderer) {
+  std::list<TextBlock*> lines;
+
+  const int totalWordCount = static_cast<int>(words.size());
+  if (totalWordCount == 0) {
+    // nothing to lay out; also keeps the "last word" indexing below valid
+    return lines;
+  }
+
+  const int pageWidth = renderer->getPageWidth();
+  const int spaceWidth = renderer->getSpaceWidth();
+
+  words.shrink_to_fit();
+  wordStyles.shrink_to_fit();
+  // resize (not reserve): the positions are written by index further down
+  wordXpos.resize(totalWordCount);
+
+  // measure each word
+  std::vector<int> wordWidths(totalWordCount);
+  for (int i = 0; i < totalWordCount; i++) {
+    wordWidths[i] = renderer->getTextWidth(words[i].c_str(), wordStyles[i] & BOLD_SPAN, wordStyles[i] & ITALIC_SPAN);
+  }
+
+  // dp[i]  = minimum total cost of laying out words[i..] with a line starting at words[i]
+  // ans[i] = index of the last word on the line that starts at words[i]
+  // Costs are 64-bit so that summing squared slack over many lines cannot overflow.
+  std::vector<long long> dp(totalWordCount, 0);
+  std::vector<int> ans(totalWordCount, 0);
+
+  // A line starting at the final word holds just that word, and the last line
+  // is free.
+  dp[totalWordCount - 1] = 0;
+  ans[totalWordCount - 1] = totalWordCount - 1;
+
+  // Try every word as the first word of a line, working backwards.
+  for (int i = totalWordCount - 2; i >= 0; i--) {
+    long long bestCost = -1;  // -1 means "no arrangement fits yet"
+    int bestEnd = i;
+
+    // Width of words[i..j] including the gaps between them. Starting at
+    // -spaceWidth cancels the gap that is added along with the first word.
+    int currlen = -spaceWidth;
+
+    for (int j = i; j < totalWordCount; j++) {
+      currlen += wordWidths[j] + spaceWidth;
+
+      // If we're wider than the page then no more words can be added
+      if (currlen > pageWidth) break;
+
+      // The last line costs nothing; any other line costs the square of the
+      // left over space plus the cost of the lines that follow it.
+      long long cost = 0;
+      if (j != totalWordCount - 1) {
+        const long long slack = pageWidth - currlen;
+        cost = slack * slack + dp[j + 1];
+      }
+
+      // strict comparison: on a tie the shorter line (smaller j) is kept
+      if (bestCost < 0 || cost < bestCost) {
+        bestCost = cost;
+        bestEnd = j;
+      }
+    }
+
+    if (bestCost < 0) {
+      // words[i] alone is wider than the page: give it a line of its own
+      // rather than leaving the table entry unset.
+      bestCost = dp[i + 1];
+      bestEnd = i;
+    }
+
+    dp[i] = bestCost;
+    ans[i] = bestEnd;
+  }
+
+  // Walk the answer table to collect the line break positions. Each entry is
+  // the index one past the last word of a line.
+  const size_t maxLineBreaks = 1000;
+  std::vector<int> lineBreaks;
+  for (int i = 0; i < totalWordCount;) {
+    i = ans[i] + 1;
+    lineBreaks.push_back(i);
+    // Text too big, just exit (any remaining words are not laid out)
+    if (lineBreaks.size() > maxLineBreaks) {
+      break;
+    }
+  }
+
+  // With the line breaks calculated we can now position the words along each
+  // line
+  int startWord = 0;
+  for (size_t lineIndex = 0; lineIndex < lineBreaks.size(); lineIndex++) {
+    const int lineBreak = lineBreaks[lineIndex];
+    const bool isLastLine = lineIndex + 1 == lineBreaks.size();
+    const int lineWordCount = lineBreak - startWord;
+
+    int lineWordWidthSum = 0;
+    for (int i = startWord; i < lineBreak; i++) {
+      lineWordWidthSum += wordWidths[i];
+    }
+
+    // Space left once the words themselves are placed; never negative, even
+    // for a single word that is wider than the page.
+    int spareSpace = pageWidth - lineWordWidthSum;
+    if (spareSpace < 0) {
+      spareSpace = 0;
+    }
+
+    // evenly space words if using justified style, not the last line, and at
+    // least 2 words
+    int spacing = spaceWidth;
+    if (style == JUSTIFIED && !isLastLine && lineWordCount >= 2) {
+      spacing = spareSpace / (lineWordCount - 1);
+    }
+
+    // Starting x position: whatever is left after the words and their normal
+    // gaps goes entirely (right align) or half (centre) to the left.
+    const int gapsWidth = (lineWordCount - 1) * spaceWidth;
+    int xpos = 0;
+    if (style == RIGHT_ALIGN) {
+      xpos = spareSpace - gapsWidth;
+    } else if (style == CENTER_ALIGN) {
+      xpos = (spareSpace - gapsWidth) / 2;
+    }
+    if (xpos < 0) {
+      xpos = 0;
+    }
+
+    std::vector<std::string> lineWords;
+    std::vector<uint16_t> lineXPos;
+    std::vector<uint8_t> lineWordStyles;
+    lineWords.reserve(lineWordCount);
+    lineXPos.reserve(lineWordCount);
+    lineWordStyles.reserve(lineWordCount);
+
+    for (int i = startWord; i < lineBreak; i++) {
+      wordXpos[i] = static_cast<uint16_t>(xpos);
+      xpos += wordWidths[i] + spacing;
+
+      lineWords.push_back(words[i]);
+      lineXPos.push_back(wordXpos[i]);
+      lineWordStyles.push_back(wordStyles[i]);
+    }
+
+    lines.push_back(new TextBlock(lineWords, lineXPos, lineWordStyles, style));
+    startWord = lineBreak;
+  }
+
+  return lines;
+}
+
+// Draw every word of this block through `renderer`. Each word is drawn at
+// (x + its stored x position, y) with the bold/italic flags taken from its
+// style bits.
+void TextBlock::render(const EpdRenderer* renderer, const int x, const int y) const {
+  // Only words that have both a stored position and a style can be drawn. A
+  // block whose vectors disagree in length is drawn as far as all three agree
+  // instead of being indexed out of range.
+  size_t drawableCount = words.size();
+  if (wordXpos.size() < drawableCount) {
+    drawableCount = wordXpos.size();
+  }
+  if (wordStyles.size() < drawableCount) {
+    drawableCount = wordStyles.size();
+  }
+
+  for (size_t i = 0; i < drawableCount; i++) {
+    // get the style
+    const uint8_t wordStyle = wordStyles[i];
+    // render the word
+    renderer->drawText(x + wordXpos[i], y, words[i].c_str(), wordStyle & BOLD_SPAN, wordStyle & ITALIC_SPAN);
+  }
+}
+
+// Write this block to `os`. The layout is three length-prefixed sections, each
+// a uint32_t element count followed by the elements, in the order words,
+// wordXpos, wordStyles; the block style is written last. deserialize() reads
+// the same sequence back.
+void TextBlock::serialize(std::ostream& os) const {
+  // section 1: the words themselves
+  const uint32_t wordCount = words.size();
+  serialization::writePod(os, wordCount);
+  for (const auto& word : words) {
+    serialization::writeString(os, word);
+  }
+
+  // section 2: x position of each word
+  const uint32_t xposCount = wordXpos.size();
+  serialization::writePod(os, xposCount);
+  for (auto xpos : wordXpos) {
+    serialization::writePod(os, xpos);
+  }
+
+  // section 3: style bits of each word
+  const uint32_t styleCount = wordStyles.size();
+  serialization::writePod(os, styleCount);
+  for (auto wordStyle : wordStyles) {
+    serialization::writePod(os, wordStyle);
+  }
+
+  // trailer: style of the block as a whole
+  serialization::writePod(os, style);
+}
+
+// Read a block from `is` in the order serialize() writes it: words, wordXpos
+// and wordStyles, each as a uint32_t count followed by that many elements,
+// then the block style. Returns a TextBlock created with `new`; the raw
+// pointer is handed to the caller.
+TextBlock* TextBlock::deserialize(std::istream& is) {
+  // section 1: the words themselves
+  uint32_t wordCount;
+  serialization::readPod(is, wordCount);
+  std::vector<std::string> loadedWords;
+  loadedWords.resize(wordCount);
+  for (auto& word : loadedWords) {
+    serialization::readString(is, word);
+  }
+
+  // section 2: x position of each word
+  uint32_t xposCount;
+  serialization::readPod(is, xposCount);
+  std::vector<uint16_t> loadedXpos;
+  loadedXpos.resize(xposCount);
+  for (auto& xpos : loadedXpos) {
+    serialization::readPod(is, xpos);
+  }
+
+  // section 3: style bits of each word
+  uint32_t styleCount;
+  serialization::readPod(is, styleCount);
+  std::vector<uint8_t> loadedStyles;
+  loadedStyles.resize(styleCount);
+  for (auto& wordStyle : loadedStyles) {
+    serialization::readPod(is, wordStyle);
+  }
+
+  // trailer: style of the block as a whole
+  BLOCK_STYLE blockStyle;
+  serialization::readPod(is, blockStyle);
+
+  return new TextBlock(loadedWords, loadedXpos, loadedStyles, blockStyle);
+}