Merge upstream/master into feature/continue-reading-cover

Resolve conflicts:
- GfxRenderer: Add cropX/cropY params to drawBitmap, keep 1-bit BMP support
- GfxRenderer: Update drawBitmap1Bit to use readNextRow (API change)
- JpegToBmpConverter: Use upstream scaling logic (larger dimension)
- HomeActivity: Use StringUtils::checkFileExtension, add hasOpdsUrl
- HomeActivity: Keep cover image functionality with own buffer management
2026-01-09 23:57:19 +09:00
parent f0fa90da0c d4ae108d9b
commit 0f9f8d71d9
61 changed files with 3501 additions and 512 deletions
--- a/lib/Epub/Epub.cpp
+++ b/lib/Epub/Epub.cpp
@@ -8,6 +8,7 @@

 #include "Epub/parsers/ContainerParser.h"
 #include "Epub/parsers/ContentOpfParser.h"
+#include "Epub/parsers/TocNavParser.h"
 #include "Epub/parsers/TocNcxParser.h"

 bool Epub::findContentOpfFile(std::string* contentOpfFile) const {
@@ -80,6 +81,10 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
    tocNcxItem = opfParser.tocNcxPath;
  }

+  if (!opfParser.tocNavPath.empty()) {
+    tocNavItem = opfParser.tocNavPath;
+  }
+
  Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
  return true;
 }
@@ -141,6 +146,60 @@ bool Epub::parseTocNcxFile() const {
  return true;
 }

+bool Epub::parseTocNavFile() const {
+  // the nav file should have been specified in the content.opf file (EPUB 3)
+  if (tocNavItem.empty()) {
+    Serial.printf("[%lu] [EBP] No nav file specified\n", millis());
+    return false;
+  }
+
+  Serial.printf("[%lu] [EBP] Parsing toc nav file: %s\n", millis(), tocNavItem.c_str());
+
+  const auto tmpNavPath = getCachePath() + "/toc.nav";
+  FsFile tempNavFile;
+  if (!SdMan.openFileForWrite("EBP", tmpNavPath, tempNavFile)) {
+    return false;
+  }
+  readItemContentsToStream(tocNavItem, tempNavFile, 1024);
+  tempNavFile.close();
+  if (!SdMan.openFileForRead("EBP", tmpNavPath, tempNavFile)) {
+    return false;
+  }
+  const auto navSize = tempNavFile.size();
+
+  TocNavParser navParser(contentBasePath, navSize, bookMetadataCache.get());
+
+  if (!navParser.setup()) {
+    Serial.printf("[%lu] [EBP] Could not setup toc nav parser\n", millis());
+    return false;
+  }
+
+  const auto navBuffer = static_cast<uint8_t*>(malloc(1024));
+  if (!navBuffer) {
+    Serial.printf("[%lu] [EBP] Could not allocate memory for toc nav parser\n", millis());
+    return false;
+  }
+
+  while (tempNavFile.available()) {
+    const auto readSize = tempNavFile.read(navBuffer, 1024);
+    const auto processedSize = navParser.write(navBuffer, readSize);
+
+    if (processedSize != readSize) {
+      Serial.printf("[%lu] [EBP] Could not process all toc nav data\n", millis());
+      free(navBuffer);
+      tempNavFile.close();
+      return false;
+    }
+  }
+
+  free(navBuffer);
+  tempNavFile.close();
+  SdMan.remove(tmpNavPath.c_str());
+
+  Serial.printf("[%lu] [EBP] Parsed TOC nav items\n", millis());
+  return true;
+}
+
 // load in the meta data for the epub file
 bool Epub::load(const bool buildIfMissing) {
  Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
@@ -184,15 +243,31 @@ bool Epub::load(const bool buildIfMissing) {
    return false;
  }

-  // TOC Pass
+  // TOC Pass - try EPUB 3 nav first, fall back to NCX
  if (!bookMetadataCache->beginTocPass()) {
    Serial.printf("[%lu] [EBP] Could not begin writing toc pass\n", millis());
    return false;
  }
-  if (!parseTocNcxFile()) {
-    Serial.printf("[%lu] [EBP] Could not parse toc\n", millis());
-    return false;
+
+  bool tocParsed = false;
+
+  // Try EPUB 3 nav document first (preferred)
+  if (!tocNavItem.empty()) {
+    Serial.printf("[%lu] [EBP] Attempting to parse EPUB 3 nav document\n", millis());
+    tocParsed = parseTocNavFile();
  }
+
+  // Fall back to NCX if nav parsing failed or wasn't available
+  if (!tocParsed && !tocNcxItem.empty()) {
+    Serial.printf("[%lu] [EBP] Falling back to NCX TOC\n", millis());
+    tocParsed = parseTocNcxFile();
+  }
+
+  if (!tocParsed) {
+    Serial.printf("[%lu] [EBP] Warning: Could not parse any TOC format\n", millis());
+    // Continue anyway - book will work without TOC
+  }
+
  if (!bookMetadataCache->endTocPass()) {
    Serial.printf("[%lu] [EBP] Could not end writing toc pass\n", millis());
    return false;
--- a/lib/Epub/Epub.h
+++ b/lib/Epub/Epub.h
@@ -12,8 +12,10 @@
 class ZipFile;

 class Epub {
-  // the ncx file
+  // the ncx file (EPUB 2)
  std::string tocNcxItem;
+  // the nav file (EPUB 3)
+  std::string tocNavItem;
  // where is the EPUBfile?
  std::string filepath;
  // the base path for items in the EPUB file
@@ -26,6 +28,7 @@ class Epub {
  bool findContentOpfFile(std::string* contentOpfFile) const;
  bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
  bool parseTocNcxFile() const;
+  bool parseTocNavFile() const;

 public:
  explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
--- a/lib/Epub/Epub/Section.cpp
+++ b/lib/Epub/Epub/Section.cpp
@@ -7,9 +7,9 @@
 #include "parsers/ChapterHtmlSlimParser.h"

 namespace {
-constexpr uint8_t SECTION_FILE_VERSION = 8;
-constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint16_t) +
-                                 sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t);
+constexpr uint8_t SECTION_FILE_VERSION = 9;
+constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
+                                 sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t);
 }  // namespace

 uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
@@ -30,19 +30,21 @@ uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
 }

 void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
-                                     const uint16_t viewportWidth, const uint16_t viewportHeight) {
+                                     const uint8_t paragraphAlignment, const uint16_t viewportWidth,
+                                     const uint16_t viewportHeight) {
  if (!file) {
    Serial.printf("[%lu] [SCT] File not open for writing header\n", millis());
    return;
  }
  static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
-                                   sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) +
-                                   sizeof(pageCount) + sizeof(uint32_t),
+                                   sizeof(extraParagraphSpacing) + sizeof(paragraphAlignment) + sizeof(viewportWidth) +
+                                   sizeof(viewportHeight) + sizeof(pageCount) + sizeof(uint32_t),
                "Header size mismatch");
  serialization::writePod(file, SECTION_FILE_VERSION);
  serialization::writePod(file, fontId);
  serialization::writePod(file, lineCompression);
  serialization::writePod(file, extraParagraphSpacing);
+  serialization::writePod(file, paragraphAlignment);
  serialization::writePod(file, viewportWidth);
  serialization::writePod(file, viewportHeight);
  serialization::writePod(file, pageCount);  // Placeholder for page count (will be initially 0 when written)
@@ -50,7 +52,8 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi
 }

 bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
-                              const uint16_t viewportWidth, const uint16_t viewportHeight) {
+                              const uint8_t paragraphAlignment, const uint16_t viewportWidth,
+                              const uint16_t viewportHeight) {
  if (!SdMan.openFileForRead("SCT", filePath, file)) {
    return false;
  }
@@ -70,15 +73,17 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con
    uint16_t fileViewportWidth, fileViewportHeight;
    float fileLineCompression;
    bool fileExtraParagraphSpacing;
+    uint8_t fileParagraphAlignment;
    serialization::readPod(file, fileFontId);
    serialization::readPod(file, fileLineCompression);
    serialization::readPod(file, fileExtraParagraphSpacing);
+    serialization::readPod(file, fileParagraphAlignment);
    serialization::readPod(file, fileViewportWidth);
    serialization::readPod(file, fileViewportHeight);

    if (fontId != fileFontId || lineCompression != fileLineCompression ||
-        extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth ||
-        viewportHeight != fileViewportHeight) {
+        extraParagraphSpacing != fileExtraParagraphSpacing || paragraphAlignment != fileParagraphAlignment ||
+        viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight) {
      file.close();
      Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis());
      clearCache();
@@ -109,8 +114,8 @@ bool Section::clearCache() const {
 }

 bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
-                                const uint16_t viewportWidth, const uint16_t viewportHeight,
-                                const std::function<void()>& progressSetupFn,
+                                const uint8_t paragraphAlignment, const uint16_t viewportWidth,
+                                const uint16_t viewportHeight, const std::function<void()>& progressSetupFn,
                                const std::function<void(int)>& progressFn) {
  constexpr uint32_t MIN_SIZE_FOR_PROGRESS = 50 * 1024;  // 50KB
  const auto localPath = epub->getSpineItem(spineIndex).href;
@@ -166,11 +171,13 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
  if (!SdMan.openFileForWrite("SCT", filePath, file)) {
    return false;
  }
-  writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
+  writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
+                         viewportHeight);
  std::vector<uint32_t> lut = {};

  ChapterHtmlSlimParser visitor(
-      tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight,
+      tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
+      viewportHeight,
      [this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
      progressFn);
  success = visitor.parseAndBuildPages();
--- a/lib/Epub/Epub/Section.h
+++ b/lib/Epub/Epub/Section.h
@@ -14,8 +14,8 @@ class Section {
  std::string filePath;
  FsFile file;

-  void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, uint16_t viewportWidth,
-                              uint16_t viewportHeight);
+  void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, uint8_t paragraphAlignment,
+                              uint16_t viewportWidth, uint16_t viewportHeight);
  uint32_t onPageComplete(std::unique_ptr<Page> page);

 public:
@@ -28,11 +28,12 @@ class Section {
        renderer(renderer),
        filePath(epub->getCachePath() + "/sections/" + std::to_string(spineIndex) + ".bin") {}
  ~Section() = default;
-  bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint16_t viewportWidth,
-                       uint16_t viewportHeight);
+  bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint8_t paragraphAlignment,
+                       uint16_t viewportWidth, uint16_t viewportHeight);
  bool clearCache() const;
-  bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint16_t viewportWidth,
-                         uint16_t viewportHeight, const std::function<void()>& progressSetupFn = nullptr,
+  bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint8_t paragraphAlignment,
+                         uint16_t viewportWidth, uint16_t viewportHeight,
+                         const std::function<void()>& progressSetupFn = nullptr,
                         const std::function<void(int)>& progressFn = nullptr);
  std::unique_ptr<Page> loadPageFromSectionFile();
 };
--- a/lib/Epub/Epub/htmlEntities.cpp
+++ b/lib/Epub/Epub/htmlEntities.cpp
@@ -1,163 +0,0 @@
-// from
-// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
-
-#include "htmlEntities.h"
-
-#include <cstring>
-#include <unordered_map>
-
-const int MAX_ENTITY_LENGTH = 10;
-
-// Use book: entities_ww2.epub to test this (Page 7: Entities parser test)
-// Note the supported keys are only in lowercase
-// Store the mappings in a unordered hash map
-static std::unordered_map<std::string, std::string> entity_lookup(
-    {{"&quot;", "\""},  {"&frasl;", "⁄"},   {"&amp;", "&"},      {"&lt;", "<"},     {"&gt;", ">"},
-     {"&Agrave;", "À"}, {"&Aacute;", "Á"},  {"&Acirc;", "Â"},    {"&Atilde;", "Ã"}, {"&Auml;", "Ä"},
-     {"&Aring;", "Å"},  {"&AElig;", "Æ"},   {"&Ccedil;", "Ç"},   {"&Egrave;", "È"}, {"&Eacute;", "É"},
-     {"&Ecirc;", "Ê"},  {"&Euml;", "Ë"},    {"&Igrave;", "Ì"},   {"&Iacute;", "Í"}, {"&Icirc;", "Î"},
-     {"&Iuml;", "Ï"},   {"&ETH;", "Ð"},     {"&Ntilde;", "Ñ"},   {"&Ograve;", "Ò"}, {"&Oacute;", "Ó"},
-     {"&Ocirc;", "Ô"},  {"&Otilde;", "Õ"},  {"&Ouml;", "Ö"},     {"&Oslash;", "Ø"}, {"&Ugrave;", "Ù"},
-     {"&Uacute;", "Ú"}, {"&Ucirc;", "Û"},   {"&Uuml;", "Ü"},     {"&Yacute;", "Ý"}, {"&THORN;", "Þ"},
-     {"&szlig;", "ß"},  {"&agrave;", "à"},  {"&aacute;", "á"},   {"&acirc;", "â"},  {"&atilde;", "ã"},
-     {"&auml;", "ä"},   {"&aring;", "å"},   {"&aelig;", "æ"},    {"&ccedil;", "ç"}, {"&egrave;", "è"},
-     {"&eacute;", "é"}, {"&ecirc;", "ê"},   {"&euml;", "ë"},     {"&igrave;", "ì"}, {"&iacute;", "í"},
-     {"&icirc;", "î"},  {"&iuml;", "ï"},    {"&eth;", "ð"},      {"&ntilde;", "ñ"}, {"&ograve;", "ò"},
-     {"&oacute;", "ó"}, {"&ocirc;", "ô"},   {"&otilde;", "õ"},   {"&ouml;", "ö"},   {"&oslash;", "ø"},
-     {"&ugrave;", "ù"}, {"&uacute;", "ú"},  {"&ucirc;", "û"},    {"&uuml;", "ü"},   {"&yacute;", "ý"},
-     {"&thorn;", "þ"},  {"&yuml;", "ÿ"},    {"&nbsp;", " "},     {"&iexcl;", "¡"},  {"&cent;", "¢"},
-     {"&pound;", "£"},  {"&curren;", "¤"},  {"&yen;", "¥"},      {"&brvbar;", "¦"}, {"&sect;", "§"},
-     {"&uml;", "¨"},    {"&copy;", "©"},    {"&ordf;", "ª"},     {"&laquo;", "«"},  {"&not;", "¬"},
-     {"&shy;", ""},    {"&reg;", "®"},     {"&macr;", "¯"},     {"&deg;", "°"},    {"&plusmn;", "±"},
-     {"&sup2;", "²"},   {"&sup3;", "³"},    {"&acute;", "´"},    {"&micro;", "µ"},  {"&para;", "¶"},
-     {"&cedil;", "¸"},  {"&sup1;", "¹"},    {"&ordm;", "º"},     {"&raquo;", "»"},  {"&frac14;", "¼"},
-     {"&frac12;", "½"}, {"&frac34;", "¾"},  {"&iquest;", "¿"},   {"&times;", "×"},  {"&divide;", "÷"},
-     {"&forall;", "∀"}, {"&part;", "∂"},    {"&exist;", "∃"},    {"&empty;", "∅"},  {"&nabla;", "∇"},
-     {"&isin;", "∈"},   {"&notin;", "∉"},   {"&ni;", "∋"},       {"&prod;", "∏"},   {"&sum;", "∑"},
-     {"&minus;", "−"},  {"&lowast;", "∗"},  {"&radic;", "√"},    {"&prop;", "∝"},   {"&infin;", "∞"},
-     {"&ang;", "∠"},    {"&and;", "∧"},     {"&or;", "∨"},       {"&cap;", "∩"},    {"&cup;", "∪"},
-     {"&int;", "∫"},    {"&there4;", "∴"},  {"&sim;", "∼"},      {"&cong;", "≅"},   {"&asymp;", "≈"},
-     {"&ne;", "≠"},     {"&equiv;", "≡"},   {"&le;", "≤"},       {"&ge;", "≥"},     {"&sub;", "⊂"},
-     {"&sup;", "⊃"},    {"&nsub;", "⊄"},    {"&sube;", "⊆"},     {"&supe;", "⊇"},   {"&oplus;", "⊕"},
-     {"&otimes;", "⊗"}, {"&perp;", "⊥"},    {"&sdot;", "⋅"},     {"&Alpha;", "Α"},  {"&Beta;", "Β"},
-     {"&Gamma;", "Γ"},  {"&Delta;", "Δ"},   {"&Epsilon;", "Ε"},  {"&Zeta;", "Ζ"},   {"&Eta;", "Η"},
-     {"&Theta;", "Θ"},  {"&Iota;", "Ι"},    {"&Kappa;", "Κ"},    {"&Lambda;", "Λ"}, {"&Mu;", "Μ"},
-     {"&Nu;", "Ν"},     {"&Xi;", "Ξ"},      {"&Omicron;", "Ο"},  {"&Pi;", "Π"},     {"&Rho;", "Ρ"},
-     {"&Sigma;", "Σ"},  {"&Tau;", "Τ"},     {"&Upsilon;", "Υ"},  {"&Phi;", "Φ"},    {"&Chi;", "Χ"},
-     {"&Psi;", "Ψ"},    {"&Omega;", "Ω"},   {"&alpha;", "α"},    {"&beta;", "β"},   {"&gamma;", "γ"},
-     {"&delta;", "δ"},  {"&epsilon;", "ε"}, {"&zeta;", "ζ"},     {"&eta;", "η"},    {"&theta;", "θ"},
-     {"&iota;", "ι"},   {"&kappa;", "κ"},   {"&lambda;", "λ"},   {"&mu;", "μ"},     {"&nu;", "ν"},
-     {"&xi;", "ξ"},     {"&omicron;", "ο"}, {"&pi;", "π"},       {"&rho;", "ρ"},    {"&sigmaf;", "ς"},
-     {"&sigma;", "σ"},  {"&tau;", "τ"},     {"&upsilon;", "υ"},  {"&phi;", "φ"},    {"&chi;", "χ"},
-     {"&psi;", "ψ"},    {"&omega;", "ω"},   {"&thetasym;", "ϑ"}, {"&upsih;", "ϒ"},  {"&piv;", "ϖ"},
-     {"&OElig;", "Œ"},  {"&oelig;", "œ"},   {"&Scaron;", "Š"},   {"&scaron;", "š"}, {"&Yuml;", "Ÿ"},
-     {"&fnof;", "ƒ"},   {"&circ;", "ˆ"},    {"&tilde;", "˜"},    {"&ensp;", ""},    {"&emsp;", ""},
-     {"&thinsp;", ""},  {"&zwnj;", "‌"},  {"&zwj;", "‍"},    {"&lrm;", "‎"},  {"&rlm;", "‏"},
-     {"&ndash;", "–"},  {"&mdash;", "—"},   {"&lsquo;", "‘"},    {"&rsquo;", "’"},  {"&sbquo;", "‚"},
-     {"&ldquo;", "“"},  {"&rdquo;", "”"},   {"&bdquo;", "„"},    {"&dagger;", "†"}, {"&Dagger;", "‡"},
-     {"&bull;", "•"},   {"&hellip;", "…"},  {"&permil;", "‰"},   {"&prime;", "′"},  {"&Prime;", "″"},
-     {"&lsaquo;", "‹"}, {"&rsaquo;", "›"},  {"&oline;", "‾"},    {"&euro;", "€"},   {"&trade;", "™"},
-     {"&larr;", "←"},   {"&uarr;", "↑"},    {"&rarr;", "→"},     {"&darr;", "↓"},   {"&harr;", "↔"},
-     {"&crarr;", "↵"},  {"&lceil;", "⌈"},   {"&rceil;", "⌉"},    {"&lfloor;", "⌊"}, {"&rfloor;", "⌋"},
-     {"&loz;", "◊"},    {"&spades;", "♠"},  {"&clubs;", "♣"},    {"&hearts;", "♥"}, {"&diams;", "♦"}});
-
-// converts from a unicode code point to the utf8 equivalent
-void convert_to_utf8(const int code, std::string& res) {
-  // convert to a utf8 sequence
-  if (code < 0x80) {
-    res += static_cast<char>(code);
-  } else if (code < 0x800) {
-    res += static_cast<char>(0xc0 | (code >> 6));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x10000) {
-    res += static_cast<char>(0xe0 | (code >> 12));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x200000) {
-    res += static_cast<char>(0xf0 | (code >> 18));
-    res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x4000000) {
-    res += static_cast<char>(0xf8 | (code >> 24));
-    res += static_cast<char>(0x80 | ((code >> 18) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x80000000) {
-    res += static_cast<char>(0xfc | (code >> 30));
-    res += static_cast<char>(0x80 | ((code >> 24) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 18) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-  }
-}
-
-// handles numeric entities - e.g. &#1234; or &#x1234;
-bool process_numeric_entity(const std::string& entity, std::string& res) {
-  int code = 0;
-  // is it hex?
-  if (entity[2] == 'x' || entity[2] == 'X') {
-    // parse the hex code
-    code = strtol(entity.substr(3, entity.size() - 3).c_str(), nullptr, 16);
-  } else {
-    code = strtol(entity.substr(2, entity.size() - 3).c_str(), nullptr, 10);
-  }
-  if (code != 0) {
-    // special handling for nbsp
-    if (code == 0xA0) {
-      res += " ";
-    } else {
-      convert_to_utf8(code, res);
-    }
-    return true;
-  }
-  return false;
-}
-
-// handles named entities - e.g. &amp;
-bool process_string_entity(const std::string& entity, std::string& res) {
-  // it's a named entity - find it in the lookup table
-  // find it in the map
-  const auto it = entity_lookup.find(entity);
-  if (it != entity_lookup.end()) {
-    res += it->second;
-    return true;
-  }
-  return false;
-}
-
-// replace all the entities in the string
-std::string replaceHtmlEntities(const char* text) {
-  std::string res;
-  res.reserve(strlen(text));
-  for (int i = 0; i < strlen(text); ++i) {
-    bool flag = false;
-    // do we have a potential entity?
-    if (text[i] == '&') {
-      // find the end of the entity
-      int j = i + 1;
-      while (j < strlen(text) && text[j] != ';' && j - i < MAX_ENTITY_LENGTH) {
-        j++;
-      }
-      if (j - i > 2) {
-        char entity[j - i + 1];
-        strncpy(entity, text + i, j - i);
-        // is it a numeric code?
-        if (entity[1] == '#') {
-          flag = process_numeric_entity(entity, res);
-        } else {
-          flag = process_string_entity(entity, res);
-        }
-        // skip past the entity if we successfully decoded it
-        if (flag) {
-          i = j;
-        }
-      }
-    }
-    if (!flag) {
-      res += text[i];
-    }
-  }
-  return res;
-}
--- a/lib/Epub/Epub/htmlEntities.h
+++ b/lib/Epub/Epub/htmlEntities.h
@@ -1,7 +0,0 @@
-// from
-// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
-
-#pragma once
-#include <string>
-
-std::string replaceHtmlEntities(const char* text);
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -6,7 +6,6 @@
 #include <expat.h>

 #include "../Page.h"
-#include "../htmlEntities.h"

 const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
 constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
@@ -97,7 +96,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
    if (strcmp(name, "br") == 0) {
      self->startNewTextBlock(self->currentTextBlock->getStyle());
    } else {
-      self->startNewTextBlock(TextBlock::JUSTIFIED);
+      self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment);
    }
  } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
    self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
@@ -130,17 +129,32 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
      // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
      if (self->partWordBufferIndex > 0) {
        self->partWordBuffer[self->partWordBufferIndex] = '\0';
-        self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+        self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
        self->partWordBufferIndex = 0;
      }
      // Skip the whitespace char
      continue;
    }

+    // Skip soft-hyphen with UTF-8 representation (U+00AD) = 0xC2 0xAD
+    const XML_Char SHY_BYTE_1 = static_cast<XML_Char>(0xC2);
+    const XML_Char SHY_BYTE_2 = static_cast<XML_Char>(0xAD);
+    // 1. Check for the start of the 2-byte Soft Hyphen sequence
+    if (s[i] == SHY_BYTE_1) {
+      // 2. Check if the next byte exists AND if it completes the sequence
+      //    We must check i + 1 < len to prevent reading past the end of the buffer.
+      if ((i + 1 < len) && (s[i + 1] == SHY_BYTE_2)) {
+        // Sequence 0xC2 0xAD found!
+        // Skip the current byte (0xC2) and the next byte (0xAD)
+        i++;       // Increment 'i' one more time to skip the 0xAD byte
+        continue;  // Skip the rest of the loop and move to the next iteration
+      }
+    }
+
    // If we're about to run out of space, then cut the word off and start a new one
    if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
      self->partWordBuffer[self->partWordBufferIndex] = '\0';
-      self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+      self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
      self->partWordBufferIndex = 0;
    }

@@ -182,7 +196,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
      }

      self->partWordBuffer[self->partWordBufferIndex] = '\0';
-      self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+      self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
      self->partWordBufferIndex = 0;
    }
  }
@@ -206,7 +220,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
 }

 bool ChapterHtmlSlimParser::parseAndBuildPages() {
-  startNewTextBlock(TextBlock::JUSTIFIED);
+  startNewTextBlock((TextBlock::Style)this->paragraphAlignment);

  const XML_Parser parser = XML_ParserCreate(nullptr);
  int done;
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
@@ -33,6 +33,7 @@ class ChapterHtmlSlimParser {
  int fontId;
  float lineCompression;
  bool extraParagraphSpacing;
+  uint8_t paragraphAlignment;
  uint16_t viewportWidth;
  uint16_t viewportHeight;

@@ -46,7 +47,8 @@ class ChapterHtmlSlimParser {
 public:
  explicit ChapterHtmlSlimParser(const std::string& filepath, GfxRenderer& renderer, const int fontId,
                                 const float lineCompression, const bool extraParagraphSpacing,
-                                 const uint16_t viewportWidth, const uint16_t viewportHeight,
+                                 const uint8_t paragraphAlignment, const uint16_t viewportWidth,
+                                 const uint16_t viewportHeight,
                                 const std::function<void(std::unique_ptr<Page>)>& completePageFn,
                                 const std::function<void(int)>& progressFn = nullptr)
      : filepath(filepath),
@@ -54,6 +56,7 @@ class ChapterHtmlSlimParser {
        fontId(fontId),
        lineCompression(lineCompression),
        extraParagraphSpacing(extraParagraphSpacing),
+        paragraphAlignment(paragraphAlignment),
        viewportWidth(viewportWidth),
        viewportHeight(viewportHeight),
        completePageFn(completePageFn),
--- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp
+++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp
@@ -161,6 +161,7 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
    std::string itemId;
    std::string href;
    std::string mediaType;
+    std::string properties;

    for (int i = 0; atts[i]; i += 2) {
      if (strcmp(atts[i], "id") == 0) {
@@ -169,6 +170,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
        href = self->baseContentPath + atts[i + 1];
      } else if (strcmp(atts[i], "media-type") == 0) {
        mediaType = atts[i + 1];
+      } else if (strcmp(atts[i], "properties") == 0) {
+        properties = atts[i + 1];
      }
    }

@@ -188,6 +191,15 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
                      href.c_str());
      }
    }
+
+    // EPUB 3: Check for nav document (properties contains "nav")
+    if (!properties.empty() && self->tocNavPath.empty()) {
+      // Properties is space-separated, check if "nav" is present as a word
+      if (properties == "nav" || properties.find("nav ") == 0 || properties.find(" nav") != std::string::npos) {
+        self->tocNavPath = href;
+        Serial.printf("[%lu] [COF] Found EPUB 3 nav document: %s\n", millis(), href.c_str());
+      }
+    }
    return;
  }

--- a/lib/Epub/Epub/parsers/ContentOpfParser.h
+++ b/lib/Epub/Epub/parsers/ContentOpfParser.h
@@ -35,6 +35,7 @@ class ContentOpfParser final : public Print {
  std::string title;
  std::string author;
  std::string tocNcxPath;
+  std::string tocNavPath;  // EPUB 3 nav document path
  std::string coverItemHref;
  std::string textReferenceHref;

--- a/lib/Epub/Epub/parsers/TocNavParser.cpp
+++ b/lib/Epub/Epub/parsers/TocNavParser.cpp
@@ -0,0 +1,184 @@
+#include "TocNavParser.h"
+
+#include <HardwareSerial.h>
+
+#include "../BookMetadataCache.h"
+
+bool TocNavParser::setup() {
+  parser = XML_ParserCreate(nullptr);
+  if (!parser) {
+    Serial.printf("[%lu] [NAV] Couldn't allocate memory for parser\n", millis());
+    return false;
+  }
+
+  XML_SetUserData(parser, this);
+  XML_SetElementHandler(parser, startElement, endElement);
+  XML_SetCharacterDataHandler(parser, characterData);
+  return true;
+}
+
+TocNavParser::~TocNavParser() {
+  if (parser) {
+    XML_StopParser(parser, XML_FALSE);
+    XML_SetElementHandler(parser, nullptr, nullptr);
+    XML_SetCharacterDataHandler(parser, nullptr);
+    XML_ParserFree(parser);
+    parser = nullptr;
+  }
+}
+
+size_t TocNavParser::write(const uint8_t data) { return write(&data, 1); }
+
+size_t TocNavParser::write(const uint8_t* buffer, const size_t size) {
+  if (!parser) return 0;
+
+  const uint8_t* currentBufferPos = buffer;
+  auto remainingInBuffer = size;
+
+  while (remainingInBuffer > 0) {
+    void* const buf = XML_GetBuffer(parser, 1024);
+    if (!buf) {
+      Serial.printf("[%lu] [NAV] Couldn't allocate memory for buffer\n", millis());
+      XML_StopParser(parser, XML_FALSE);
+      XML_SetElementHandler(parser, nullptr, nullptr);
+      XML_SetCharacterDataHandler(parser, nullptr);
+      XML_ParserFree(parser);
+      parser = nullptr;
+      return 0;
+    }
+
+    const auto toRead = remainingInBuffer < 1024 ? remainingInBuffer : 1024;
+    memcpy(buf, currentBufferPos, toRead);
+
+    if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
+      Serial.printf("[%lu] [NAV] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
+                    XML_ErrorString(XML_GetErrorCode(parser)));
+      XML_StopParser(parser, XML_FALSE);
+      XML_SetElementHandler(parser, nullptr, nullptr);
+      XML_SetCharacterDataHandler(parser, nullptr);
+      XML_ParserFree(parser);
+      parser = nullptr;
+      return 0;
+    }
+
+    currentBufferPos += toRead;
+    remainingInBuffer -= toRead;
+    remainingSize -= toRead;
+  }
+  return size;
+}
+
+void XMLCALL TocNavParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
+  auto* self = static_cast<TocNavParser*>(userData);
+
+  // Track HTML structure loosely - we mainly care about finding <nav epub:type="toc">
+  if (strcmp(name, "html") == 0) {
+    self->state = IN_HTML;
+    return;
+  }
+
+  if (self->state == IN_HTML && strcmp(name, "body") == 0) {
+    self->state = IN_BODY;
+    return;
+  }
+
+  // Look for <nav epub:type="toc"> anywhere in body (or nested elements)
+  if (self->state >= IN_BODY && strcmp(name, "nav") == 0) {
+    for (int i = 0; atts[i]; i += 2) {
+      if ((strcmp(atts[i], "epub:type") == 0 || strcmp(atts[i], "type") == 0) && strcmp(atts[i + 1], "toc") == 0) {
+        self->state = IN_NAV_TOC;
+        Serial.printf("[%lu] [NAV] Found nav toc element\n", millis());
+        return;
+      }
+    }
+    return;
+  }
+
+  // Only process ol/li/a if we're inside the toc nav
+  if (self->state < IN_NAV_TOC) {
+    return;
+  }
+
+  if (strcmp(name, "ol") == 0) {
+    self->olDepth++;
+    self->state = IN_OL;
+    return;
+  }
+
+  if (self->state == IN_OL && strcmp(name, "li") == 0) {
+    self->state = IN_LI;
+    self->currentLabel.clear();
+    self->currentHref.clear();
+    return;
+  }
+
+  if (self->state == IN_LI && strcmp(name, "a") == 0) {
+    self->state = IN_ANCHOR;
+    // Get href attribute
+    for (int i = 0; atts[i]; i += 2) {
+      if (strcmp(atts[i], "href") == 0) {
+        self->currentHref = atts[i + 1];
+        break;
+      }
+    }
+    return;
+  }
+}
+
+void XMLCALL TocNavParser::characterData(void* userData, const XML_Char* s, const int len) {
+  auto* self = static_cast<TocNavParser*>(userData);
+
+  // Only collect text when inside an anchor within the TOC nav
+  if (self->state == IN_ANCHOR) {
+    self->currentLabel.append(s, len);
+  }
+}
+
+void XMLCALL TocNavParser::endElement(void* userData, const XML_Char* name) {
+  auto* self = static_cast<TocNavParser*>(userData);
+
+  if (strcmp(name, "a") == 0 && self->state == IN_ANCHOR) {
+    // Create TOC entry when closing anchor tag (we have all data now)
+    if (!self->currentLabel.empty() && !self->currentHref.empty()) {
+      std::string href = self->baseContentPath + self->currentHref;
+      std::string anchor;
+
+      const size_t pos = href.find('#');
+      if (pos != std::string::npos) {
+        anchor = href.substr(pos + 1);
+        href = href.substr(0, pos);
+      }
+
+      if (self->cache) {
+        // olDepth gives us the nesting level (1-based from the outer ol)
+        self->cache->createTocEntry(self->currentLabel, href, anchor, self->olDepth);
+      }
+
+      self->currentLabel.clear();
+      self->currentHref.clear();
+    }
+    self->state = IN_LI;
+    return;
+  }
+
+  if (strcmp(name, "li") == 0 && (self->state == IN_LI || self->state == IN_OL)) {
+    self->state = IN_OL;
+    return;
+  }
+
+  if (strcmp(name, "ol") == 0 && self->state >= IN_NAV_TOC) {
+    self->olDepth--;
+    if (self->olDepth == 0) {
+      self->state = IN_NAV_TOC;
+    } else {
+      self->state = IN_LI;  // Back to parent li
+    }
+    return;
+  }
+
+  if (strcmp(name, "nav") == 0 && self->state >= IN_NAV_TOC) {
+    self->state = IN_BODY;
+    Serial.printf("[%lu] [NAV] Finished parsing nav toc\n", millis());
+    return;
+  }
+}
--- a/lib/Epub/Epub/parsers/TocNavParser.h
+++ b/lib/Epub/Epub/parsers/TocNavParser.h
@@ -0,0 +1,47 @@
+#pragma once
+#include <Print.h>
+#include <expat.h>
+
+#include <string>
+
+class BookMetadataCache;
+
+// Parser for EPUB 3 nav.xhtml navigation documents
+// Parses HTML5 nav elements with epub:type="toc" to extract table of contents
+class TocNavParser final : public Print {
+  enum ParserState {
+    START,
+    IN_HTML,
+    IN_BODY,
+    IN_NAV_TOC,  // Inside <nav epub:type="toc">
+    IN_OL,       // Inside <ol>
+    IN_LI,       // Inside <li>
+    IN_ANCHOR,   // Inside <a>
+  };
+
+  const std::string& baseContentPath;
+  size_t remainingSize;
+  XML_Parser parser = nullptr;
+  ParserState state = START;
+  BookMetadataCache* cache;
+
+  // Track nesting depth for <ol> elements to determine TOC depth
+  uint8_t olDepth = 0;
+  // Current entry data being collected
+  std::string currentLabel;
+  std::string currentHref;
+
+  static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
+  static void characterData(void* userData, const XML_Char* s, int len);
+  static void endElement(void* userData, const XML_Char* name);
+
+ public:
+  explicit TocNavParser(const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache)
+      : baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
+  ~TocNavParser() override;
+
+  bool setup();
+
+  size_t write(uint8_t) override;
+  size_t write(const uint8_t* buffer, size_t size) override;
+};