Merge branch 'master' into hyphenation-v2

2026-01-07 20:42:53 +05:00
parent 8580277131 0bae3bbf64
commit f589c60690
32 changed files with 2386 additions and 288 deletions
--- a/lib/Epub/Epub/htmlEntities.cpp
+++ b/lib/Epub/Epub/htmlEntities.cpp
@@ -1,163 +0,0 @@
-// from
-// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
-
-#include "htmlEntities.h"
-
-#include <cstring>
-#include <unordered_map>
-
-const int MAX_ENTITY_LENGTH = 10;
-
-// Use book: entities_ww2.epub to test this (Page 7: Entities parser test)
-// Note the supported keys are only in lowercase
-// Store the mappings in a unordered hash map
-static std::unordered_map<std::string, std::string> entity_lookup(
-    {{"&quot;", "\""},  {"&frasl;", "⁄"},   {"&amp;", "&"},      {"&lt;", "<"},     {"&gt;", ">"},
-     {"&Agrave;", "À"}, {"&Aacute;", "Á"},  {"&Acirc;", "Â"},    {"&Atilde;", "Ã"}, {"&Auml;", "Ä"},
-     {"&Aring;", "Å"},  {"&AElig;", "Æ"},   {"&Ccedil;", "Ç"},   {"&Egrave;", "È"}, {"&Eacute;", "É"},
-     {"&Ecirc;", "Ê"},  {"&Euml;", "Ë"},    {"&Igrave;", "Ì"},   {"&Iacute;", "Í"}, {"&Icirc;", "Î"},
-     {"&Iuml;", "Ï"},   {"&ETH;", "Ð"},     {"&Ntilde;", "Ñ"},   {"&Ograve;", "Ò"}, {"&Oacute;", "Ó"},
-     {"&Ocirc;", "Ô"},  {"&Otilde;", "Õ"},  {"&Ouml;", "Ö"},     {"&Oslash;", "Ø"}, {"&Ugrave;", "Ù"},
-     {"&Uacute;", "Ú"}, {"&Ucirc;", "Û"},   {"&Uuml;", "Ü"},     {"&Yacute;", "Ý"}, {"&THORN;", "Þ"},
-     {"&szlig;", "ß"},  {"&agrave;", "à"},  {"&aacute;", "á"},   {"&acirc;", "â"},  {"&atilde;", "ã"},
-     {"&auml;", "ä"},   {"&aring;", "å"},   {"&aelig;", "æ"},    {"&ccedil;", "ç"}, {"&egrave;", "è"},
-     {"&eacute;", "é"}, {"&ecirc;", "ê"},   {"&euml;", "ë"},     {"&igrave;", "ì"}, {"&iacute;", "í"},
-     {"&icirc;", "î"},  {"&iuml;", "ï"},    {"&eth;", "ð"},      {"&ntilde;", "ñ"}, {"&ograve;", "ò"},
-     {"&oacute;", "ó"}, {"&ocirc;", "ô"},   {"&otilde;", "õ"},   {"&ouml;", "ö"},   {"&oslash;", "ø"},
-     {"&ugrave;", "ù"}, {"&uacute;", "ú"},  {"&ucirc;", "û"},    {"&uuml;", "ü"},   {"&yacute;", "ý"},
-     {"&thorn;", "þ"},  {"&yuml;", "ÿ"},    {"&nbsp;", " "},     {"&iexcl;", "¡"},  {"&cent;", "¢"},
-     {"&pound;", "£"},  {"&curren;", "¤"},  {"&yen;", "¥"},      {"&brvbar;", "¦"}, {"&sect;", "§"},
-     {"&uml;", "¨"},    {"&copy;", "©"},    {"&ordf;", "ª"},     {"&laquo;", "«"},  {"&not;", "¬"},
-     {"&shy;", ""},    {"&reg;", "®"},     {"&macr;", "¯"},     {"&deg;", "°"},    {"&plusmn;", "±"},
-     {"&sup2;", "²"},   {"&sup3;", "³"},    {"&acute;", "´"},    {"&micro;", "µ"},  {"&para;", "¶"},
-     {"&cedil;", "¸"},  {"&sup1;", "¹"},    {"&ordm;", "º"},     {"&raquo;", "»"},  {"&frac14;", "¼"},
-     {"&frac12;", "½"}, {"&frac34;", "¾"},  {"&iquest;", "¿"},   {"&times;", "×"},  {"&divide;", "÷"},
-     {"&forall;", "∀"}, {"&part;", "∂"},    {"&exist;", "∃"},    {"&empty;", "∅"},  {"&nabla;", "∇"},
-     {"&isin;", "∈"},   {"&notin;", "∉"},   {"&ni;", "∋"},       {"&prod;", "∏"},   {"&sum;", "∑"},
-     {"&minus;", "−"},  {"&lowast;", "∗"},  {"&radic;", "√"},    {"&prop;", "∝"},   {"&infin;", "∞"},
-     {"&ang;", "∠"},    {"&and;", "∧"},     {"&or;", "∨"},       {"&cap;", "∩"},    {"&cup;", "∪"},
-     {"&int;", "∫"},    {"&there4;", "∴"},  {"&sim;", "∼"},      {"&cong;", "≅"},   {"&asymp;", "≈"},
-     {"&ne;", "≠"},     {"&equiv;", "≡"},   {"&le;", "≤"},       {"&ge;", "≥"},     {"&sub;", "⊂"},
-     {"&sup;", "⊃"},    {"&nsub;", "⊄"},    {"&sube;", "⊆"},     {"&supe;", "⊇"},   {"&oplus;", "⊕"},
-     {"&otimes;", "⊗"}, {"&perp;", "⊥"},    {"&sdot;", "⋅"},     {"&Alpha;", "Α"},  {"&Beta;", "Β"},
-     {"&Gamma;", "Γ"},  {"&Delta;", "Δ"},   {"&Epsilon;", "Ε"},  {"&Zeta;", "Ζ"},   {"&Eta;", "Η"},
-     {"&Theta;", "Θ"},  {"&Iota;", "Ι"},    {"&Kappa;", "Κ"},    {"&Lambda;", "Λ"}, {"&Mu;", "Μ"},
-     {"&Nu;", "Ν"},     {"&Xi;", "Ξ"},      {"&Omicron;", "Ο"},  {"&Pi;", "Π"},     {"&Rho;", "Ρ"},
-     {"&Sigma;", "Σ"},  {"&Tau;", "Τ"},     {"&Upsilon;", "Υ"},  {"&Phi;", "Φ"},    {"&Chi;", "Χ"},
-     {"&Psi;", "Ψ"},    {"&Omega;", "Ω"},   {"&alpha;", "α"},    {"&beta;", "β"},   {"&gamma;", "γ"},
-     {"&delta;", "δ"},  {"&epsilon;", "ε"}, {"&zeta;", "ζ"},     {"&eta;", "η"},    {"&theta;", "θ"},
-     {"&iota;", "ι"},   {"&kappa;", "κ"},   {"&lambda;", "λ"},   {"&mu;", "μ"},     {"&nu;", "ν"},
-     {"&xi;", "ξ"},     {"&omicron;", "ο"}, {"&pi;", "π"},       {"&rho;", "ρ"},    {"&sigmaf;", "ς"},
-     {"&sigma;", "σ"},  {"&tau;", "τ"},     {"&upsilon;", "υ"},  {"&phi;", "φ"},    {"&chi;", "χ"},
-     {"&psi;", "ψ"},    {"&omega;", "ω"},   {"&thetasym;", "ϑ"}, {"&upsih;", "ϒ"},  {"&piv;", "ϖ"},
-     {"&OElig;", "Œ"},  {"&oelig;", "œ"},   {"&Scaron;", "Š"},   {"&scaron;", "š"}, {"&Yuml;", "Ÿ"},
-     {"&fnof;", "ƒ"},   {"&circ;", "ˆ"},    {"&tilde;", "˜"},    {"&ensp;", ""},    {"&emsp;", ""},
-     {"&thinsp;", ""},  {"&zwnj;", "‌"},  {"&zwj;", "‍"},    {"&lrm;", "‎"},  {"&rlm;", "‏"},
-     {"&ndash;", "–"},  {"&mdash;", "—"},   {"&lsquo;", "‘"},    {"&rsquo;", "’"},  {"&sbquo;", "‚"},
-     {"&ldquo;", "“"},  {"&rdquo;", "”"},   {"&bdquo;", "„"},    {"&dagger;", "†"}, {"&Dagger;", "‡"},
-     {"&bull;", "•"},   {"&hellip;", "…"},  {"&permil;", "‰"},   {"&prime;", "′"},  {"&Prime;", "″"},
-     {"&lsaquo;", "‹"}, {"&rsaquo;", "›"},  {"&oline;", "‾"},    {"&euro;", "€"},   {"&trade;", "™"},
-     {"&larr;", "←"},   {"&uarr;", "↑"},    {"&rarr;", "→"},     {"&darr;", "↓"},   {"&harr;", "↔"},
-     {"&crarr;", "↵"},  {"&lceil;", "⌈"},   {"&rceil;", "⌉"},    {"&lfloor;", "⌊"}, {"&rfloor;", "⌋"},
-     {"&loz;", "◊"},    {"&spades;", "♠"},  {"&clubs;", "♣"},    {"&hearts;", "♥"}, {"&diams;", "♦"}});
-
-// converts from a unicode code point to the utf8 equivalent
-void convert_to_utf8(const int code, std::string& res) {
-  // convert to a utf8 sequence
-  if (code < 0x80) {
-    res += static_cast<char>(code);
-  } else if (code < 0x800) {
-    res += static_cast<char>(0xc0 | (code >> 6));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x10000) {
-    res += static_cast<char>(0xe0 | (code >> 12));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x200000) {
-    res += static_cast<char>(0xf0 | (code >> 18));
-    res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x4000000) {
-    res += static_cast<char>(0xf8 | (code >> 24));
-    res += static_cast<char>(0x80 | ((code >> 18) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-    res += static_cast<char>(0x80 | (code & 0x3f));
-  } else if (code < 0x80000000) {
-    res += static_cast<char>(0xfc | (code >> 30));
-    res += static_cast<char>(0x80 | ((code >> 24) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 18) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 12) & 0x3f));
-    res += static_cast<char>(0x80 | ((code >> 6) & 0x3f));
-  }
-}
-
-// handles numeric entities - e.g. &#1234; or &#x1234;
-bool process_numeric_entity(const std::string& entity, std::string& res) {
-  int code = 0;
-  // is it hex?
-  if (entity[2] == 'x' || entity[2] == 'X') {
-    // parse the hex code
-    code = strtol(entity.substr(3, entity.size() - 3).c_str(), nullptr, 16);
-  } else {
-    code = strtol(entity.substr(2, entity.size() - 3).c_str(), nullptr, 10);
-  }
-  if (code != 0) {
-    // special handling for nbsp
-    if (code == 0xA0) {
-      res += " ";
-    } else {
-      convert_to_utf8(code, res);
-    }
-    return true;
-  }
-  return false;
-}
-
-// handles named entities - e.g. &amp;
-bool process_string_entity(const std::string& entity, std::string& res) {
-  // it's a named entity - find it in the lookup table
-  // find it in the map
-  const auto it = entity_lookup.find(entity);
-  if (it != entity_lookup.end()) {
-    res += it->second;
-    return true;
-  }
-  return false;
-}
-
-// replace all the entities in the string
-std::string replaceHtmlEntities(const char* text) {
-  std::string res;
-  res.reserve(strlen(text));
-  for (int i = 0; i < strlen(text); ++i) {
-    bool flag = false;
-    // do we have a potential entity?
-    if (text[i] == '&') {
-      // find the end of the entity
-      int j = i + 1;
-      while (j < strlen(text) && text[j] != ';' && j - i < MAX_ENTITY_LENGTH) {
-        j++;
-      }
-      if (j - i > 2) {
-        char entity[j - i + 1];
-        strncpy(entity, text + i, j - i);
-        // is it a numeric code?
-        if (entity[1] == '#') {
-          flag = process_numeric_entity(entity, res);
-        } else {
-          flag = process_string_entity(entity, res);
-        }
-        // skip past the entity if we successfully decoded it
-        if (flag) {
-          i = j;
-        }
-      }
-    }
-    if (!flag) {
-      res += text[i];
-    }
-  }
-  return res;
-}
--- a/lib/Epub/Epub/htmlEntities.h
+++ b/lib/Epub/Epub/htmlEntities.h
@@ -1,7 +0,0 @@
-// from
-// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
-
-#pragma once
-#include <string>
-
-std::string replaceHtmlEntities(const char* text);
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -6,7 +6,6 @@
 #include <expat.h>

 #include "../Page.h"
-#include "../htmlEntities.h"

 const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
 constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
@@ -130,7 +129,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
      // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
      if (self->partWordBufferIndex > 0) {
        self->partWordBuffer[self->partWordBufferIndex] = '\0';
-        self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+        self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
        self->partWordBufferIndex = 0;
      }
      // Skip the whitespace char
@@ -155,7 +154,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
    // If we're about to run out of space, then cut the word off and start a new one
    if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
      self->partWordBuffer[self->partWordBufferIndex] = '\0';
-      self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+      self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
      self->partWordBufferIndex = 0;
    }

@@ -197,7 +196,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
      }

      self->partWordBuffer[self->partWordBufferIndex] = '\0';
-      self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+      self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
      self->partWordBufferIndex = 0;
    }
  }
--- a/lib/OpdsParser/OpdsParser.cpp
+++ b/lib/OpdsParser/OpdsParser.cpp
@@ -0,0 +1,219 @@
+#include "OpdsParser.h"
+
+#include <HardwareSerial.h>
+
+#include <cstring>
+
+// Releases the Expat parser if one is still allocated: stop it, detach the handlers, then free it.
+OpdsParser::~OpdsParser() {
+  if (parser == nullptr) return;
+  XML_StopParser(parser, XML_FALSE);
+  XML_SetElementHandler(parser, nullptr, nullptr);
+  XML_SetCharacterDataHandler(parser, nullptr);
+  XML_ParserFree(parser);
+  parser = nullptr;
+}
+
+// Parses an in-memory OPDS feed, replacing earlier results; false on empty input, allocation failure or bad XML.
+bool OpdsParser::parse(const char* xmlData, const size_t length) {
+  clear();
+
+  if (!xmlData || length == 0) {  // otherwise the chunk loop never runs and an empty feed would "succeed"
+    Serial.printf("[%lu] [OPDS] No data to parse\n", millis());
+    return false;
+  }
+
+  parser = XML_ParserCreate(nullptr);
+  if (!parser) {
+    Serial.printf("[%lu] [OPDS] Couldn't allocate memory for parser\n", millis());
+    return false;
+  }
+
+  XML_SetUserData(parser, this);
+  XML_SetElementHandler(parser, startElement, endElement);
+  XML_SetCharacterDataHandler(parser, characterData);
+
+  // Parse in chunks to avoid large buffer allocations
+  const char* currentPos = xmlData;
+  size_t remaining = length;
+  constexpr size_t chunkSize = 1024;
+  bool ok = true;
+
+  while (ok && remaining > 0) {
+    void* const buf = XML_GetBuffer(parser, chunkSize);
+    if (!buf) {
+      Serial.printf("[%lu] [OPDS] Couldn't allocate memory for buffer\n", millis());
+      ok = false;
+      break;
+    }
+    const size_t toRead = remaining < chunkSize ? remaining : chunkSize;
+    memcpy(buf, currentPos, toRead);
+    currentPos += toRead;
+    remaining -= toRead;
+    // The chunk that exhausts the input is passed as the final one
+    if (XML_ParseBuffer(parser, static_cast<int>(toRead), remaining == 0) == XML_STATUS_ERROR) {
+      Serial.printf("[%lu] [OPDS] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
+                    XML_ErrorString(XML_GetErrorCode(parser)));
+      ok = false;
+    }
+  }
+  // Release the parser on both the success and the failure path
+  XML_ParserFree(parser);
+  parser = nullptr;
+  if (!ok) return false;
+  Serial.printf("[%lu] [OPDS] Parsed %zu entries\n", millis(), entries.size());
+  return true;
+}
+
+// Drops every parsed entry and resets the element-tracking state; parse() calls this before each run.
+void OpdsParser::clear() {
+  // Element-tracking flags: back to "outside any entry"
+  inEntry = inTitle = inAuthor = inAuthorName = inId = false;
+
+  // Accumulated data
+  currentText.clear();
+  currentEntry = OpdsEntry{};
+  entries.clear();
+}
+
+// Returns copies of the entries whose type is BOOK, in the order they were parsed.
+std::vector<OpdsEntry> OpdsParser::getBooks() const {
+  std::vector<OpdsEntry> result;
+  for (auto it = entries.begin(); it != entries.end(); ++it) {
+    if (it->type != OpdsEntryType::BOOK) continue;
+    result.push_back(*it);
+  }
+  return result;
+}
+
+// Looks `name` up in a null-terminated name/value attribute array; returns its value, or nullptr if absent.
+const char* OpdsParser::findAttribute(const XML_Char** atts, const char* name) {
+  for (const XML_Char** pair = atts; *pair != nullptr; pair += 2) {
+    if (strcmp(pair[0], name) != 0) continue;
+    return pair[1];
+  }
+  return nullptr;
+}
+
+// Returns the element name without its namespace prefix, e.g. "atom:entry" -> "entry". Matching on this
+// instead of a ":id"-style substring keeps tags such as "dc:identifier" from being mistaken for <id>.
+static const char* localName(const XML_Char* name) {
+  const char* const colon = strrchr(name, ':');
+  return colon ? colon + 1 : name;
+}
+
+// Expat start-tag callback: opens a fresh record at <entry>; inside an entry it arms text capture for
+// title, author name and id, and classifies the entry from its <link> elements.
+void XMLCALL OpdsParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
+  auto* self = static_cast<OpdsParser*>(userData);
+  const char* const local = localName(name);
+
+  // Check for entry element (with or without namespace prefix)
+  if (strcmp(local, "entry") == 0) {
+    self->inEntry = true;
+    self->currentEntry = OpdsEntry{};
+    return;
+  }
+
+  if (!self->inEntry) return;
+
+  // Check for title element
+  if (strcmp(local, "title") == 0) {
+    self->inTitle = true;
+    self->currentText.clear();
+    return;
+  }
+
+  // Check for author element
+  if (strcmp(local, "author") == 0) {
+    self->inAuthor = true;
+    return;
+  }
+
+  // Check for author name element
+  if (self->inAuthor && strcmp(local, "name") == 0) {
+    self->inAuthorName = true;
+    self->currentText.clear();
+    return;
+  }
+
+  // Check for id element
+  if (strcmp(local, "id") == 0) {
+    self->inId = true;
+    self->currentText.clear();
+    return;
+  }
+
+  // Anything else only matters if it is a link element carrying an href
+  if (strcmp(local, "link") != 0) return;
+  const char* rel = findAttribute(atts, "rel");
+  const char* type = findAttribute(atts, "type");
+  const char* href = findAttribute(atts, "href");
+  if (!href) return;
+
+  // Check for acquisition link with epub type (this is a downloadable book)
+  if (rel && type && strstr(rel, "opds-spec.org/acquisition") != nullptr &&
+      strcmp(type, "application/epub+zip") == 0) {
+    self->currentEntry.type = OpdsEntryType::BOOK;
+    self->currentEntry.href = href;
+  } else if (type && strstr(type, "application/atom+xml") != nullptr &&
+             self->currentEntry.type != OpdsEntryType::BOOK) {
+    // Navigation link (atom+xml type); never replaces an epub link already recorded for this entry
+    self->currentEntry.type = OpdsEntryType::NAVIGATION;
+    self->currentEntry.href = href;
+  }
+}
+
+// Expat end-tag callback: stores captured text into the current entry and commits the entry at </entry>.
+void XMLCALL OpdsParser::endElement(void* userData, const XML_Char* name) {
+  auto* self = static_cast<OpdsParser*>(userData);
+  const char* const local = localName(name);
+
+  // Check for entry end
+  if (strcmp(local, "entry") == 0) {
+    // Only add entry if it has required fields (title and href)
+    if (!self->currentEntry.title.empty() && !self->currentEntry.href.empty()) {
+      self->entries.push_back(self->currentEntry);
+    }
+    self->inEntry = false;
+    self->currentEntry = OpdsEntry{};
+    return;
+  }
+
+  if (!self->inEntry) return;
+
+  // Check for title end
+  if (strcmp(local, "title") == 0) {
+    if (self->inTitle) self->currentEntry.title = self->currentText;
+    self->inTitle = false;
+    return;
+  }
+
+  // Check for author end
+  if (strcmp(local, "author") == 0) {
+    self->inAuthor = false;
+    return;
+  }
+
+  // Check for author name end
+  if (self->inAuthor && strcmp(local, "name") == 0) {
+    if (self->inAuthorName) self->currentEntry.author = self->currentText;
+    self->inAuthorName = false;
+    return;
+  }
+
+  // Check for id end
+  if (strcmp(local, "id") == 0) {
+    if (self->inId) self->currentEntry.id = self->currentText;
+    self->inId = false;
+  }
+}
+
+// Expat text callback: appends the `len` bytes at `s` to currentText while inside a title, author name or id.
+void XMLCALL OpdsParser::characterData(void* userData, const XML_Char* s, const int len) {
+  auto* opds = static_cast<OpdsParser*>(userData);
+
+  // Text outside those three elements is not kept
+  const bool capturing = opds->inTitle || opds->inAuthorName || opds->inId;
+  if (capturing) opds->currentText.append(s, len);
+}
--- a/lib/OpdsParser/OpdsParser.h
+++ b/lib/OpdsParser/OpdsParser.h
@@ -0,0 +1,99 @@
+#pragma once
+#include <expat.h>
+
+#include <string>
+#include <vector>
+
+/**
+ * Type of OPDS entry, decided by the parser from the entry's link elements.
+ */
+enum class OpdsEntryType {
+  NAVIGATION,  // Link to another catalog (link type contains application/atom+xml)
+  BOOK         // Downloadable book (acquisition link of type application/epub+zip)
+};
+
+/**
+ * Represents an entry from an OPDS feed (either a navigation link or a book).
+ */
+struct OpdsEntry {
+  OpdsEntryType type = OpdsEntryType::NAVIGATION;  // Switched to BOOK when an epub acquisition link is seen
+  std::string title;   // Text of the entry's title element; entries without one are not stored
+  std::string author;  // Text of the name element inside author, when present
+  std::string href;    // Navigation URL or epub download URL
+  std::string id;      // Text of the entry's id element
+};
+
+// Legacy alias for backward compatibility
+using OpdsBook = OpdsEntry;
+
+/**
+ * Parser for OPDS (Open Publication Distribution System) Atom feeds.
+ * Uses the Expat XML parser to parse OPDS catalog entries.
+ *
+ * Usage:
+ *   OpdsParser parser;
+ *   if (parser.parse(xmlData, xmlLength)) {
+ *     for (const auto& entry : parser.getEntries()) {
+ *       if (entry.type == OpdsEntryType::BOOK) {
+ *         // Downloadable book
+ *       } else {
+ *         // Navigation link to another catalog
+ *       }
+ *     }
+ *   }
+ */
+class OpdsParser {
+ public:
+  OpdsParser() = default;
+  ~OpdsParser();
+
+  // Disable copy (the object holds a raw Expat parser handle)
+  OpdsParser(const OpdsParser&) = delete;
+  OpdsParser& operator=(const OpdsParser&) = delete;
+
+  /**
+   * Parse an OPDS XML feed held in memory. Entries from a previous call are discarded first.
+   * @param xmlData Pointer to the XML data
+   * @param length Length of the XML data in bytes
+   * @return true if parsing succeeded, false on error
+   */
+  bool parse(const char* xmlData, size_t length);
+
+  /**
+   * Get the parsed entries (both navigation and book entries).
+   * @return Reference to the internal entry list; its contents change on the next parse() or clear()
+   */
+  const std::vector<OpdsEntry>& getEntries() const { return entries; }
+
+  /**
+   * Get only book entries (legacy compatibility).
+   * @return Copy of the entries whose type is OpdsEntryType::BOOK
+   */
+  std::vector<OpdsEntry> getBooks() const;
+
+  /**
+   * Clear all parsed entries and reset the element-tracking state.
+   */
+  void clear();
+
+ private:
+  // Expat callbacks; userData is the OpdsParser instance registered in parse()
+  static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);
+  static void XMLCALL endElement(void* userData, const XML_Char* name);
+  static void XMLCALL characterData(void* userData, const XML_Char* s, int len);
+
+  // Helper to find attribute value; returns nullptr when the attribute is absent
+  static const char* findAttribute(const XML_Char** atts, const char* name);
+
+  XML_Parser parser = nullptr;     // Expat handle; created and freed inside parse()
+  std::vector<OpdsEntry> entries;  // Completed entries that had both a title and an href
+  OpdsEntry currentEntry;          // Entry being assembled between entry start and end tags
+  std::string currentText;         // Character data collected for the title/name/id being read
+
+  // Parser state: which elements the callbacks are currently inside
+  bool inEntry = false;
+  bool inTitle = false;
+  bool inAuthor = false;
+  bool inAuthorName = false;
+  bool inId = false;
+};