Merge branch 'master' into hyphenation-v2
This commit is contained in:
@@ -1,163 +0,0 @@
|
||||
// from
|
||||
// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
|
||||
|
||||
#include "htmlEntities.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <unordered_map>
|
||||
|
||||
const int MAX_ENTITY_LENGTH = 10;
|
||||
|
||||
// Use book: entities_ww2.epub to test this (Page 7: Entities parser test)
|
||||
// Note the supported keys are only in lowercase
|
||||
// Store the mappings in an unordered hash map
|
||||
static std::unordered_map<std::string, std::string> entity_lookup(
|
||||
{{""", "\""}, {"⁄", "⁄"}, {"&", "&"}, {"<", "<"}, {">", ">"},
|
||||
{"À", "À"}, {"Á", "Á"}, {"Â", "Â"}, {"Ã", "Ã"}, {"Ä", "Ä"},
|
||||
{"Å", "Å"}, {"Æ", "Æ"}, {"Ç", "Ç"}, {"È", "È"}, {"É", "É"},
|
||||
{"Ê", "Ê"}, {"Ë", "Ë"}, {"Ì", "Ì"}, {"Í", "Í"}, {"Î", "Î"},
|
||||
{"Ï", "Ï"}, {"Ð", "Ð"}, {"Ñ", "Ñ"}, {"Ò", "Ò"}, {"Ó", "Ó"},
|
||||
{"Ô", "Ô"}, {"Õ", "Õ"}, {"Ö", "Ö"}, {"Ø", "Ø"}, {"Ù", "Ù"},
|
||||
{"Ú", "Ú"}, {"Û", "Û"}, {"Ü", "Ü"}, {"Ý", "Ý"}, {"Þ", "Þ"},
|
||||
{"ß", "ß"}, {"à", "à"}, {"á", "á"}, {"â", "â"}, {"ã", "ã"},
|
||||
{"ä", "ä"}, {"å", "å"}, {"æ", "æ"}, {"ç", "ç"}, {"è", "è"},
|
||||
{"é", "é"}, {"ê", "ê"}, {"ë", "ë"}, {"ì", "ì"}, {"í", "í"},
|
||||
{"î", "î"}, {"ï", "ï"}, {"ð", "ð"}, {"ñ", "ñ"}, {"ò", "ò"},
|
||||
{"ó", "ó"}, {"ô", "ô"}, {"õ", "õ"}, {"ö", "ö"}, {"ø", "ø"},
|
||||
{"ù", "ù"}, {"ú", "ú"}, {"û", "û"}, {"ü", "ü"}, {"ý", "ý"},
|
||||
{"þ", "þ"}, {"ÿ", "ÿ"}, {" ", " "}, {"¡", "¡"}, {"¢", "¢"},
|
||||
{"£", "£"}, {"¤", "¤"}, {"¥", "¥"}, {"¦", "¦"}, {"§", "§"},
|
||||
{"¨", "¨"}, {"©", "©"}, {"ª", "ª"}, {"«", "«"}, {"¬", "¬"},
|
||||
{"­", ""}, {"®", "®"}, {"¯", "¯"}, {"°", "°"}, {"±", "±"},
|
||||
{"²", "²"}, {"³", "³"}, {"´", "´"}, {"µ", "µ"}, {"¶", "¶"},
|
||||
{"¸", "¸"}, {"¹", "¹"}, {"º", "º"}, {"»", "»"}, {"¼", "¼"},
|
||||
{"½", "½"}, {"¾", "¾"}, {"¿", "¿"}, {"×", "×"}, {"÷", "÷"},
|
||||
{"∀", "∀"}, {"∂", "∂"}, {"∃", "∃"}, {"∅", "∅"}, {"∇", "∇"},
|
||||
{"∈", "∈"}, {"∉", "∉"}, {"∋", "∋"}, {"∏", "∏"}, {"∑", "∑"},
|
||||
{"−", "−"}, {"∗", "∗"}, {"√", "√"}, {"∝", "∝"}, {"∞", "∞"},
|
||||
{"∠", "∠"}, {"∧", "∧"}, {"∨", "∨"}, {"∩", "∩"}, {"∪", "∪"},
|
||||
{"∫", "∫"}, {"∴", "∴"}, {"∼", "∼"}, {"≅", "≅"}, {"≈", "≈"},
|
||||
{"≠", "≠"}, {"≡", "≡"}, {"≤", "≤"}, {"≥", "≥"}, {"⊂", "⊂"},
|
||||
{"⊃", "⊃"}, {"⊄", "⊄"}, {"⊆", "⊆"}, {"⊇", "⊇"}, {"⊕", "⊕"},
|
||||
{"⊗", "⊗"}, {"⊥", "⊥"}, {"⋅", "⋅"}, {"Α", "Α"}, {"Β", "Β"},
|
||||
{"Γ", "Γ"}, {"Δ", "Δ"}, {"Ε", "Ε"}, {"Ζ", "Ζ"}, {"Η", "Η"},
|
||||
{"Θ", "Θ"}, {"Ι", "Ι"}, {"Κ", "Κ"}, {"Λ", "Λ"}, {"Μ", "Μ"},
|
||||
{"Ν", "Ν"}, {"Ξ", "Ξ"}, {"Ο", "Ο"}, {"Π", "Π"}, {"Ρ", "Ρ"},
|
||||
{"Σ", "Σ"}, {"Τ", "Τ"}, {"Υ", "Υ"}, {"Φ", "Φ"}, {"Χ", "Χ"},
|
||||
{"Ψ", "Ψ"}, {"Ω", "Ω"}, {"α", "α"}, {"β", "β"}, {"γ", "γ"},
|
||||
{"δ", "δ"}, {"ε", "ε"}, {"ζ", "ζ"}, {"η", "η"}, {"θ", "θ"},
|
||||
{"ι", "ι"}, {"κ", "κ"}, {"λ", "λ"}, {"μ", "μ"}, {"ν", "ν"},
|
||||
{"ξ", "ξ"}, {"ο", "ο"}, {"π", "π"}, {"ρ", "ρ"}, {"ς", "ς"},
|
||||
{"σ", "σ"}, {"τ", "τ"}, {"υ", "υ"}, {"φ", "φ"}, {"χ", "χ"},
|
||||
{"ψ", "ψ"}, {"ω", "ω"}, {"ϑ", "ϑ"}, {"ϒ", "ϒ"}, {"ϖ", "ϖ"},
|
||||
{"Œ", "Œ"}, {"œ", "œ"}, {"Š", "Š"}, {"š", "š"}, {"Ÿ", "Ÿ"},
|
||||
{"ƒ", "ƒ"}, {"ˆ", "ˆ"}, {"˜", "˜"}, {" ", ""}, {" ", ""},
|
||||
{" ", ""}, {"‌", ""}, {"‍", ""}, {"‎", ""}, {"‏", ""},
|
||||
{"–", "–"}, {"—", "—"}, {"‘", "‘"}, {"’", "’"}, {"‚", "‚"},
|
||||
{"“", "“"}, {"”", "”"}, {"„", "„"}, {"†", "†"}, {"‡", "‡"},
|
||||
{"•", "•"}, {"…", "…"}, {"‰", "‰"}, {"′", "′"}, {"″", "″"},
|
||||
{"‹", "‹"}, {"›", "›"}, {"‾", "‾"}, {"€", "€"}, {"™", "™"},
|
||||
{"←", "←"}, {"↑", "↑"}, {"→", "→"}, {"↓", "↓"}, {"↔", "↔"},
|
||||
{"↵", "↵"}, {"⌈", "⌈"}, {"⌉", "⌉"}, {"⌊", "⌊"}, {"⌋", "⌋"},
|
||||
{"◊", "◊"}, {"♠", "♠"}, {"♣", "♣"}, {"♥", "♥"}, {"♦", "♦"}});
|
||||
|
||||
// Appends the UTF-8 encoding of the Unicode code point `code` to `res`.
//
// Valid scalar values (U+0000..U+D7FF and U+E000..U+10FFFF) are written as one
// to four bytes. Anything else - a negative value, a UTF-16 surrogate, or a
// value above U+10FFFF - has no well-formed UTF-8 representation, so U+FFFD
// (the replacement character) is appended instead of a malformed sequence.
void convert_to_utf8(const int code, std::string& res) {
  constexpr int kMaxCodePoint = 0x10FFFF;
  constexpr int kReplacementChar = 0xFFFD;

  const bool isSurrogate = code >= 0xD800 && code <= 0xDFFF;
  const bool isValid = code >= 0 && code <= kMaxCodePoint && !isSurrogate;
  const int cp = isValid ? code : kReplacementChar;

  if (cp < 0x80) {
    // 1 byte: 0xxxxxxx
    res += static_cast<char>(cp);
  } else if (cp < 0x800) {
    // 2 bytes: 110xxxxx 10xxxxxx
    res += static_cast<char>(0xc0 | (cp >> 6));
    res += static_cast<char>(0x80 | (cp & 0x3f));
  } else if (cp < 0x10000) {
    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    res += static_cast<char>(0xe0 | (cp >> 12));
    res += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
    res += static_cast<char>(0x80 | (cp & 0x3f));
  } else {
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    res += static_cast<char>(0xf0 | (cp >> 18));
    res += static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
    res += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
    res += static_cast<char>(0x80 | (cp & 0x3f));
  }
}
|
||||
|
||||
// handles numeric entities - e.g. Ӓ or ሴ
|
||||
bool process_numeric_entity(const std::string& entity, std::string& res) {
|
||||
int code = 0;
|
||||
// is it hex?
|
||||
if (entity[2] == 'x' || entity[2] == 'X') {
|
||||
// parse the hex code
|
||||
code = strtol(entity.substr(3, entity.size() - 3).c_str(), nullptr, 16);
|
||||
} else {
|
||||
code = strtol(entity.substr(2, entity.size() - 3).c_str(), nullptr, 10);
|
||||
}
|
||||
if (code != 0) {
|
||||
// special handling for nbsp
|
||||
if (code == 0xA0) {
|
||||
res += " ";
|
||||
} else {
|
||||
convert_to_utf8(code, res);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// handles named entities - e.g. &
|
||||
bool process_string_entity(const std::string& entity, std::string& res) {
|
||||
// it's a named entity - find it in the lookup table
|
||||
// find it in the map
|
||||
const auto it = entity_lookup.find(entity);
|
||||
if (it != entity_lookup.end()) {
|
||||
res += it->second;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// replace all the entities in the string
|
||||
std::string replaceHtmlEntities(const char* text) {
|
||||
std::string res;
|
||||
res.reserve(strlen(text));
|
||||
for (int i = 0; i < strlen(text); ++i) {
|
||||
bool flag = false;
|
||||
// do we have a potential entity?
|
||||
if (text[i] == '&') {
|
||||
// find the end of the entity
|
||||
int j = i + 1;
|
||||
while (j < strlen(text) && text[j] != ';' && j - i < MAX_ENTITY_LENGTH) {
|
||||
j++;
|
||||
}
|
||||
if (j - i > 2) {
|
||||
char entity[j - i + 1];
|
||||
strncpy(entity, text + i, j - i);
|
||||
// is it a numeric code?
|
||||
if (entity[1] == '#') {
|
||||
flag = process_numeric_entity(entity, res);
|
||||
} else {
|
||||
flag = process_string_entity(entity, res);
|
||||
}
|
||||
// skip past the entity if we successfully decoded it
|
||||
if (flag) {
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!flag) {
|
||||
res += text[i];
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
// from
|
||||
// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
|
||||
std::string replaceHtmlEntities(const char* text);
|
||||
@@ -6,7 +6,6 @@
|
||||
#include <expat.h>
|
||||
|
||||
#include "../Page.h"
|
||||
#include "../htmlEntities.h"
|
||||
|
||||
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
|
||||
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
|
||||
@@ -130,7 +129,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
|
||||
if (self->partWordBufferIndex > 0) {
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
// Skip the whitespace char
|
||||
@@ -155,7 +154,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
// If we're about to run out of space, then cut the word off and start a new one
|
||||
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
|
||||
@@ -197,7 +196,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
|
||||
}
|
||||
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
}
|
||||
|
||||
219
lib/OpdsParser/OpdsParser.cpp
Normal file
219
lib/OpdsParser/OpdsParser.cpp
Normal file
@@ -0,0 +1,219 @@
|
||||
#include "OpdsParser.h"
|
||||
|
||||
#include <HardwareSerial.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
// Tears down the Expat parser if one is still allocated: parsing is stopped,
// the callbacks are detached, and the parser is freed.
OpdsParser::~OpdsParser() {
  if (parser == nullptr) {
    return;  // nothing to release
  }

  XML_StopParser(parser, XML_FALSE);
  XML_SetElementHandler(parser, nullptr, nullptr);
  XML_SetCharacterDataHandler(parser, nullptr);
  XML_ParserFree(parser);
  parser = nullptr;
}
|
||||
|
||||
bool OpdsParser::parse(const char* xmlData, const size_t length) {
|
||||
clear();
|
||||
|
||||
parser = XML_ParserCreate(nullptr);
|
||||
if (!parser) {
|
||||
Serial.printf("[%lu] [OPDS] Couldn't allocate memory for parser\n", millis());
|
||||
return false;
|
||||
}
|
||||
|
||||
XML_SetUserData(parser, this);
|
||||
XML_SetElementHandler(parser, startElement, endElement);
|
||||
XML_SetCharacterDataHandler(parser, characterData);
|
||||
|
||||
// Parse in chunks to avoid large buffer allocations
|
||||
const char* currentPos = xmlData;
|
||||
size_t remaining = length;
|
||||
constexpr size_t chunkSize = 1024;
|
||||
|
||||
while (remaining > 0) {
|
||||
void* const buf = XML_GetBuffer(parser, chunkSize);
|
||||
if (!buf) {
|
||||
Serial.printf("[%lu] [OPDS] Couldn't allocate memory for buffer\n", millis());
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t toRead = remaining < chunkSize ? remaining : chunkSize;
|
||||
memcpy(buf, currentPos, toRead);
|
||||
|
||||
const bool isFinal = (remaining == toRead);
|
||||
if (XML_ParseBuffer(parser, static_cast<int>(toRead), isFinal) == XML_STATUS_ERROR) {
|
||||
Serial.printf("[%lu] [OPDS] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
|
||||
XML_ErrorString(XML_GetErrorCode(parser)));
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
currentPos += toRead;
|
||||
remaining -= toRead;
|
||||
}
|
||||
|
||||
// Clean up parser
|
||||
XML_ParserFree(parser);
|
||||
parser = nullptr;
|
||||
|
||||
Serial.printf("[%lu] [OPDS] Parsed %zu entries\n", millis(), entries.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Resets the parser to its initial state: drops all collected entries, the
// entry under construction, the accumulated text, and every state flag.
void OpdsParser::clear() {
  inEntry = inTitle = inAuthor = inAuthorName = inId = false;
  currentText.clear();
  currentEntry = OpdsEntry{};
  entries.clear();
}
|
||||
|
||||
std::vector<OpdsEntry> OpdsParser::getBooks() const {
|
||||
std::vector<OpdsEntry> books;
|
||||
for (const auto& entry : entries) {
|
||||
if (entry.type == OpdsEntryType::BOOK) {
|
||||
books.push_back(entry);
|
||||
}
|
||||
}
|
||||
return books;
|
||||
}
|
||||
|
||||
const char* OpdsParser::findAttribute(const XML_Char** atts, const char* name) {
|
||||
for (int i = 0; atts[i]; i += 2) {
|
||||
if (strcmp(atts[i], name) == 0) {
|
||||
return atts[i + 1];
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Expat start-tag callback: updates the parser state for the elements of an
// OPDS entry that are tracked (entry, title, author, author/name, id, link).
//
// @param userData the OpdsParser instance registered with XML_SetUserData
// @param name     element name, with or without a namespace prefix
// @param atts     null-terminated list of attribute name/value pairs
void XMLCALL OpdsParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
  auto* self = static_cast<OpdsParser*>(userData);

  // Match on the exact local name (the part after the last ':'). A substring
  // search for ":id" would also fire for names such as "dc:identifier", and
  // ":name" for "x:namespace", corrupting the entry being built.
  const char* const colon = strrchr(name, ':');
  const char* const localName = colon ? colon + 1 : name;
  const auto isElement = [localName](const char* wanted) { return strcmp(localName, wanted) == 0; };

  // A new entry starts: begin with a blank record
  if (isElement("entry")) {
    self->inEntry = true;
    self->currentEntry = OpdsEntry{};
    return;
  }

  // Everything below only matters inside an entry
  if (!self->inEntry) return;

  if (isElement("title")) {
    self->inTitle = true;
    self->currentText.clear();
    return;
  }

  if (isElement("author")) {
    self->inAuthor = true;
    return;
  }

  // <name> is only captured as the author name while inside <author>
  if (self->inAuthor && isElement("name")) {
    self->inAuthorName = true;
    self->currentText.clear();
    return;
  }

  if (isElement("id")) {
    self->inId = true;
    self->currentText.clear();
    return;
  }

  if (isElement("link")) {
    const char* rel = findAttribute(atts, "rel");
    const char* type = findAttribute(atts, "type");
    const char* href = findAttribute(atts, "href");

    if (href) {
      // Check for acquisition link with epub type (this is a downloadable book)
      if (rel && type && strstr(rel, "opds-spec.org/acquisition") != nullptr &&
          strcmp(type, "application/epub+zip") == 0) {
        self->currentEntry.type = OpdsEntryType::BOOK;
        self->currentEntry.href = href;
      }
      // Check for navigation link (subsection or no rel specified with atom+xml type)
      else if (type && strstr(type, "application/atom+xml") != nullptr) {
        // Only set navigation link if we don't already have an epub link
        if (self->currentEntry.type != OpdsEntryType::BOOK) {
          self->currentEntry.type = OpdsEntryType::NAVIGATION;
          self->currentEntry.href = href;
        }
      }
    }
  }
}
|
||||
|
||||
// Expat end-tag callback: stores the text collected for title, author name and
// id into the current entry, and appends the entry to the result list when its
// closing tag is reached.
//
// @param userData the OpdsParser instance registered with XML_SetUserData
// @param name     element name, with or without a namespace prefix
void XMLCALL OpdsParser::endElement(void* userData, const XML_Char* name) {
  auto* self = static_cast<OpdsParser*>(userData);

  // Match on the exact local name (the part after the last ':'). A substring
  // search for ":id" would also fire for names such as "dc:identifier", and
  // ":name" for "x:namespace", overwriting fields with unrelated text.
  const char* const colon = strrchr(name, ':');
  const char* const localName = colon ? colon + 1 : name;
  const auto isElement = [localName](const char* wanted) { return strcmp(localName, wanted) == 0; };

  if (isElement("entry")) {
    // Only add entry if it has required fields (title and href)
    if (!self->currentEntry.title.empty() && !self->currentEntry.href.empty()) {
      self->entries.push_back(self->currentEntry);
    }
    self->inEntry = false;
    self->currentEntry = OpdsEntry{};
    return;
  }

  // Everything below only matters inside an entry
  if (!self->inEntry) return;

  if (isElement("title")) {
    if (self->inTitle) {
      self->currentEntry.title = self->currentText;
    }
    self->inTitle = false;
    return;
  }

  if (isElement("author")) {
    self->inAuthor = false;
    return;
  }

  // <name> only counts as the author name while inside <author>
  if (self->inAuthor && isElement("name")) {
    if (self->inAuthorName) {
      self->currentEntry.author = self->currentText;
    }
    self->inAuthorName = false;
    return;
  }

  if (isElement("id")) {
    if (self->inId) {
      self->currentEntry.id = self->currentText;
    }
    self->inId = false;
    return;
  }
}
|
||||
|
||||
// Expat character-data callback: appends the `len` characters at `s` to the
// text buffer, but only while a title, author name or id element is open.
void XMLCALL OpdsParser::characterData(void* userData, const XML_Char* s, const int len) {
  auto* self = static_cast<OpdsParser*>(userData);

  const bool capturing = self->inTitle || self->inAuthorName || self->inId;
  if (!capturing) {
    return;
  }

  self->currentText.append(s, len);
}
|
||||
99
lib/OpdsParser/OpdsParser.h
Normal file
99
lib/OpdsParser/OpdsParser.h
Normal file
@@ -0,0 +1,99 @@
|
||||
#pragma once
|
||||
#include <expat.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/**
 * Kind of entry found in an OPDS feed.
 */
enum class OpdsEntryType {
  NAVIGATION = 0,  // Entry links to another catalog
  BOOK = 1         // Entry is a downloadable book
};
|
||||
|
||||
/**
|
||||
* Represents an entry from an OPDS feed (either a navigation link or a book).
|
||||
*/
|
||||
struct OpdsEntry {
|
||||
OpdsEntryType type = OpdsEntryType::NAVIGATION;
|
||||
std::string title;
|
||||
std::string author; // Only for books
|
||||
std::string href; // Navigation URL or epub download URL
|
||||
std::string id;
|
||||
};
|
||||
|
||||
// Legacy alias for backward compatibility
|
||||
using OpdsBook = OpdsEntry;
|
||||
|
||||
/**
|
||||
* Parser for OPDS (Open Publication Distribution System) Atom feeds.
|
||||
* Uses the Expat XML parser to parse OPDS catalog entries.
|
||||
*
|
||||
* Usage:
|
||||
* OpdsParser parser;
|
||||
* if (parser.parse(xmlData, xmlLength)) {
|
||||
* for (const auto& entry : parser.getEntries()) {
|
||||
* if (entry.type == OpdsEntryType::BOOK) {
|
||||
* // Downloadable book
|
||||
* } else {
|
||||
* // Navigation link to another catalog
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
class OpdsParser {
|
||||
public:
|
||||
OpdsParser() = default;
|
||||
~OpdsParser();
|
||||
|
||||
// Disable copy
|
||||
OpdsParser(const OpdsParser&) = delete;
|
||||
OpdsParser& operator=(const OpdsParser&) = delete;
|
||||
|
||||
/**
|
||||
* Parse an OPDS XML feed.
|
||||
* @param xmlData Pointer to the XML data
|
||||
* @param length Length of the XML data
|
||||
* @return true if parsing succeeded, false on error
|
||||
*/
|
||||
bool parse(const char* xmlData, size_t length);
|
||||
|
||||
/**
|
||||
* Get the parsed entries (both navigation and book entries).
|
||||
* @return Vector of OpdsEntry entries
|
||||
*/
|
||||
const std::vector<OpdsEntry>& getEntries() const { return entries; }
|
||||
|
||||
/**
|
||||
* Get only book entries (legacy compatibility).
|
||||
* @return Vector of book entries
|
||||
*/
|
||||
std::vector<OpdsEntry> getBooks() const;
|
||||
|
||||
/**
|
||||
* Clear all parsed entries.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
private:
|
||||
// Expat callbacks
|
||||
static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
||||
static void XMLCALL endElement(void* userData, const XML_Char* name);
|
||||
static void XMLCALL characterData(void* userData, const XML_Char* s, int len);
|
||||
|
||||
// Helper to find attribute value
|
||||
static const char* findAttribute(const XML_Char** atts, const char* name);
|
||||
|
||||
XML_Parser parser = nullptr;
|
||||
std::vector<OpdsEntry> entries;
|
||||
OpdsEntry currentEntry;
|
||||
std::string currentText;
|
||||
|
||||
// Parser state
|
||||
bool inEntry = false;
|
||||
bool inTitle = false;
|
||||
bool inAuthor = false;
|
||||
bool inAuthorName = false;
|
||||
bool inId = false;
|
||||
};
|
||||
Reference in New Issue
Block a user