Extract EPUB TOC into temp file before parsing (#85)
## Summary * Extract EPUB TOC into temp file before parsing * The streaming ZIP -> XML parser uses up a lot of memory as we're allocating inflation buffers while also holding a few copies of the buffer in different forms * Instead, by streaming the inflated file down to the SD card (like we do for HTML parsing), we can lower memory usage ## Additional Context * This should help with https://github.com/daveallie/crosspoint-reader/issues/60 and https://github.com/daveallie/crosspoint-reader/issues/10. It won't remove that class of issues completely, but will allow for many more books to be opened.
This commit is contained in:
parent
0d32d21d75
commit
f264efdb12
@ -93,24 +93,42 @@ bool Epub::parseTocNcxFile() {
|
|||||||
|
|
||||||
Serial.printf("[%lu] [EBP] Parsing toc ncx file: %s\n", millis(), tocNcxItem.c_str());
|
Serial.printf("[%lu] [EBP] Parsing toc ncx file: %s\n", millis(), tocNcxItem.c_str());
|
||||||
|
|
||||||
size_t tocSize;
|
const auto tmpNcxPath = getCachePath() + "/toc.ncx";
|
||||||
if (!getItemSize(tocNcxItem, &tocSize)) {
|
File tempNcxFile = SD.open(tmpNcxPath.c_str(), FILE_WRITE);
|
||||||
Serial.printf("[%lu] [EBP] Could not get size of toc ncx\n", millis());
|
readItemContentsToStream(tocNcxItem, tempNcxFile, 1024);
|
||||||
return false;
|
tempNcxFile.close();
|
||||||
}
|
tempNcxFile = SD.open(tmpNcxPath.c_str(), FILE_READ);
|
||||||
|
const auto ncxSize = tempNcxFile.size();
|
||||||
|
|
||||||
TocNcxParser ncxParser(contentBasePath, tocSize);
|
TocNcxParser ncxParser(contentBasePath, ncxSize);
|
||||||
|
|
||||||
if (!ncxParser.setup()) {
|
if (!ncxParser.setup()) {
|
||||||
Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
|
Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!readItemContentsToStream(tocNcxItem, ncxParser, 1024)) {
|
const auto ncxBuffer = static_cast<uint8_t*>(malloc(1024));
|
||||||
Serial.printf("[%lu] [EBP] Could not read toc ncx stream\n", millis());
|
if (!ncxBuffer) {
|
||||||
|
Serial.printf("[%lu] [EBP] Could not allocate memory for toc ncx parser\n", millis());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while (tempNcxFile.available()) {
|
||||||
|
const auto readSize = tempNcxFile.read(ncxBuffer, 1024);
|
||||||
|
const auto processedSize = ncxParser.write(ncxBuffer, readSize);
|
||||||
|
|
||||||
|
if (processedSize != readSize) {
|
||||||
|
Serial.printf("[%lu] [EBP] Could not process all toc ncx data\n", millis());
|
||||||
|
free(ncxBuffer);
|
||||||
|
tempNcxFile.close();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(ncxBuffer);
|
||||||
|
tempNcxFile.close();
|
||||||
|
SD.remove(tmpNcxPath.c_str());
|
||||||
|
|
||||||
this->toc = std::move(ncxParser.toc);
|
this->toc = std::move(ncxParser.toc);
|
||||||
|
|
||||||
Serial.printf("[%lu] [EBP] Parsed %d TOC items\n", millis(), this->toc.size());
|
Serial.printf("[%lu] [EBP] Parsed %d TOC items\n", millis(), this->toc.size());
|
||||||
@ -293,7 +311,7 @@ std::string& Epub::getSpineItem(const int spineIndex) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EpubTocEntry& Epub::getTocItem(const int tocTndex) {
|
EpubTocEntry& Epub::getTocItem(const int tocTndex) {
|
||||||
static EpubTocEntry emptyEntry("", "", "", 0);
|
static EpubTocEntry emptyEntry = {};
|
||||||
if (toc.empty()) {
|
if (toc.empty()) {
|
||||||
Serial.printf("[%lu] [EBP] getTocItem called but toc is empty\n", millis());
|
Serial.printf("[%lu] [EBP] getTocItem called but toc is empty\n", millis());
|
||||||
return emptyEntry;
|
return emptyEntry;
|
||||||
|
|||||||
@ -2,12 +2,9 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
class EpubTocEntry {
|
struct EpubTocEntry {
|
||||||
public:
|
|
||||||
std::string title;
|
std::string title;
|
||||||
std::string href;
|
std::string href;
|
||||||
std::string anchor;
|
std::string anchor;
|
||||||
int level;
|
uint8_t level;
|
||||||
EpubTocEntry(std::string title, std::string href, std::string anchor, const int level)
|
|
||||||
: title(std::move(title)), href(std::move(href)), anchor(std::move(anchor)), level(level) {}
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -155,7 +155,7 @@ void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Push to vector
|
// Push to vector
|
||||||
self->toc.emplace_back(self->currentLabel, href, anchor, self->currentDepth);
|
self->toc.push_back({std::move(self->currentLabel), std::move(href), std::move(anchor), self->currentDepth});
|
||||||
|
|
||||||
// Clear them so we don't re-add them if there are weird XML structures
|
// Clear them so we don't re-add them if there are weird XML structures
|
||||||
self->currentLabel.clear();
|
self->currentLabel.clear();
|
||||||
|
|||||||
@ -17,7 +17,7 @@ class TocNcxParser final : public Print {
|
|||||||
|
|
||||||
std::string currentLabel;
|
std::string currentLabel;
|
||||||
std::string currentSrc;
|
std::string currentSrc;
|
||||||
size_t currentDepth = 0;
|
uint8_t currentDepth = 0;
|
||||||
|
|
||||||
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
||||||
static void characterData(void* userData, const XML_Char* s, int len);
|
static void characterData(void* userData, const XML_Char* s, int len);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user