Remove HTML entity parsing (#274)
## Summary

* Remove HTML entity parsing
* This has been completely useless since the introduction of expat
* expat tries to parse all entities in the document, but only knows of HTML ones
* Parsing will never end with HTML entities left in the text, so the additional step we had to parse them went completely unused
* We should figure out the best way to parse that content in the future, but for now remove that module, as it generates a lot of heap allocations with its map and strings
This commit is contained in:
@@ -6,7 +6,6 @@
|
||||
#include <expat.h>
|
||||
|
||||
#include "../Page.h"
|
||||
#include "../htmlEntities.h"
|
||||
|
||||
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
|
||||
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
|
||||
@@ -130,7 +129,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
|
||||
if (self->partWordBufferIndex > 0) {
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
// Skip the whitespace char
|
||||
@@ -155,7 +154,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
||||
// If we're about to run out of space, then cut the word off and start a new one
|
||||
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
|
||||
@@ -197,7 +196,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
|
||||
}
|
||||
|
||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
|
||||
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
|
||||
self->partWordBufferIndex = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user