feat: dict based Hyphenation (#305)
## Summary * Adds (optional) Hyphenation for English, French, German, Russian languages ## Additional Context * Included hyphenation dictionaries add approximately 280kb to the flash usage (German alone takes 200kb) * Trie encoded dictionaries are adopted from hypher project (https://github.com/typst/hypher) * Soft hyphens (and other explicit hyphens) take precedence over dict-based hyphenation. Overall, the hyphenation rules are quite aggressive, as I believe it makes more sense on our smaller screen. --------- Co-authored-by: Dave Allie <dave@daveallie.com>
This commit is contained in:
committed by
GitHub
parent
5fef99c641
commit
8824c87490
@@ -4,12 +4,14 @@
|
||||
#include <Serialization.h>
|
||||
|
||||
#include "Page.h"
|
||||
#include "hyphenation/Hyphenator.h"
|
||||
#include "parsers/ChapterHtmlSlimParser.h"
|
||||
|
||||
namespace {
|
||||
constexpr uint8_t SECTION_FILE_VERSION = 9;
|
||||
constexpr uint8_t SECTION_FILE_VERSION = 10;
|
||||
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
|
||||
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t);
|
||||
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) +
|
||||
sizeof(uint32_t);
|
||||
} // namespace
|
||||
|
||||
uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
|
||||
@@ -31,14 +33,15 @@ uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
|
||||
|
||||
void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
|
||||
const uint8_t paragraphAlignment, const uint16_t viewportWidth,
|
||||
const uint16_t viewportHeight) {
|
||||
const uint16_t viewportHeight, const bool hyphenationEnabled) {
|
||||
if (!file) {
|
||||
Serial.printf("[%lu] [SCT] File not open for writing header\n", millis());
|
||||
return;
|
||||
}
|
||||
static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
|
||||
sizeof(extraParagraphSpacing) + sizeof(paragraphAlignment) + sizeof(viewportWidth) +
|
||||
sizeof(viewportHeight) + sizeof(pageCount) + sizeof(uint32_t),
|
||||
sizeof(viewportHeight) + sizeof(pageCount) + sizeof(hyphenationEnabled) +
|
||||
sizeof(uint32_t),
|
||||
"Header size mismatch");
|
||||
serialization::writePod(file, SECTION_FILE_VERSION);
|
||||
serialization::writePod(file, fontId);
|
||||
@@ -47,13 +50,14 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi
|
||||
serialization::writePod(file, paragraphAlignment);
|
||||
serialization::writePod(file, viewportWidth);
|
||||
serialization::writePod(file, viewportHeight);
|
||||
serialization::writePod(file, hyphenationEnabled);
|
||||
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written)
|
||||
serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for LUT offset
|
||||
}
|
||||
|
||||
bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
|
||||
const uint8_t paragraphAlignment, const uint16_t viewportWidth,
|
||||
const uint16_t viewportHeight) {
|
||||
const uint16_t viewportHeight, const bool hyphenationEnabled) {
|
||||
if (!SdMan.openFileForRead("SCT", filePath, file)) {
|
||||
return false;
|
||||
}
|
||||
@@ -74,16 +78,19 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con
|
||||
float fileLineCompression;
|
||||
bool fileExtraParagraphSpacing;
|
||||
uint8_t fileParagraphAlignment;
|
||||
bool fileHyphenationEnabled;
|
||||
serialization::readPod(file, fileFontId);
|
||||
serialization::readPod(file, fileLineCompression);
|
||||
serialization::readPod(file, fileExtraParagraphSpacing);
|
||||
serialization::readPod(file, fileParagraphAlignment);
|
||||
serialization::readPod(file, fileViewportWidth);
|
||||
serialization::readPod(file, fileViewportHeight);
|
||||
serialization::readPod(file, fileHyphenationEnabled);
|
||||
|
||||
if (fontId != fileFontId || lineCompression != fileLineCompression ||
|
||||
extraParagraphSpacing != fileExtraParagraphSpacing || paragraphAlignment != fileParagraphAlignment ||
|
||||
viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight) {
|
||||
viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight ||
|
||||
hyphenationEnabled != fileHyphenationEnabled) {
|
||||
file.close();
|
||||
Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis());
|
||||
clearCache();
|
||||
@@ -115,7 +122,8 @@ bool Section::clearCache() const {
|
||||
|
||||
bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
|
||||
const uint8_t paragraphAlignment, const uint16_t viewportWidth,
|
||||
const uint16_t viewportHeight, const std::function<void()>& progressSetupFn,
|
||||
const uint16_t viewportHeight, const bool hyphenationEnabled,
|
||||
const std::function<void()>& progressSetupFn,
|
||||
const std::function<void(int)>& progressFn) {
|
||||
constexpr uint32_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
|
||||
const auto localPath = epub->getSpineItem(spineIndex).href;
|
||||
@@ -172,14 +180,15 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
||||
return false;
|
||||
}
|
||||
writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
||||
viewportHeight);
|
||||
viewportHeight, hyphenationEnabled);
|
||||
std::vector<uint32_t> lut = {};
|
||||
|
||||
ChapterHtmlSlimParser visitor(
|
||||
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
|
||||
viewportHeight,
|
||||
viewportHeight, hyphenationEnabled,
|
||||
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
|
||||
progressFn);
|
||||
Hyphenator::setPreferredLanguage(epub->getLanguage());
|
||||
success = visitor.parseAndBuildPages();
|
||||
|
||||
SdMan.remove(tmpHtmlPath.c_str());
|
||||
|
||||
Reference in New Issue
Block a user