diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 9365df20..7c0b3c12 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -10,7 +10,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 18; +constexpr uint8_t SECTION_FILE_VERSION = 19; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t); diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp index 4ae5307e..ad5de454 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp @@ -12,11 +12,24 @@ const LanguageHyphenator* Hyphenator::cachedHyphenator_ = nullptr; namespace { -// Maps a BCP-47 language tag to a language-specific hyphenator. +// Normalize ISO 639-2 (three-letter) codes to ISO 639-1 (two-letter) codes used by the +// hyphenation registry. EPUBs may use either form in their dc:language metadata (e.g. +// "eng" instead of "en"). Both the bibliographic ("fre"/"ger") and terminological +// ("fra"/"deu") ISO 639-2 variants are mapped. +struct Iso639Mapping { + const char* iso639_2; + const char* iso639_1; +}; +static constexpr Iso639Mapping kIso639Mappings[] = { + {"eng", "en"}, {"fra", "fr"}, {"fre", "fr"}, {"deu", "de"}, {"ger", "de"}, + {"rus", "ru"}, {"spa", "es"}, {"ita", "it"}, {"ukr", "uk"}, +}; + +// Maps a BCP-47 or ISO 639-2 language tag to a language-specific hyphenator. const LanguageHyphenator* hyphenatorForLanguage(const std::string& langTag) { if (langTag.empty()) return nullptr; - // Extract primary subtag and normalize to lowercase (e.g., "en-US" -> "en"). + // Extract primary subtag and normalize to lowercase (e.g., "en-US" -> "en", "ENG" -> "eng"). 
std::string primary; primary.reserve(langTag.size()); for (char c : langTag) { @@ -26,6 +39,14 @@ const LanguageHyphenator* hyphenatorForLanguage(const std::string& langTag) { } if (primary.empty()) return nullptr; + // Normalize ISO 639-2 three-letter codes to two-letter equivalents. + for (const auto& mapping : kIso639Mappings) { + if (primary == mapping.iso639_2) { + primary = mapping.iso639_1; + break; + } + } + return getLanguageHyphenatorForPrimaryTag(primary); }