diff --git a/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp b/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp index bbda252..3bb15e6 100644 --- a/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp @@ -1,5 +1,4 @@ #include "EnglishHyphenator.h" -#include "HyphenationLiterals.h" #include #include @@ -7,6 +6,8 @@ #include #include +#include "HyphenationLiterals.h" + namespace { char lowerLatinChar(const uint32_t cp) { @@ -50,18 +51,15 @@ bool isEnglishFricativeChar(const char c) { using LatinLiteral = HyphenLiteralT; -constexpr std::array ENGLISH_PREFIXES = {{{"anti", 4}, {"auto", 4}, {"counter", 7}, {"de", 2}, - {"dis", 3}, {"hyper", 5}, {"inter", 5}, {"micro", 5}, - {"mis", 3}, {"mono", 4}, {"multi", 5}, {"non", 3}, - {"over", 4}, {"post", 4}, {"pre", 3}, {"pro", 3}, - {"re", 2}, {"sub", 3}, {"super", 5}, {"trans", 5}}}; +constexpr std::array ENGLISH_PREFIXES = { + {{"anti", 4}, {"auto", 4}, {"counter", 7}, {"de", 2}, {"dis", 3}, {"hyper", 5}, {"inter", 5}, + {"micro", 5}, {"mis", 3}, {"mono", 4}, {"multi", 5}, {"non", 3}, {"over", 4}, {"post", 4}, + {"pre", 3}, {"pro", 3}, {"re", 2}, {"sub", 3}, {"super", 5}, {"trans", 5}}}; -constexpr std::array ENGLISH_SUFFIXES = {{{"able", 4}, {"ible", 4}, {"ing", 3}, {"ings", 4}, - {"ed", 2}, {"er", 2}, {"ers", 3}, {"est", 3}, - {"ful", 3}, {"hood", 4}, {"less", 4}, {"lessly", 6}, - {"ly", 2}, {"ment", 4}, {"ments", 5},{"ness", 4}, - {"ous", 3}, {"tion", 4}, {"sion", 4}, {"ward", 4}, - {"wards", 5},{"ship", 4}, {"ships", 5},{"y", 1}}}; +constexpr std::array ENGLISH_SUFFIXES = { + {{"able", 4}, {"ible", 4}, {"ing", 3}, {"ings", 4}, {"ed", 2}, {"er", 2}, {"ers", 3}, {"est", 3}, + {"ful", 3}, {"hood", 4}, {"less", 4}, {"lessly", 6}, {"ly", 2}, {"ment", 4}, {"ments", 5}, {"ness", 4}, + {"ous", 3}, {"tion", 4}, {"sion", 4}, {"ward", 4}, {"wards", 5}, {"ship", 4}, {"ships", 5}, {"y", 1}}}; bool nextToApostrophe(const std::vector& cps, size_t index); @@ -111,8 +109,9 @@ bool englishBreakAllowed(const std::vector& cps, const size_t bre void appendMorphologyBreaks(const std::vector& cps, const std::string& lowerWord, std::vector& indexes) { - appendLiteralBreaks(lowerWord, ENGLISH_PREFIXES, ENGLISH_SUFFIXES, - [&](const size_t breakIndex) { return englishBreakAllowed(cps, breakIndex); }, indexes); + appendLiteralBreaks( + lowerWord, ENGLISH_PREFIXES, ENGLISH_SUFFIXES, + [&](const size_t breakIndex) { return englishBreakAllowed(cps, breakIndex); }, indexes); } struct CharPair { @@ -313,8 +312,7 @@ std::vector englishBreakIndexes(const std::vector& cps) { const size_t rightVowel = vowelPositions[v + 1]; if (rightVowel - leftVowel == 1) { - if (!isEnglishDiphthong(cps[leftVowel].value, cps[rightVowel].value) && - englishBreakAllowed(cps, rightVowel)) { + if (!isEnglishDiphthong(cps[leftVowel].value, cps[rightVowel].value) && englishBreakAllowed(cps, rightVowel)) { indexes.push_back(rightVowel); } continue; diff --git a/lib/Epub/Epub/hyphenation/HyphenationLiterals.h b/lib/Epub/Epub/hyphenation/HyphenationLiterals.h index 5273ac8..9cd1120 100644 --- a/lib/Epub/Epub/hyphenation/HyphenationLiterals.h +++ b/lib/Epub/Epub/hyphenation/HyphenationLiterals.h @@ -30,8 +30,7 @@ bool matchesLiteralAt(const WordContainer& word, const size_t start, const Liter template void appendLiteralBreaks(const WordContainer& lowerWord, const PrefixContainer& prefixes, - const SuffixContainer& suffixes, BreakAllowedFn&& breakAllowed, - std::vector& indexes) { + const SuffixContainer& suffixes, BreakAllowedFn&& breakAllowed, std::vector& indexes) { const size_t length = lowerWord.size(); const auto tryPush = [&](const size_t breakIndex) { diff --git a/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp b/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp index 8807dfe..2efb663 100644 --- a/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp @@ -1,11 +1,12 @@ #include "RussianHyphenator.h" -#include "HyphenationLiterals.h" #include #include #include #include +#include "HyphenationLiterals.h" + namespace { using CyrillicLiteral = HyphenLiteralT; @@ -23,10 +24,18 @@ constexpr uint32_t PFX_SAMO[4] = {0x0441, 0x0430, 0x043C, 0x043E}; constexpr uint32_t PFX_OBO[3] = {0x043E, 0x0431, 0x043E}; constexpr uint32_t PFX_PROTIV[6] = {0x043F, 0x0440, 0x043E, 0x0442, 0x0438, 0x0432}; -constexpr std::array RUSSIAN_PREFIXES = {{{PFX_BEZ, 3}, {PFX_RAZ, 3}, {PFX_POD, 3}, - {PFX_NAD, 3}, {PFX_PERE, 4}, {PFX_SVERH, 5}, - {PFX_MEZH, 3}, {PFX_SUPER, 5},{PFX_PRED, 4}, - {PFX_SAMO, 4}, {PFX_OBO, 3}, {PFX_PROTIV, 6}}}; +constexpr std::array RUSSIAN_PREFIXES = {{{PFX_BEZ, 3}, + {PFX_RAZ, 3}, + {PFX_POD, 3}, + {PFX_NAD, 3}, + {PFX_PERE, 4}, + {PFX_SVERH, 5}, + {PFX_MEZH, 3}, + {PFX_SUPER, 5}, + {PFX_PRED, 4}, + {PFX_SAMO, 4}, + {PFX_OBO, 3}, + {PFX_PROTIV, 6}}}; constexpr uint32_t SFX_NOST[4] = {0x043D, 0x043E, 0x0441, 0x0442}; constexpr uint32_t SFX_STVO[4] = {0x0441, 0x0442, 0x0432, 0x043E}; @@ -41,10 +50,18 @@ constexpr uint32_t SFX_ISM[3] = {0x0438, 0x0437, 0x043C}; constexpr uint32_t SFX_LIV[5] = {0x043B, 0x0438, 0x0432, 0x044B, 0x0439}; constexpr uint32_t SFX_OST[4] = {0x043E, 0x0441, 0x0442, 0x044C}; -constexpr std::array RUSSIAN_SUFFIXES = {{{SFX_NOST, 4}, {SFX_STVO, 4}, {SFX_ENIE, 4}, - {SFX_ATION, 4}, {SFX_CHIK, 3}, {SFX_NIK, 3}, - {SFX_TEL, 4}, {SFX_SKII, 4}, {SFX_AL, 6}, - {SFX_ISM, 3}, {SFX_LIV, 5}, {SFX_OST, 4}}}; +constexpr std::array RUSSIAN_SUFFIXES = {{{SFX_NOST, 4}, + {SFX_STVO, 4}, + {SFX_ENIE, 4}, + {SFX_ATION, 4}, + {SFX_CHIK, 3}, + {SFX_NIK, 3}, + {SFX_TEL, 4}, + {SFX_SKII, 4}, + {SFX_AL, 6}, + {SFX_ISM, 3}, + {SFX_LIV, 5}, + {SFX_OST, 4}}}; std::vector lowercaseCyrillicWord(const std::vector& cps) { std::vector lower; @@ -308,8 +325,9 @@ bool nextToSoftSign(const std::vector& cps, const size_t index) { void appendMorphologyBreaks(const std::vector& cps, const std::vector& lowerWord, std::vector& indexes) { - appendLiteralBreaks(lowerWord, RUSSIAN_PREFIXES, RUSSIAN_SUFFIXES, - [&](const size_t breakIndex) { return russianBreakAllowed(cps, breakIndex); }, indexes); + appendLiteralBreaks( + lowerWord, RUSSIAN_PREFIXES, RUSSIAN_SUFFIXES, + [&](const size_t breakIndex) { return russianBreakAllowed(cps, breakIndex); }, indexes); } // Produces syllable break indexes tailored to Russian phonotactics.