Fix code formatting in the hyphenation sources

2025-12-26 05:12:26 +05:00 · 2025-12-26 05:12:26 +05:00 · 23183a6270
commit 23183a6270
parent 3cf52d8bd1
3 changed files with 44 additions and 29 deletions
--- a/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp
@ -1,5 +1,4 @@
 #include "EnglishHyphenator.h"
 #include "HyphenationLiterals.h"
 #include <algorithm>
 #include <array>
@ -7,6 +6,8 @@
 #include <string>
 #include <vector>
 #include "HyphenationLiterals.h"
 namespace {
 char lowerLatinChar(const uint32_t cp) {
@ -50,18 +51,15 @@ bool isEnglishFricativeChar(const char c) {
 using LatinLiteral = HyphenLiteralT<char>;
-constexpr std::array<LatinLiteral, 20> ENGLISH_PREFIXES = {{{"anti", 4},  {"auto", 4}, {"counter", 7}, {"de", 2},
+constexpr std::array<LatinLiteral, 20> ENGLISH_PREFIXES = {
-                                                            {"dis", 3},   {"hyper", 5}, {"inter", 5},   {"micro", 5},
+    {{"anti", 4},  {"auto", 4}, {"counter", 7}, {"de", 2},    {"dis", 3},   {"hyper", 5}, {"inter", 5},
-                                                            {"mis", 3},   {"mono", 4},  {"multi", 5},   {"non", 3},
+     {"micro", 5}, {"mis", 3},  {"mono", 4},    {"multi", 5}, {"non", 3},   {"over", 4},  {"post", 4},
-                                                            {"over", 4},  {"post", 4},  {"pre", 3},     {"pro", 3},
+     {"pre", 3},   {"pro", 3},  {"re", 2},      {"sub", 3},   {"super", 5}, {"trans", 5}}};
                                                            {"re", 2},    {"sub", 3},   {"super", 5},   {"trans", 5}}};
-constexpr std::array<LatinLiteral, 24> ENGLISH_SUFFIXES = {{{"able", 4}, {"ible", 4}, {"ing", 3},  {"ings", 4},
+constexpr std::array<LatinLiteral, 24> ENGLISH_SUFFIXES = {
-                                                            {"ed", 2},   {"er", 2},   {"ers", 3},  {"est", 3},
+    {{"able", 4}, {"ible", 4}, {"ing", 3},  {"ings", 4},   {"ed", 2},    {"er", 2},   {"ers", 3},   {"est", 3},
-                                                            {"ful", 3},  {"hood", 4}, {"less", 4}, {"lessly", 6},
+     {"ful", 3},  {"hood", 4}, {"less", 4}, {"lessly", 6}, {"ly", 2},    {"ment", 4}, {"ments", 5}, {"ness", 4},
-                                                            {"ly", 2},   {"ment", 4}, {"ments", 5},{"ness", 4},
+     {"ous", 3},  {"tion", 4}, {"sion", 4}, {"ward", 4},   {"wards", 5}, {"ship", 4}, {"ships", 5}, {"y", 1}}};
                                                            {"ous", 3},  {"tion", 4}, {"sion", 4}, {"ward", 4},
                                                            {"wards", 5},{"ship", 4}, {"ships", 5},{"y", 1}}};
 bool nextToApostrophe(const std::vector<CodepointInfo>& cps, size_t index);
@ -111,8 +109,9 @@ bool englishBreakAllowed(const std::vector<CodepointInfo>& cps, const size_t bre
 void appendMorphologyBreaks(const std::vector<CodepointInfo>& cps, const std::string& lowerWord,
                            std::vector<size_t>& indexes) {
-  appendLiteralBreaks(lowerWord, ENGLISH_PREFIXES, ENGLISH_SUFFIXES,
+  appendLiteralBreaks(
-                      [&](const size_t breakIndex) { return englishBreakAllowed(cps, breakIndex); }, indexes);
+      lowerWord, ENGLISH_PREFIXES, ENGLISH_SUFFIXES,
      [&](const size_t breakIndex) { return englishBreakAllowed(cps, breakIndex); }, indexes);
 }
 struct CharPair {
@ -313,8 +312,7 @@ std::vector<size_t> englishBreakIndexes(const std::vector<CodepointInfo>& cps) {
    const size_t rightVowel = vowelPositions[v + 1];
    if (rightVowel - leftVowel == 1) {
-      if (!isEnglishDiphthong(cps[leftVowel].value, cps[rightVowel].value) &&
+      if (!isEnglishDiphthong(cps[leftVowel].value, cps[rightVowel].value) && englishBreakAllowed(cps, rightVowel)) {
          englishBreakAllowed(cps, rightVowel)) {
        indexes.push_back(rightVowel);
      }
      continue;
--- a/lib/Epub/Epub/hyphenation/HyphenationLiterals.h
+++ b/lib/Epub/Epub/hyphenation/HyphenationLiterals.h
@ -30,8 +30,7 @@ bool matchesLiteralAt(const WordContainer& word, const size_t start, const Liter
 template <typename WordContainer, typename PrefixContainer, typename SuffixContainer, typename BreakAllowedFn>
 void appendLiteralBreaks(const WordContainer& lowerWord, const PrefixContainer& prefixes,
-                         const SuffixContainer& suffixes, BreakAllowedFn&& breakAllowed,
+                         const SuffixContainer& suffixes, BreakAllowedFn&& breakAllowed, std::vector<size_t>& indexes) {
                         std::vector<size_t>& indexes) {
  const size_t length = lowerWord.size();
  const auto tryPush = [&](const size_t breakIndex) {
--- a/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp
@ -1,11 +1,12 @@
 #include "RussianHyphenator.h"
 #include "HyphenationLiterals.h"
 #include <algorithm>
 #include <array>
 #include <limits>
 #include <vector>
 #include "HyphenationLiterals.h"
 namespace {
 using CyrillicLiteral = HyphenLiteralT<uint32_t>;
@ -23,10 +24,18 @@ constexpr uint32_t PFX_SAMO[4] = {0x0441, 0x0430, 0x043C, 0x043E};
 constexpr uint32_t PFX_OBO[3] = {0x043E, 0x0431, 0x043E};
 constexpr uint32_t PFX_PROTIV[6] = {0x043F, 0x0440, 0x043E, 0x0442, 0x0438, 0x0432};
-constexpr std::array<CyrillicLiteral, 12> RUSSIAN_PREFIXES = {{{PFX_BEZ, 3},   {PFX_RAZ, 3},  {PFX_POD, 3},
+constexpr std::array<CyrillicLiteral, 12> RUSSIAN_PREFIXES = {{{PFX_BEZ, 3},
-                                                               {PFX_NAD, 3},   {PFX_PERE, 4}, {PFX_SVERH, 5},
+                                                               {PFX_RAZ, 3},
-                                                               {PFX_MEZH, 3},  {PFX_SUPER, 5},{PFX_PRED, 4},
+                                                               {PFX_POD, 3},
-                                                               {PFX_SAMO, 4},  {PFX_OBO, 3},  {PFX_PROTIV, 6}}};
+                                                               {PFX_NAD, 3},
                                                               {PFX_PERE, 4},
                                                               {PFX_SVERH, 5},
                                                               {PFX_MEZH, 3},
                                                               {PFX_SUPER, 5},
                                                               {PFX_PRED, 4},
                                                               {PFX_SAMO, 4},
                                                               {PFX_OBO, 3},
                                                               {PFX_PROTIV, 6}}};
 constexpr uint32_t SFX_NOST[4] = {0x043D, 0x043E, 0x0441, 0x0442};
 constexpr uint32_t SFX_STVO[4] = {0x0441, 0x0442, 0x0432, 0x043E};
@ -41,10 +50,18 @@ constexpr uint32_t SFX_ISM[3] = {0x0438, 0x0437, 0x043C};
 constexpr uint32_t SFX_LIV[5] = {0x043B, 0x0438, 0x0432, 0x044B, 0x0439};
 constexpr uint32_t SFX_OST[4] = {0x043E, 0x0441, 0x0442, 0x044C};
-constexpr std::array<CyrillicLiteral, 12> RUSSIAN_SUFFIXES = {{{SFX_NOST, 4}, {SFX_STVO, 4}, {SFX_ENIE, 4},
+constexpr std::array<CyrillicLiteral, 12> RUSSIAN_SUFFIXES = {{{SFX_NOST, 4},
-                                                               {SFX_ATION, 4}, {SFX_CHIK, 3}, {SFX_NIK, 3},
+                                                               {SFX_STVO, 4},
-                                                               {SFX_TEL, 4},   {SFX_SKII, 4}, {SFX_AL, 6},
+                                                               {SFX_ENIE, 4},
-                                                               {SFX_ISM, 3},   {SFX_LIV, 5}, {SFX_OST, 4}}};
+                                                               {SFX_ATION, 4},
                                                               {SFX_CHIK, 3},
                                                               {SFX_NIK, 3},
                                                               {SFX_TEL, 4},
                                                               {SFX_SKII, 4},
                                                               {SFX_AL, 6},
                                                               {SFX_ISM, 3},
                                                               {SFX_LIV, 5},
                                                               {SFX_OST, 4}}};
 std::vector<uint32_t> lowercaseCyrillicWord(const std::vector<CodepointInfo>& cps) {
  std::vector<uint32_t> lower;
@ -308,8 +325,9 @@ bool nextToSoftSign(const std::vector<CodepointInfo>& cps, const size_t index) {
 void appendMorphologyBreaks(const std::vector<CodepointInfo>& cps, const std::vector<uint32_t>& lowerWord,
                            std::vector<size_t>& indexes) {
-  appendLiteralBreaks(lowerWord, RUSSIAN_PREFIXES, RUSSIAN_SUFFIXES,
+  appendLiteralBreaks(
-                      [&](const size_t breakIndex) { return russianBreakAllowed(cps, breakIndex); }, indexes);
+      lowerWord, RUSSIAN_PREFIXES, RUSSIAN_SUFFIXES,
      [&](const size_t breakIndex) { return russianBreakAllowed(cps, breakIndex); }, indexes);
 }
 // Produces syllable break indexes tailored to Russian phonotactics.