File size: 4,458 Bytes
80d8416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""Hyphenation module"""

import string

from hyphen import Hyphenator, dictools

from modules.console_colors import (
    ULTRASINGER_HEAD,
    blue_highlighted,
)

# PyHyphen downloads its dictionaries from 'https://cgit.freedesktop.org/libreoffice/dictionaries/plain/'.
# The list below is an updated version of PyHyphen's dictools languages, so that they can be installed.
LANGUAGES = [
    # Downloadable dictionary keys: either a bare language code ("en")
    # or a language code with a region/script suffix ("pt_BR", "kmr_Latn").
    "af_ZA",
    "an_ES",
    "ar",
    "be_BY",
    "bg_BG",
    "bn_BD",
    "bo",
    "br_FR",
    "bs_BA",
    "ca",
    "ckb",
    "cs_CZ",
    "da_DK",
    "de",
    "el_GR",
    "en",
    "eo",
    "es",
    "et_EE",
    "fa_IR",
    "fr_FR",
    "gd_GB",
    "gl",
    "gu_IN",
    "gug",
    "he_IL",
    "hi_IN",
    "hr_HR",
    "hu_HU",
    "id",
    "is",
    "it_IT",
    "kmr_Latn",
    "ko_KR",
    "lo_LA",
    "lt_LT",
    "lv_LV",
    "mn_MN",
    "ne_NP",
    "nl_NL",
    "no",
    "oc_FR",
    "pl_PL",
    "pt_BR",
    "pt_PT",
    "ro",
    "ru_RU",
    "si_LK",
    "sk_SK",
    "sl_SI",
    "sq_AL",
    "sr",
    "sv_SE",
    "sw_TZ",
    "te_IN",
    "th_TH",
    "tr_TR",
    "uk_UA",
    "vi",
    "zu_ZA",
]

def language_check(language="en") -> str | None:
    """Resolve a language code to a hyphenation dictionary key.

    A dictionary key belongs to ``language`` when it is exactly ``language``
    or starts with ``language`` followed by an underscore. Matching on the
    bare prefix would confuse codes that share leading letters (for example
    "gu" and "gug").

    Candidates are tried in this order:

    1. installed key equal to the predicted region ``<language>_<LANGUAGE>``
       (e.g. "de_DE"),
    2. first installed key that carries a region suffix,
    3. downloadable key equal to ``language`` itself,
    4. downloadable key equal to the predicted region,
    5. first downloadable key for the language.

    Args:
        language: Language code to look up, e.g. "en".

    Returns:
        The selected dictionary key, or None when neither an installed nor a
        downloadable dictionary matches. The selected key is also printed.
    """

    region_prefix = f"{language}_"
    predicted_region = f"{language}_{language.upper()}"

    def belongs_to_language(key: str) -> bool:
        return key == language or key.startswith(region_prefix)

    # Only installed keys that carry a region suffix are considered
    installed_region_keys = [
        key
        for key in dictools.list_installed()
        if "_" in key and belongs_to_language(key)
    ]
    downloadable_keys = [key for key in LANGUAGES if belongs_to_language(key)]

    if predicted_region in installed_region_keys:
        lang_region = predicted_region
    elif installed_region_keys:
        # Take first installed region language
        lang_region = installed_region_keys[0]
    elif language in downloadable_keys:
        # The language is downloadable under its bare code
        lang_region = language
    elif predicted_region in downloadable_keys:
        lang_region = predicted_region
    elif downloadable_keys:
        # Take first downloadable region language
        lang_region = downloadable_keys[0]
    else:
        return None

    print(
        f"{ULTRASINGER_HEAD} Hyphenate using language code: {blue_highlighted(lang_region)}"
    )
    return lang_region


def contains_punctuation(word: str) -> bool:
    """Return True if any character of ``string.punctuation`` occurs in word"""

    for symbol in string.punctuation:
        if symbol in word:
            return True
    return False


def clean_word(word: str):
    """Strip punctuation and spaces from word.

    Returns a tuple of three values: the word without the stripped
    characters, the positions in the original word at which characters were
    stripped, and the stripped characters themselves (in the same order as
    the positions).
    """
    # Everything in string.punctuation plus the plain space gets stripped
    removable = string.punctuation + " "

    kept_chars = []
    removed_indices = []
    removed_symbols = []
    for position, symbol in enumerate(word):
        if symbol in removable:
            removed_indices.append(position)
            removed_symbols.append(symbol)
        else:
            kept_chars.append(symbol)

    return "".join(kept_chars), removed_indices, removed_symbols


def insert_removed_symbols(separated_array, removed_indices, symbols):
    """Insert previously removed symbols back into the syllables.

    Args:
        separated_array: Syllables of the cleaned word, in order.
        removed_indices: Positions in the original (uncleaned) word at which
            a symbol was removed.
        symbols: The removed symbols; ``symbols[k]`` is expected to be the
            symbol that was removed at ``removed_indices[k]``.

    Returns:
        A new list of syllables in which every symbol is back at its original
        position, so that joining the list reproduces the original word. A
        symbol is attached to the syllable of the character that follows it;
        symbols after the last character are appended to the last syllable.
    """
    removed_positions = set(removed_indices)
    result = []
    symbol_index = 0
    position = 0  # index into the original, uncleaned word

    # Add removed symbols to the syllables
    for syllable in separated_array:
        parts = []
        for char in syllable:
            # Several symbols may have been removed in a row (e.g. '("' or
            # '--'), so restore all of them before emitting the next char
            while position in removed_positions:
                parts.append(symbols[symbol_index])
                symbol_index += 1
                position += 1
            parts.append(char)
            position += 1
        result.append("".join(parts))

    # Add remaining symbols to the last syllable
    trailing = "".join(symbols[symbol_index:])
    if trailing:
        if result:
            result[-1] += trailing
        else:
            # No syllables at all: keep the symbols instead of failing
            result.append(trailing)

    return result


def create_hyphenator(lang_region: str) -> Hyphenator:
    """Build a Hyphenator for the given language/region key"""
    return Hyphenator(lang_region)


def hyphenation(word: str, hyphenator: Hyphenator) -> list[str] | None:
    """Split word into syllables.

    Punctuation and spaces are stripped before hyphenating and put back into
    the resulting syllables afterwards.

    Returns the list of syllables, or None when the word is not split into
    more than one syllable or its cleaned form exceeds 100 characters.
    """

    stripped_word, symbol_positions, stripped_symbols = clean_word(word)

    # The hyphenator throws an exception for words longer than 100 characters
    if len(stripped_word) > 100:
        return None

    syllables = hyphenator.syllables(stripped_word)
    if len(syllables) <= 1:
        return None

    return insert_removed_symbols(
        list(syllables), symbol_positions, stripped_symbols
    )