Spaces:
Sleeping
Sleeping
"""Hyphenation module""" | |
import string | |
from hyphen import Hyphenator, dictools | |
from modules.console_colors import ( | |
ULTRASINGER_HEAD, | |
blue_highlighted, | |
) | |
# PyHyphen downloads its dictionaries from
# 'https://cgit.freedesktop.org/libreoffice/dictionaries/plain/'.
# This is an updated set of PyHyphen dictools language keys, so that the
# matching dictionaries can be installed. One row per initial letter.
LANGUAGES = [
    "af_ZA", "an_ES", "ar",
    "be_BY", "bg_BG", "bn_BD", "bo", "br_FR", "bs_BA",
    "ca", "ckb", "cs_CZ",
    "da_DK", "de",
    "el_GR", "en", "eo", "es", "et_EE",
    "fa_IR", "fr_FR",
    "gd_GB", "gl", "gu_IN", "gug",
    "he_IL", "hi_IN", "hr_HR", "hu_HU",
    "id", "is", "it_IT",
    "kmr_Latn", "ko_KR",
    "lo_LA", "lt_LT", "lv_LV",
    "mn_MN",
    "ne_NP", "nl_NL", "no",
    "oc_FR",
    "pl_PL", "pt_BR", "pt_PT",
    "ro", "ru_RU",
    "si_LK", "sk_SK", "sl_SI", "sq_AL", "sr", "sv_SE", "sw_TZ",
    "te_IN", "th_TH", "tr_TR",
    "uk_UA",
    "vi",
    "zu_ZA",
]
def language_check(language="en") -> str | None:
    """Resolve a language code to a hyphenation dictionary key.

    Dictionaries reported by ``dictools.list_installed()`` are preferred over
    the downloadable keys in ``LANGUAGES``. The lookup order is:

    1. installed key equal to ``f"{language}_{language.upper()}"``
    2. first installed key with a region part that belongs to ``language``
    3. downloadable key that is exactly ``language``
    4. downloadable key equal to ``f"{language}_{language.upper()}"``
    5. first downloadable key that belongs to ``language``

    A key belongs to ``language`` when it equals it or starts with
    ``language`` followed by ``"_"``.

    Args:
        language: Language code such as ``"en"``.

    Returns:
        The selected dictionary key, or ``None`` when nothing matches. The
        selected key is also printed to the console.
    """

    def belongs_to_language(key: str) -> bool:
        # A bare prefix test would also accept unrelated codes that merely
        # share leading letters (e.g. "km" would select "kmr_Latn"), so the
        # prefix has to end exactly at the region separator.
        return key == language or key.startswith(f"{language}_")

    # Guess the most common region naming scheme, e.g. "fr" -> "fr_FR".
    predicted_region = f"{language}_{language.upper()}"

    installed_region_keys = [
        key
        for key in dictools.list_installed()
        if "_" in key and belongs_to_language(key)
    ]

    if predicted_region in installed_region_keys:
        lang_region = predicted_region
    elif installed_region_keys:
        # Take the first installed region language
        lang_region = installed_region_keys[0]
    else:
        # Nothing suitable is installed: fall back to a downloadable key
        downloadable_keys = [key for key in LANGUAGES if belongs_to_language(key)]
        if language in downloadable_keys:
            # Return the exact key itself instead of relying on list order
            lang_region = language
        elif predicted_region in downloadable_keys:
            lang_region = predicted_region
        elif downloadable_keys:
            # Take the first downloadable region language
            lang_region = downloadable_keys[0]
        else:
            lang_region = None

    if lang_region is None:
        return None

    print(
        f"{ULTRASINGER_HEAD} Hyphenate using language code: {blue_highlighted(lang_region)}"
    )
    return lang_region
def contains_punctuation(word: str) -> bool:
    """Return True when at least one character of word is in string.punctuation."""
    return any(char in string.punctuation for char in word)
def clean_word(word: str):
    """Strip punctuation and spaces from word and record what was stripped.

    Returns a tuple ``(cleaned, indices, symbols)``: the word without
    punctuation/space characters, the positions those characters had in the
    original word, and the characters themselves (in the same order).
    """
    kept_chars = []
    removed_indices = []
    removed_symbols = []

    for position, symbol in enumerate(word):
        if symbol in string.punctuation or symbol in " ":
            # Remember where the symbol was so it can be restored later
            removed_indices.append(position)
            removed_symbols.append(symbol)
        else:
            kept_chars.append(symbol)

    return "".join(kept_chars), removed_indices, removed_symbols
def insert_removed_symbols(separated_array, removed_indices, symbols):
    """Put previously removed symbols back into a list of syllables.

    Args:
        separated_array: Syllables of the cleaned word, in order.
        removed_indices: Positions the removed symbols had in the original
            (uncleaned) word, parallel to ``symbols``.
        symbols: The removed symbols, in the order they appeared.

    Returns:
        A new list of syllables with every symbol re-inserted in front of the
        character that followed it. Symbols located after the last character
        are appended to the last syllable; if there are no syllables at all,
        they are returned as a single element.
    """
    removed_positions = set(removed_indices)  # O(1) membership per character
    result = []
    symbol_index = 0
    position = 0  # index into the original, uncleaned word

    # Add removed symbols to the syllables
    for syllable in separated_array:
        rebuilt = []
        for char in syllable:
            # `while` rather than `if`: several consecutive removed symbols
            # (e.g. "a--b") must all be restored before the next character.
            while position in removed_positions:
                rebuilt.append(symbols[symbol_index])
                symbol_index += 1
                position += 1
            rebuilt.append(char)
            position += 1
        result.append("".join(rebuilt))

    # Add remaining symbols to the last syllable
    trailing = "".join(symbols[symbol_index:])
    if trailing:
        if result:
            result[-1] += trailing
        else:
            # No syllable to attach to; keep the symbols instead of failing
            result.append(trailing)

    return result
def create_hyphenator(lang_region: str) -> Hyphenator:
    """Build and return a Hyphenator for the given language/region key."""
    return Hyphenator(lang_region)
def hyphenation(word: str, hyphenator: Hyphenator) -> list[str] | None:
    """Split word into syllables, keeping its punctuation and spaces.

    The word is cleaned with ``clean_word`` before being passed to
    ``hyphenator.syllables``; the removed symbols are put back afterwards
    with ``insert_removed_symbols``.

    Returns:
        The list of syllables, or ``None`` when the cleaned word is longer
        than 100 characters or yields fewer than two syllables.
    """
    stripped, symbol_positions, stripped_symbols = clean_word(word)

    # Hyphenation of word longer than 100 characters throws exception
    if len(stripped) > 100:
        return None

    parts = hyphenator.syllables(stripped)
    if len(parts) <= 1:
        # Nothing to split
        return None

    return insert_removed_symbols(list(parts), symbol_positions, stripped_symbols)