"""Hyphenation module""" import string from hyphen import Hyphenator, dictools from modules.console_colors import ( ULTRASINGER_HEAD, blue_highlighted, ) # PyHyphen tries to retrieve dictionaries for download 'https://cgit.freedesktop.org/libreoffice/dictionaries/plain/' # Updated PyHyphen dictools Languages, so they can be installed LANGUAGES = [ "af_ZA", "an_ES", "ar", "be_BY", "bg_BG", "bn_BD", "bo", "br_FR", "bs_BA", "ca", "ckb", "cs_CZ", "da_DK", "de", "el_GR", "en", "eo", "es", "et_EE", "fa_IR", "fr_FR", "gd_GB", "gl", "gu_IN", "gug", "he_IL", "hi_IN", "hr_HR", "hu_HU", "id", "is", "it_IT", "kmr_Latn", "ko_KR", "lo_LA", "lt_LT", "lv_LV", "mn_MN", "ne_NP", "nl_NL", "no", "oc_FR", "pl_PL", "pt_BR", "pt_PT", "ro", "ru_RU", "si_LK", "sk_SK", "sl_SI", "sq_AL", "sr", "sv_SE", "sw_TZ", "te_IN", "th_TH", "tr_TR", "uk_UA", "vi", "zu_ZA", ] def language_check(language="en") -> str | None: """Check if language is supported""" lang_region = None installed = dictools.list_installed() installed_region_keys = [i for i in installed if i.startswith(language) and "_" in i] try: # Try to find installed language with region prediction lang_region = next(i for i in installed_region_keys if i == f"{language}_{language.upper()}") except StopIteration: if installed_region_keys: # Take first installed region language lang_region = installed_region_keys[0] else: # Take downloadable language key downloadable_key = [i for i in LANGUAGES if i.startswith(language)] downloadable_folder_key = [i for i in downloadable_key if i == language] if downloadable_folder_key: lang_region = downloadable_key[0] else: try: # Try to find downloadable language with region prediction lang_region = next(i for i in downloadable_key if i == f"{language}_{language.upper()}") except StopIteration: if downloadable_key: # Take first installed region language lang_region = downloadable_key[0] if lang_region is None: return None print( f"{ULTRASINGER_HEAD} Hyphenate using language code: {blue_highlighted(lang_region)}" ) return lang_region def contains_punctuation(word: str) -> bool: """Check if word contains punctuation""" return any(elem in word for elem in string.punctuation) def clean_word(word: str): """Remove punctuation from word""" cleaned_string = "" removed_indices = [] removed_symbols = [] for i, char in enumerate(word): if char not in string.punctuation and char not in " ": cleaned_string += char else: removed_indices.append(i) removed_symbols.append(char) return cleaned_string, removed_indices, removed_symbols def insert_removed_symbols(separated_array, removed_indices, symbols): """Insert symbols into the syllables""" result = [] symbol_index = 0 i = 0 # Add removed symbols to the syllables for syllable in separated_array: tmp = "" for char in syllable: if i in removed_indices: tmp += symbols[symbol_index] symbol_index += 1 i += 1 tmp += char i += 1 result.append(tmp) # Add remaining symbols to the last syllable if symbol_index < len(symbols): tmp = result[-1] for i in range(symbol_index, len(symbols)): tmp += symbols[i] result[-1] = tmp return result def create_hyphenator(lang_region: str) -> Hyphenator: """Create hyphenator""" hyphenator = Hyphenator(lang_region) return hyphenator def hyphenation(word: str, hyphenator: Hyphenator) -> list[str] | None: """Hyphenate word""" cleaned_string, removed_indices, removed_symbols = clean_word(word) # Hyphenation of word longer than 100 characters throws exception if len(cleaned_string) > 100: return None syllabus = hyphenator.syllables(cleaned_string) length = len(syllabus) if length > 1: hyphen = [] for i in range(length): hyphen.append(syllabus[i]) hyphen = insert_removed_symbols(hyphen, removed_indices, removed_symbols) else: hyphen = None return hyphen