"""Streamlit demo: transliterate text to IPA with an Epitran mapping.

The user picks one of the language/script mappings shipped inside the
installed ``epitran`` package, types some text, and sees the result of
``Epitran.transliterate`` for that mapping.
"""

from pathlib import Path
from typing import List, Optional

import epitran
import langcodes
import streamlit as st
from langcodes import LanguageTagError

# TODO: reverse transliterate?


def get_lang_description_from_mapping_name(string_to_check: str) -> Optional[str]:
    """Return a human-readable description for an Epitran mapping name.

    The mapping name is parsed as a language tag with ``langcodes``. When the
    tag is rejected because of a trailing subtag, that subtag is dropped and
    the shortened name is tried again.

    Args:
        string_to_check: Mapping name, e.g. ``"swa-Latn"``.

    Returns:
        ``"key: value"`` pairs from ``Language.describe()`` joined with
        ``", "``; the fixed text ``"Generic Latin Script"`` for
        ``"generic-Latn"``; or ``None`` when no description can be produced.
    """
    if string_to_check == "generic-Latn":
        return "Generic Latin Script"

    # Too short to be a language tag; this is also what terminates the
    # subtag-stripping recursion below once nothing useful is left.
    if len(string_to_check) < 2:
        return None

    try:
        lang = langcodes.get(string_to_check)
        if not lang:
            return None
        return ", ".join(
            f"{key}: {value}" for key, value in lang.describe().items()
        )
    except LanguageTagError as e:
        message = str(e)
        if "out of place" in message or "must be followed by something" in message:
            # Example errors that land here:
            # LanguageTagError: This extlang subtag, 'red', is out of place. Expected territory, variant, extension, or end of string.
            # LanguageTagError: This script subtag, 'east', is out of place. Expected territory, variant, extension, or end of string.
            # LanguageTagError: The subtag 'p' must be followed by something
            #
            # Drop the last "-"-separated piece and retry. rpartition yields
            # "" when there is no "-" left, so the length guard above stops
            # the recursion.
            shortened = string_to_check.rpartition("-")[0]
            return get_lang_description_from_mapping_name(shortened)

        print("*****")
        print(e)
        return None


def get_valid_epitran_mappings_list() -> List[str]:
    """Return the names of the mappings bundled with the installed epitran.

    The names are the file stems of every ``*.*`` file found in epitran's
    ``data/map`` directory, sorted so the selection list has a stable order.
    """
    map_path = Path(epitran.__path__[0]) / "data" / "map"
    return sorted(map_file.stem for map_file in map_path.glob("*.*"))


def main() -> None:
    """Render the Streamlit page and transliterate the entered text."""
    valid_epitran_mappings = get_valid_epitran_mappings_list()
    selected_mapping = st.selectbox(
        "Which language/script pair would you like to use?",
        valid_epitran_mappings,
    )
    description = get_lang_description_from_mapping_name(selected_mapping)
    st.write(f"You selected {selected_mapping}")
    if description:
        st.write(description)

    # NOTE: an earlier version of this page asked for a three-letter
    # ISO-639-3 language code (https://iso639-3.sil.org/, e.g. "swa") and an
    # ISO 15924 script code (https://unicode.org/iso15924/iso15924-codes.html,
    # e.g. 'Latn' for Latin script, 'Hans' for Chinese script) in two text
    # inputs and joined them with "-". The select box above replaces that.

    input_text = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    st.info("attempting to instantiate epitran transliterator for your language/script")
    epi = epitran.Epitran(selected_mapping)
    st.info(f"transliterating `{input_text}`\n\tusing {epi}...")
    transliteration = epi.transliterate(input_text)
    st.success(transliteration)


if __name__ == "__main__":
    main()