phonemize-text / app.py
cdleong's picture
Update app.py
383b08c
raw
history blame
3.07 kB
import streamlit as st
import epitran
import langcodes
from langcodes import LanguageTagError
from pathlib import Path
# TODO: reverse transliterate?
def get_lang_description_from_mapping_name(string_to_check):
    """Describe the language/script named by an epitran mapping name.

    The name is parsed as a language tag with ``langcodes.get``; the entries
    of ``Language.describe()`` are rendered as ``"key: value"`` pairs joined
    with ``", "``.  When parsing fails because a subtag is "out of place" or
    "must be followed by something" (observed with trailing components such
    as ``red``, ``east`` and ``p``), the last ``-``-separated component is
    dropped and the shorter name is tried again.

    Args:
        string_to_check: Mapping name to describe.  ``None``, the empty
            string and one-character strings are accepted and yield ``None``
            (a caller may have no selection to pass).

    Returns:
        ``"Generic Latin Script"`` for the special name ``"generic-Latn"``,
        otherwise the description string, or ``None`` when the name cannot
        be described.
    """
    # Handled up front, without consulting langcodes.
    if string_to_check == "generic-Latn":
        return "Generic Latin Script"

    # The falsy check covers None (which has no len()) and "" before the
    # length test rejects single-character names.
    if not string_to_check or len(string_to_check) < 2:
        return None

    try:
        lang = langcodes.get(string_to_check)
        if not lang:
            return None
        return ", ".join(
            f"{key}: {value}" for key, value in lang.describe().items()
        )
    except LanguageTagError as e:
        message = str(e)
        recoverable = (
            "out of place" in message
            or "must be followed by something" in message
        )
        if not recoverable:
            # Unknown parse failure: report it on stdout and give up.
            print("*****")
            print(e)
            return None

        # Errors recovered from here look like:
        #   This extlang subtag, 'red', is out of place. Expected territory,
        #   variant, extension, or end of string.
        #   This script subtag, 'east', is out of place. Expected territory,
        #   variant, extension, or end of string.
        #   The subtag 'p' must be followed by something
        # Drop the last component and retry.  Each retry is strictly shorter
        # and an empty name returns None above, so the recursion terminates.
        shorter_name = "-".join(string_to_check.split("-")[:-1])
        return get_lang_description_from_mapping_name(shorter_name)
def get_valid_epitran_mappings_list(map_dir=None):
    """Return the sorted names of the available epitran mapping files.

    Args:
        map_dir: Directory to scan.  Defaults to the ``data/map`` directory
            inside the installed ``epitran`` package.

    Returns:
        A sorted list with the stem (file name without its final suffix) of
        every directory entry matching ``*.*``.
    """
    if map_dir is None:
        map_dir = Path(epitran.__path__[0]) / "data" / "map"

    # Directory listing order is not guaranteed, so sort to keep the list
    # (and whatever a UI picks as its first entry) stable between runs.
    return sorted(map_file.stem for map_file in Path(map_dir).glob("*.*"))
def main():
    """Render the page: list mappings, pick one, transliterate the text."""
    st.write("# Phonemize your text with epitran!")
    st.write("Epitran supports the following languages/scripts:")

    mapping_names = get_valid_epitran_mappings_list()
    st.write(mapping_names)

    chosen_mapping = st.selectbox(
        "Select input language/script:",
        mapping_names,
    )
    description = get_lang_description_from_mapping_name(chosen_mapping)
    st.write(f"Selected input language/script: {description}")

    # NOTE: an earlier draft asked for a three-letter ISO 639-3 language code
    # (https://iso639-3.sil.org/, default "swa") and an ISO 15924 script code
    # (https://unicode.org/iso15924/iso15924-codes.html, default "Latn") in
    # two text inputs and joined them with "-" to form the mapping name.
    # The select box above replaces those inputs.
    text_to_convert = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    st.info("attempting to instantiate epitran transliterator for your language/script")
    transliterator = epitran.Epitran(chosen_mapping)

    st.info(f"transliterating `{text_to_convert}`\n\tusing {chosen_mapping}...")
    st.success(transliterator.transliterate(text_to_convert))


if __name__ == "__main__":
    main()