# Spaces: Running
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
# Every checkpoint id in the table is this prefix followed by the language code.
_MODEL_PREFIX = 'Helsinki-NLP/opus-mt-en-'

# (display name, language code) pairs, in the order the UI lists them.
_LANGUAGE_CODES = (
    ('Afrikaans', 'af'),
    ('Albanian', 'sq'),
    ('Amharic', 'am'),
    ('Arabic', 'ar'),
    ('Armenian', 'hy'),
    ('Bengali', 'bn'),
    ('Bosnian', 'bs'),
    ('Catalan', 'ca'),
    ('Croatian', 'hr'),
    ('Czech', 'cs'),
    ('Danish', 'da'),
    ('Dutch', 'nl'),
    ('Esperanto', 'eo'),
    ('Estonian', 'et'),
    ('Finnish', 'fi'),
    ('French', 'fr'),
    ('German', 'de'),
    ('Greek', 'el'),
    ('Gujarati', 'gu'),
    ('Haitian Creole', 'ht'),
    ('Hausa', 'ha'),
    ('Hawaiian', 'haw'),
    ('Hebrew', 'he'),
    ('Hindi', 'hi'),
    ('Hungarian', 'hu'),
    ('Icelandic', 'is'),
    ('Igbo', 'ig'),
    ('Indonesian', 'id'),
    ('Irish', 'ga'),
    ('Italian', 'it'),
    ('Japanese', 'ja'),
    ('Javanese', 'jw'),
    ('Kannada', 'kn'),
    ('Khmer', 'km'),
    ('Korean', 'ko'),
    ('Latin', 'la'),
    ('Latvian', 'lv'),
    ('Lithuanian', 'lt'),
    ('Luxembourgish', 'lb'),
    ('Macedonian', 'mk'),
    ('Malagasy', 'mg'),
    ('Malayalam', 'ml'),
    ('Maltese', 'mt'),
    ('Maori', 'mi'),
    ('Marathi', 'mr'),
    ('Myanmar', 'my'),
    ('Nepali', 'ne'),
    ('Norwegian', 'no'),
    ('Nyanja', 'ny'),
    ('Odia', 'or'),
    ('Oromo', 'om'),
    ('Pashto', 'ps'),
    ('Persian', 'fa'),
    ('Polish', 'pl'),
    ('Portuguese', 'pt'),
    ('Punjabi', 'pa'),
    ('Quechua', 'qu'),
    ('Romanian', 'ro'),
    ('Russian', 'ru'),
    ('Samoan', 'sm'),
    ('Scots Gaelic', 'gd'),
    ('Serbian', 'sr'),
    ('Sesotho', 'st'),
    ('Shona', 'sn'),
    ('Sindhi', 'sd'),
    ('Sinhala', 'si'),
    ('Slovak', 'sk'),
    ('Slovenian', 'sl'),
    ('Somali', 'so'),
    ('Spanish', 'es'),
    ('Sundanese', 'su'),
    ('Swahili', 'sw'),
    ('Swedish', 'sv'),
    ('Tajik', 'tg'),
    ('Tamil', 'ta'),
    ('Telugu', 'te'),
    ('Thai', 'th'),
    ('Turkmen', 'tk'),
    ('Turkish', 'tr'),
    ('Ukrainian', 'uk'),
    ('Urdu', 'ur'),
    ('Vietnamese', 'vi'),
    ('Welsh', 'cy'),
    ('Xhosa', 'xh'),
    ('Yiddish', 'yi'),
    ('Yoruba', 'yo'),
    ('Zulu', 'zu'),
)

# Maps a display name to (language code, model checkpoint id).
# NOTE(review): the ids are produced purely by the naming pattern above; it has
# not been checked here that every one of these checkpoints is actually
# published -- confirm before relying on a given language.
LANGUAGE_MODELS = {
    language: (code, _MODEL_PREFIX + code)
    for language, code in _LANGUAGE_CODES
}
# max_entries bounds how many tokenizer/model pairs stay resident at once, so
# browsing through many languages cannot grow memory without limit.
@st.cache_resource(max_entries=4)
def _load_pretrained(model_name):
    """Load the tokenizer and model for one checkpoint, cached by Streamlit.

    Streamlit re-executes the script on every widget interaction, so without
    a cache each click would load the checkpoint again. The cached objects
    are shared between reruns rather than copied.

    Args:
        model_name: Checkpoint id passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        OSError: Propagated from ``from_pretrained`` when the checkpoint
            cannot be found or fetched.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def load_model(target_language):
    """Return the tokenizer and model for translating into a language.

    Args:
        target_language: Display name of the language; must be a key of
            ``LANGUAGE_MODELS``.

    Returns:
        A ``(tokenizer, model)`` tuple, or ``(None, None)`` when the language
        is unknown or its checkpoint could not be loaded. In both failure
        cases an error message is shown through ``st.error``.
    """
    _, model_name = LANGUAGE_MODELS.get(target_language, (None, None))
    if not model_name:
        st.error(f"Model for language '{target_language}' not found.")
        return None, None

    try:
        return _load_pretrained(model_name)
    except OSError as exc:
        # A missing or unreachable checkpoint is reported like an unknown
        # language instead of crashing the page with a traceback.
        st.error(
            f"Could not load model '{model_name}' for language "
            f"'{target_language}': {exc}"
        )
        return None, None
def translate_text(text, target_language):
    """Translate English text into the requested language.

    Args:
        text: The English source text.
        target_language: Display name of the target language, as accepted by
            ``load_model``.

    Returns:
        The translated string, or ``""`` when ``text`` is empty or only
        whitespace, or when no tokenizer/model could be loaded.
    """
    # Nothing to translate: skip the model load entirely.
    if not text or not text.strip():
        return ""

    tokenizer, model = load_model(target_language)
    if tokenizer is None or model is None:
        return ""

    # truncation=True caps the input at the tokenizer's configured maximum
    # length instead of handing an over-long sequence to the model; text past
    # that limit is dropped.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    generated = model.generate(**inputs)
    # A single input sequence was encoded, so only the first output is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
def main():
    """Render the translator page and handle the "Translate" button.

    Shows a text area for the English source text and a select box listing
    the keys of ``LANGUAGE_MODELS``. When the button is pressed, the text is
    passed to ``translate_text`` and the result is written to the page.
    """
    st.title("Language Translator")
    st.write("Translate English text to any language.")

    source_text = st.text_area("Enter text in English:", "")
    target_language = st.selectbox(
        "Select target language:",
        options=list(LANGUAGE_MODELS),
    )

    if not st.button("Translate"):
        return

    # Whitespace-only input counts as empty.
    if not source_text or not source_text.strip():
        st.warning("Please enter text to translate.")
        return

    translated_text = translate_text(source_text, target_language)
    if not translated_text:
        # An empty result means nothing was translated; avoid printing a
        # result header with nothing underneath it.
        st.warning("No translation was produced.")
        return

    st.write(f"Translated text ({target_language}):")
    st.write(translated_text)
# Build the page only when this file is executed as the entry script.
if __name__ == "__main__":
    main()