"""Streamlit app that translates English text with Helsinki-NLP MarianMT models.

The user types English text, picks a target language from ``LANGUAGE_MODELS``
and the matching ``opus-mt-en-*`` checkpoint is loaded (and cached) to produce
the translation.
"""

from typing import Optional, Tuple

import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Display name -> (language code, Hugging Face model repository id).
# The insertion order is the order shown in the language select box.
# NOTE(review): the repository ids follow the ``opus-mt-en-<code>`` naming
# pattern; it has not been verified here that every one of them is actually
# published on the Hub. A failed load is reported to the user by
# ``translate_text`` instead of crashing the app.
LANGUAGE_MODELS = {
    'Afrikaans': ('af', 'Helsinki-NLP/opus-mt-en-af'),
    'Albanian': ('sq', 'Helsinki-NLP/opus-mt-en-sq'),
    'Amharic': ('am', 'Helsinki-NLP/opus-mt-en-am'),
    'Arabic': ('ar', 'Helsinki-NLP/opus-mt-en-ar'),
    'Armenian': ('hy', 'Helsinki-NLP/opus-mt-en-hy'),
    'Bengali': ('bn', 'Helsinki-NLP/opus-mt-en-bn'),
    'Bosnian': ('bs', 'Helsinki-NLP/opus-mt-en-bs'),
    'Catalan': ('ca', 'Helsinki-NLP/opus-mt-en-ca'),
    'Croatian': ('hr', 'Helsinki-NLP/opus-mt-en-hr'),
    'Czech': ('cs', 'Helsinki-NLP/opus-mt-en-cs'),
    'Danish': ('da', 'Helsinki-NLP/opus-mt-en-da'),
    'Dutch': ('nl', 'Helsinki-NLP/opus-mt-en-nl'),
    'Esperanto': ('eo', 'Helsinki-NLP/opus-mt-en-eo'),
    'Estonian': ('et', 'Helsinki-NLP/opus-mt-en-et'),
    'Finnish': ('fi', 'Helsinki-NLP/opus-mt-en-fi'),
    'French': ('fr', 'Helsinki-NLP/opus-mt-en-fr'),
    'German': ('de', 'Helsinki-NLP/opus-mt-en-de'),
    'Greek': ('el', 'Helsinki-NLP/opus-mt-en-el'),
    'Gujarati': ('gu', 'Helsinki-NLP/opus-mt-en-gu'),
    'Haitian Creole': ('ht', 'Helsinki-NLP/opus-mt-en-ht'),
    'Hausa': ('ha', 'Helsinki-NLP/opus-mt-en-ha'),
    'Hawaiian': ('haw', 'Helsinki-NLP/opus-mt-en-haw'),
    'Hebrew': ('he', 'Helsinki-NLP/opus-mt-en-he'),
    'Hindi': ('hi', 'Helsinki-NLP/opus-mt-en-hi'),
    'Hungarian': ('hu', 'Helsinki-NLP/opus-mt-en-hu'),
    'Icelandic': ('is', 'Helsinki-NLP/opus-mt-en-is'),
    'Igbo': ('ig', 'Helsinki-NLP/opus-mt-en-ig'),
    'Indonesian': ('id', 'Helsinki-NLP/opus-mt-en-id'),
    'Irish': ('ga', 'Helsinki-NLP/opus-mt-en-ga'),
    'Italian': ('it', 'Helsinki-NLP/opus-mt-en-it'),
    'Japanese': ('ja', 'Helsinki-NLP/opus-mt-en-ja'),
    'Javanese': ('jw', 'Helsinki-NLP/opus-mt-en-jw'),
    'Kannada': ('kn', 'Helsinki-NLP/opus-mt-en-kn'),
    'Khmer': ('km', 'Helsinki-NLP/opus-mt-en-km'),
    'Korean': ('ko', 'Helsinki-NLP/opus-mt-en-ko'),
    'Latin': ('la', 'Helsinki-NLP/opus-mt-en-la'),
    'Latvian': ('lv', 'Helsinki-NLP/opus-mt-en-lv'),
    'Lithuanian': ('lt', 'Helsinki-NLP/opus-mt-en-lt'),
    'Luxembourgish': ('lb', 'Helsinki-NLP/opus-mt-en-lb'),
    'Macedonian': ('mk', 'Helsinki-NLP/opus-mt-en-mk'),
    'Malagasy': ('mg', 'Helsinki-NLP/opus-mt-en-mg'),
    'Malayalam': ('ml', 'Helsinki-NLP/opus-mt-en-ml'),
    'Maltese': ('mt', 'Helsinki-NLP/opus-mt-en-mt'),
    'Maori': ('mi', 'Helsinki-NLP/opus-mt-en-mi'),
    'Marathi': ('mr', 'Helsinki-NLP/opus-mt-en-mr'),
    'Myanmar': ('my', 'Helsinki-NLP/opus-mt-en-my'),
    'Nepali': ('ne', 'Helsinki-NLP/opus-mt-en-ne'),
    'Norwegian': ('no', 'Helsinki-NLP/opus-mt-en-no'),
    'Nyanja': ('ny', 'Helsinki-NLP/opus-mt-en-ny'),
    'Odia': ('or', 'Helsinki-NLP/opus-mt-en-or'),
    'Oromo': ('om', 'Helsinki-NLP/opus-mt-en-om'),
    'Pashto': ('ps', 'Helsinki-NLP/opus-mt-en-ps'),
    'Persian': ('fa', 'Helsinki-NLP/opus-mt-en-fa'),
    'Polish': ('pl', 'Helsinki-NLP/opus-mt-en-pl'),
    'Portuguese': ('pt', 'Helsinki-NLP/opus-mt-en-pt'),
    'Punjabi': ('pa', 'Helsinki-NLP/opus-mt-en-pa'),
    'Quechua': ('qu', 'Helsinki-NLP/opus-mt-en-qu'),
    'Romanian': ('ro', 'Helsinki-NLP/opus-mt-en-ro'),
    'Russian': ('ru', 'Helsinki-NLP/opus-mt-en-ru'),
    'Samoan': ('sm', 'Helsinki-NLP/opus-mt-en-sm'),
    'Scots Gaelic': ('gd', 'Helsinki-NLP/opus-mt-en-gd'),
    'Serbian': ('sr', 'Helsinki-NLP/opus-mt-en-sr'),
    'Sesotho': ('st', 'Helsinki-NLP/opus-mt-en-st'),
    'Shona': ('sn', 'Helsinki-NLP/opus-mt-en-sn'),
    'Sindhi': ('sd', 'Helsinki-NLP/opus-mt-en-sd'),
    'Sinhala': ('si', 'Helsinki-NLP/opus-mt-en-si'),
    'Slovak': ('sk', 'Helsinki-NLP/opus-mt-en-sk'),
    'Slovenian': ('sl', 'Helsinki-NLP/opus-mt-en-sl'),
    'Somali': ('so', 'Helsinki-NLP/opus-mt-en-so'),
    'Spanish': ('es', 'Helsinki-NLP/opus-mt-en-es'),
    'Sundanese': ('su', 'Helsinki-NLP/opus-mt-en-su'),
    'Swahili': ('sw', 'Helsinki-NLP/opus-mt-en-sw'),
    'Swedish': ('sv', 'Helsinki-NLP/opus-mt-en-sv'),
    'Tajik': ('tg', 'Helsinki-NLP/opus-mt-en-tg'),
    'Tamil': ('ta', 'Helsinki-NLP/opus-mt-en-ta'),
    'Telugu': ('te', 'Helsinki-NLP/opus-mt-en-te'),
    'Thai': ('th', 'Helsinki-NLP/opus-mt-en-th'),
    'Turkmen': ('tk', 'Helsinki-NLP/opus-mt-en-tk'),
    'Turkish': ('tr', 'Helsinki-NLP/opus-mt-en-tr'),
    'Ukrainian': ('uk', 'Helsinki-NLP/opus-mt-en-uk'),
    'Urdu': ('ur', 'Helsinki-NLP/opus-mt-en-ur'),
    'Vietnamese': ('vi', 'Helsinki-NLP/opus-mt-en-vi'),
    'Welsh': ('cy', 'Helsinki-NLP/opus-mt-en-cy'),
    'Xhosa': ('xh', 'Helsinki-NLP/opus-mt-en-xh'),
    'Yiddish': ('yi', 'Helsinki-NLP/opus-mt-en-yi'),
    'Yoruba': ('yo', 'Helsinki-NLP/opus-mt-en-yo'),
    'Zulu': ('zu', 'Helsinki-NLP/opus-mt-en-zu'),
}


# NOTE(review): the cache is unbounded, so every language a user tries keeps
# its model in memory; consider ``max_entries`` if memory becomes a concern.
@st.cache_resource
def load_model(
    target_language: str,
) -> Tuple[Optional[MarianTokenizer], Optional[MarianMTModel]]:
    """Load the tokenizer and model for *target_language*.

    The result is cached by Streamlit, so each language is loaded once.

    Args:
        target_language: A display name that is a key of ``LANGUAGE_MODELS``.

    Returns:
        ``(tokenizer, model)``, or ``(None, None)`` (after showing an error
        in the UI) when the language is not in ``LANGUAGE_MODELS``.

    Raises:
        OSError: Propagated from ``from_pretrained`` when the checkpoint
            cannot be found or downloaded. It is deliberately not caught
            here so that a failed load is not stored in the cache.
    """
    _, model_name = LANGUAGE_MODELS.get(target_language, (None, None))
    if not model_name:
        st.error(f"Model for language '{target_language}' not found.")
        return None, None

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text: str, target_language: str) -> str:
    """Translate English *text* into *target_language*.

    The text is translated line by line (as one padded batch) so that the
    line structure of the input is kept and a long multi-line input is not
    fed to the model as a single over-long sequence. Blank lines are passed
    through untouched.

    Args:
        text: English source text.
        target_language: A display name that is a key of ``LANGUAGE_MODELS``.

    Returns:
        The translated text, or ``""`` when the model could not be loaded
        (an error is shown in the UI) or *text* contains nothing to
        translate.
    """
    try:
        tokenizer, model = load_model(target_language)
    except OSError as exc:
        # Raised by ``from_pretrained`` for a missing repository or a failed
        # download; report it instead of letting the app crash.
        st.error(
            f"Could not load the translation model for '{target_language}': {exc}"
        )
        return ""
    if tokenizer is None or model is None:
        return ""

    lines = text.splitlines()
    segments = [line for line in lines if line.strip()]
    if not segments:
        return ""

    # ``truncation=True`` caps each segment at the tokenizer's maximum length
    # so a single very long line cannot overflow the model.
    inputs = tokenizer(
        segments, return_tensors="pt", padding=True, truncation=True
    )
    generated = model.generate(**inputs)
    translated_segments = iter(
        tokenizer.batch_decode(generated, skip_special_tokens=True)
    )

    # Re-insert the translations at the positions of the non-blank lines.
    output_lines = []
    for line in lines:
        output_lines.append(next(translated_segments) if line.strip() else "")
    return "\n".join(output_lines)


def main() -> None:
    """Render the translator page and handle the "Translate" button."""
    st.title("Language Translator")
    st.write("Translate English text to any language.")

    source_text = st.text_area("Enter text in English:", "")
    target_language = st.selectbox(
        "Select target language:",
        options=list(LANGUAGE_MODELS),
    )

    if not st.button("Translate"):
        return

    # Whitespace-only input carries nothing to translate.
    if not source_text.strip():
        st.warning("Please enter text to translate.")
        return

    translated_text = translate_text(source_text, target_language)
    # An empty result means an error was already shown; skip the header.
    if translated_text:
        st.write(f"Translated text ({target_language}):")
        st.write(translated_text)


if __name__ == "__main__":
    main()