|
|
|
import streamlit as st |
|
from transformers import MarianMTModel, MarianTokenizer |
|
import torch |
|
|
|
|
|
# Streamlit re-executes this script on every widget interaction, so without
# caching the model would be re-instantiated on each click of "Translate".
# st.cache_resource keeps one (tokenizer, model) pair per distinct
# (src_lang, tgt_lang) for the lifetime of the server process.
# NOTE(review): st.cache_resource requires a reasonably recent Streamlit
# release -- confirm the deployed version provides it.
@st.cache_resource
def load_model(src_lang: str, tgt_lang: str):
    """Load the MarianMT tokenizer and model for one translation direction.

    The checkpoint name is built as
    ``Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}``.

    Args:
        src_lang: Source language code inserted into the checkpoint name.
        tgt_lang: Target language code inserted into the checkpoint name.

    Returns:
        A ``(tokenizer, model)`` tuple. Because the result is cached, the
        same objects are shared by every caller asking for the same pair.

    Raises:
        Any exception raised by ``from_pretrained`` is propagated unchanged,
        so the caller can report it.
    """
    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model
|
|
|
|
|
def translate_text(tokenizer, model, text):
    """Translate ``text`` using the given tokenizer/model pair.

    Args:
        tokenizer: Callable that encodes ``text`` into model inputs and
            provides ``decode`` for turning generated ids back into text.
        model: Object whose ``generate(**inputs)`` returns generated
            sequences.
        text: The text to translate.

    Returns:
        The decoded first generated sequence with special tokens removed,
        or ``""`` when ``text`` is an empty or whitespace-only string.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # it would generate for an empty prompt.
    if isinstance(text, str) and not text.strip():
        return ""

    # truncation=True with max_length=512 asks the tokenizer to cut the
    # input at 512 tokens, so very long text is only partially translated.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Inference only; gradient tracking is not needed.
    with torch.no_grad():
        generated = model.generate(**inputs)

    # Only the first generated sequence is decoded and returned.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
|
|
|
|
# --- Page header -----------------------------------------------------------
st.title("\U0001F310 Multilingual Translation App")
st.write("Translate text between multiple languages using open-source models!")

# Display label -> (source code, target code). The two codes are later
# passed to load_model, which inserts them into the checkpoint name.
language_pairs = {
    "English to French": ("en", "fr"),
    "French to English": ("fr", "en"),
    "English to Spanish": ("en", "es"),
    "Spanish to English": ("es", "en"),
    "English to German": ("en", "de"),
    "German to English": ("de", "en"),
    "English to Italian": ("en", "it"),
    "Italian to English": ("it", "en"),
    "English to Portuguese": ("en", "pt"),
    "Portuguese to English": ("pt", "en"),
    "English to Russian": ("en", "ru"),
    "Russian to English": ("ru", "en"),
    "English to Chinese": ("en", "zh"),
    "Chinese to English": ("zh", "en"),
    "English to Japanese": ("en", "ja"),
    "Japanese to English": ("ja", "en"),
    "English to Dutch": ("en", "nl"),
    "Dutch to English": ("nl", "en"),
    "English to Arabic": ("en", "ar"),
    "Arabic to English": ("ar", "en"),
    "English to Hindi": ("en", "hi"),
    "Hindi to English": ("hi", "en"),
    "English to Korean": ("en", "ko"),
    "Korean to English": ("ko", "en"),
    "English to Urdu": ("en", "ur"),
    "Urdu to English": ("ur", "en"),
}

# --- Inputs ----------------------------------------------------------------
# Iterating the dict yields its labels in insertion order, so the dropdown
# lists the pairs exactly as they are written above.
language_choice = st.selectbox("Select Language Pair", list(language_pairs))
src_lang, tgt_lang = language_pairs[language_choice]

text_input = st.text_area("Enter text to translate:")
|
|
|
|
|
# --- Translate action --------------------------------------------------------
if st.button("Translate"):
    # Treat whitespace-only input the same as empty input.
    if text_input.strip():
        try:
            # A spinner is removed once the work finishes; the previous
            # st.info banner stayed on the page next to the result.
            with st.spinner("Translating... Please wait!"):
                tokenizer, model = load_model(src_lang, tgt_lang)
                translated_text = translate_text(tokenizer, model, text_input)
        except Exception as e:
            # UI boundary: surface any load/translation failure to the user
            # instead of letting the script crash.
            st.error(f"Error: {e}. Please ensure the model exists for the selected pair.")
        else:
            # Rendering sits outside the try so only loading and translating
            # are covered by the "model exists" error message.
            st.success("Translated Text:")
            st.write(translated_text)
    else:
        st.warning("Please enter some text to translate.")

# --- Footer ------------------------------------------------------------------
st.markdown("---")
st.markdown("AYA Weekend Hackathon.")
|
|