import streamlit as st
from transformers import MarianMTModel, MarianTokenizer


# Display name of each selectable target language -> Hugging Face model ID of
# the English-to-<language> MarianMT checkpoint used to translate into it.
# NOTE(review): every ID follows the "Helsinki-NLP/opus-mt-en-<code>" pattern,
# but nothing here shows that each one is actually published on the Hub —
# confirm every ID resolves before relying on it.
models = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
    "Dutch": "Helsinki-NLP/opus-mt-en-nl",
    "Russian": "Helsinki-NLP/opus-mt-en-ru",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Japanese": "Helsinki-NLP/opus-mt-en-ja",
    "Korean": "Helsinki-NLP/opus-mt-en-ko",
    "Arabic": "Helsinki-NLP/opus-mt-en-ar",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Thai": "Helsinki-NLP/opus-mt-en-th",
    "Vietnamese": "Helsinki-NLP/opus-mt-en-vi",
    "Swahili": "Helsinki-NLP/opus-mt-en-sw",
}


# Streamlit re-executes the whole script on every interaction, so without a
# cache each click on "Translate" would reload the checkpoint from scratch.
# max_entries bounds how many loaded models are kept alive at once so memory
# does not grow with every language the user tries.
@st.cache_resource(max_entries=3)
def load_model_and_tokenizer(model_name):
    """Load the MarianMT model and tokenizer for a checkpoint.

    Results are cached per ``model_name`` via ``st.cache_resource``; the same
    model and tokenizer objects are handed back on later calls, so callers
    should treat them as shared and not mutate them.

    Args:
        model_name: Model ID or path accepted by ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        Whatever ``from_pretrained`` raises when the checkpoint cannot be
        loaded; nothing is caught here.
    """
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    return model, tokenizer


def translate_text(text, target_lang):
    """Translate English ``text`` into ``target_lang``.

    Args:
        text: The English text to translate.
        target_lang: A key of ``models`` naming the target language.

    Returns:
        The translated string. Text that is empty or only whitespace yields an
        empty string without loading a model. Input longer than the
        tokenizer's maximum length is truncated before translation, so only
        its leading part is translated. If ``target_lang`` has no entry in
        ``models``, or loading/translation fails, a human-readable message is
        returned instead of raising.
    """
    model_name = models.get(target_lang)
    if not model_name:
        return "Translation model for the selected language is not available."

    try:
        # Nothing to translate: skip the (expensive) model load entirely
        # rather than asking the model to generate from blank input.
        if not text.strip():
            return ""

        model, tokenizer = load_model_and_tokenizer(model_name)
        # truncation=True caps the sequence at the tokenizer's maximum length
        # so an overlong input is shortened instead of being fed to the model
        # at a length it cannot handle.
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        translated = model.generate(**inputs)
        # A single input string produces a batch of one; decode that entry.
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as text so the page keeps working.
        return f"Error during translation: {e}"


def main():
    """Render the translator page and handle the "Translate" button.

    Shows a text area for the English input and a select box listing the keys
    of ``models``. When the button is pressed, the input is passed to
    ``translate_text`` and the returned string is written to the page; if the
    input is empty or only whitespace, a warning is shown instead.
    """
    st.title("Universal Language Translator")

    input_text = st.text_area("Enter text in English:", "")
    target_language = st.selectbox("Select target language:", list(models))

    if st.button("Translate"):
        # Whitespace-only input has nothing to translate; treat it as empty.
        if input_text.strip():
            # Loading a model and generating can take a while; show progress.
            with st.spinner("Translating..."):
                result = translate_text(input_text, target_language)
            st.write(f"Translation ({target_language}):")
            st.write(result)
        else:
            st.warning("Please enter some text to translate.")


# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|