File size: 3,435 Bytes
8a60c72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Display names offered in the UI, each paired with the language code used to
# look up translation models. Dict order follows the order of this table.
_LANGUAGE_TABLE = (
    ('English', 'en'),
    ('Urdu', 'ur'),
    ('French', 'fr'),
    ('Spanish', 'es'),
    ('German', 'de'),
    ('Chinese', 'zh'),
    ('Italian', 'it'),
    ('Russian', 'ru'),
    ('Japanese', 'ja'),
    ('Arabic', 'ar'),
    ('Hindi', 'hi'),
)
languages = dict(_LANGUAGE_TABLE)

# Codes that are paired with English in both directions.
# Add more codes here as further pairs become available.
_ENGLISH_PARTNERS = ('ur', 'fr', 'es', 'de', 'zh', 'it', 'ru', 'ja', 'ar', 'hi')

# Maps (source code, target code) -> model identifier. For every partner code
# the English->partner entry comes first, followed by partner->English.
language_pairs = {
    (src, tgt): f'Helsinki-NLP/opus-mt-{src}-{tgt}'
    for partner in _ENGLISH_PARTNERS
    for src, tgt in (('en', partner), (partner, 'en'))
}

# Upper bound on simultaneously cached (model, tokenizer) pairs; every entry
# keeps a full model in memory, so the cache is deliberately kept small.
_MAX_CACHED_MODELS = 4


def _cache_across_reruns(func):
    """Wrap *func* in Streamlit's resource cache when it is available.

    Streamlit re-executes this script on every interaction, so an ordinary
    module-level cache would be rebuilt each time. ``st.cache_resource``
    keeps the returned objects alive between reruns. On Streamlit versions
    that do not provide it, *func* is returned unchanged (no caching).
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is None:
        return func
    # show_spinner=False keeps the page free of an extra status widget.
    return cache_resource(show_spinner=False, max_entries=_MAX_CACHED_MODELS)(func)


@_cache_across_reruns
def _load_pretrained(model_name):
    """Instantiate the MarianMT model and tokenizer named *model_name*."""
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def load_model(src_lang, tgt_lang):
    """Return the ``(model, tokenizer)`` pair for a direct language pair.

    Args:
        src_lang: Source language code, e.g. ``'en'``.
        tgt_lang: Target language code, e.g. ``'fr'``.

    Returns:
        A ``(MarianMTModel, MarianTokenizer)`` tuple. The pair is loaded once
        per model name and reused on later calls where caching is available.

    Raises:
        ValueError: If ``language_pairs`` has no entry for
            ``(src_lang, tgt_lang)``.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        # Checked outside the cached helper so the error is never cached.
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")

    return _load_pretrained(model_name)

def translate(text, src_lang, tgt_lang):
    """Translate *text* with the direct model for ``src_lang`` -> ``tgt_lang``.

    Args:
        text: The text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation of the first (only) generated sequence, with
        special tokens removed.

    Raises:
        ValueError: Propagated from ``load_model`` when no direct model is
            registered for the pair.

    Note:
        Input longer than the tokenizer's maximum length is truncated, so
        very long text is only translated up to that limit.
    """
    model, tokenizer = load_model(src_lang, tgt_lang)
    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, and truncation=True caps the input at the tokenizer's maximum
    # length rather than handing the model an over-long sequence.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

def translate_chain(text, src_lang, tgt_lang):
    """Translate *text* from ``src_lang`` to ``tgt_lang`` by pivoting on English.

    A hop into English is performed unless the source already is English,
    and a hop out of English unless the target is English. When both are
    English no translation happens and *text* is returned as given.
    """
    pivot = 'en'

    # Collect the required hops first, then apply them in order.
    hops = []
    if src_lang != pivot:
        hops.append((src_lang, pivot))
    if tgt_lang != pivot:
        hops.append((pivot, tgt_lang))

    for hop_src, hop_tgt in hops:
        text = translate(text, hop_src, hop_tgt)
    return text

def translate_ui(text, source_language, target_language):
    """Translate *text* between two languages chosen by display name.

    Args:
        text: The text to translate.
        source_language: Display name of the source language; must be a key
            of ``languages``.
        target_language: Display name of the target language; must be a key
            of ``languages``.

    Returns:
        The translated text. When source and target are the same language
        the input is returned unchanged.

    Raises:
        KeyError: If either display name is not a key of ``languages``.
    """
    src_lang = languages[source_language]
    tgt_lang = languages[target_language]

    # Nothing to translate; this also avoids a lossy round trip through
    # English when the same non-English language is picked on both sides.
    if src_lang == tgt_lang:
        return text

    # Decide up front instead of catching ValueError, so an unrelated
    # ValueError raised during tokenization or generation is not mistaken
    # for "no direct model" and retried through the pivot chain.
    if (src_lang, tgt_lang) in language_pairs:
        return translate(text, src_lang, tgt_lang)
    return translate_chain(text, src_lang, tgt_lang)

# --- Page header ---
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# --- Inputs: free text plus the source/target language pickers ---
text = st.text_area("Enter text to translate", height=100)

# Both pickers offer the same display names, in the order of `languages`.
language_names = list(languages)
source_language = st.selectbox("Select Source Language", language_names)
target_language = st.selectbox("Select Target Language", language_names)

# --- Action: translate on click, but only when the input is not blank ---
translate_clicked = st.button("Translate")
if translate_clicked and not text.strip():
    st.warning("Please enter text to translate.")
elif translate_clicked:
    translation = translate_ui(text, source_language, target_language)
    st.text_area("Translated Text", translation, height=100)

# --- Sidebar: short description of the app ---
about_panel = st.sidebar
about_panel.title("About")
about_panel.info(
    """
    This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
    """
)