File size: 6,361 Bytes
e44a4c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Display name -> language code for every selectable target language.
# Insertion order is preserved and carries over to LANGUAGE_MODELS.
_LANGUAGE_CODES = {
    'Afrikaans': 'af',
    'Albanian': 'sq',
    'Amharic': 'am',
    'Arabic': 'ar',
    'Armenian': 'hy',
    'Bengali': 'bn',
    'Bosnian': 'bs',
    'Catalan': 'ca',
    'Croatian': 'hr',
    'Czech': 'cs',
    'Danish': 'da',
    'Dutch': 'nl',
    'Esperanto': 'eo',
    'Estonian': 'et',
    'Finnish': 'fi',
    'French': 'fr',
    'German': 'de',
    'Greek': 'el',
    'Gujarati': 'gu',
    'Haitian Creole': 'ht',
    'Hausa': 'ha',
    'Hawaiian': 'haw',
    'Hebrew': 'he',
    'Hindi': 'hi',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Igbo': 'ig',
    'Indonesian': 'id',
    'Irish': 'ga',
    'Italian': 'it',
    'Japanese': 'ja',
    'Javanese': 'jw',
    'Kannada': 'kn',
    'Khmer': 'km',
    'Korean': 'ko',
    'Latin': 'la',
    'Latvian': 'lv',
    'Lithuanian': 'lt',
    'Luxembourgish': 'lb',
    'Macedonian': 'mk',
    'Malagasy': 'mg',
    'Malayalam': 'ml',
    'Maltese': 'mt',
    'Maori': 'mi',
    'Marathi': 'mr',
    'Myanmar': 'my',
    'Nepali': 'ne',
    'Norwegian': 'no',
    'Nyanja': 'ny',
    'Odia': 'or',
    'Oromo': 'om',
    'Pashto': 'ps',
    'Persian': 'fa',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Punjabi': 'pa',
    'Quechua': 'qu',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Samoan': 'sm',
    'Scots Gaelic': 'gd',
    'Serbian': 'sr',
    'Sesotho': 'st',
    'Shona': 'sn',
    'Sindhi': 'sd',
    'Sinhala': 'si',
    'Slovak': 'sk',
    'Slovenian': 'sl',
    'Somali': 'so',
    'Spanish': 'es',
    'Sundanese': 'su',
    'Swahili': 'sw',
    'Swedish': 'sv',
    'Tajik': 'tg',
    'Tamil': 'ta',
    'Telugu': 'te',
    'Thai': 'th',
    'Turkmen': 'tk',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Urdu': 'ur',
    'Vietnamese': 'vi',
    'Welsh': 'cy',
    'Xhosa': 'xh',
    'Yiddish': 'yi',
    'Yoruba': 'yo',
    'Zulu': 'zu',
}

# Every model identifier is this prefix followed by the language code.
_MODEL_PREFIX = 'Helsinki-NLP/opus-mt-en-'

# Display name -> (language code, model identifier).
# NOTE(review): nothing here checks that each identifier names a published
# model -- confirm the entries that matter before relying on them.
LANGUAGE_MODELS = {
    name: (code, _MODEL_PREFIX + code)
    for name, code in _LANGUAGE_CODES.items()
}

# Bound the cache: each entry holds a full translation model in memory, and
# the language table offers dozens of choices. Once the limit is reached the
# oldest entry is dropped and reloaded on demand.
@st.cache_resource(max_entries=3)
def load_model(target_language):
    """Load the Marian tokenizer and model for a target language.

    Results are cached by Streamlit per ``target_language`` so repeated
    translations into the same language reuse the loaded objects.

    Args:
        target_language: Display name of the language; expected to be a key
            of ``LANGUAGE_MODELS``.

    Returns:
        A ``(tokenizer, model)`` tuple. If the language has no entry in
        ``LANGUAGE_MODELS``, an error is shown in the UI and
        ``(None, None)`` is returned.

    Raises:
        Whatever ``from_pretrained`` raises when the model cannot be
        loaded; it is not handled here so that a failed load is not cached.
    """
    # Only the model identifier is needed; the language code is ignored.
    _, model_name = LANGUAGE_MODELS.get(target_language, (None, None))
    if not model_name:
        st.error(f"Model for language '{target_language}' not found.")
        return None, None

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model

def translate_text(text, target_language):
    """Translate English text into the given target language.

    Args:
        text: English source text.
        target_language: Display name of the target language, passed on to
            ``load_model``.

    Returns:
        The translated string, or ``""`` when ``text`` is empty or only
        whitespace, or when the tokenizer/model could not be loaded.
    """
    # Nothing to translate: skip the (potentially slow) model load entirely.
    if not text or not text.strip():
        return ""

    try:
        tokenizer, model = load_model(target_language)
    except OSError as err:
        # from_pretrained reports an unknown model id or a failed download
        # as OSError; surface it in the UI instead of a raw traceback.
        st.error(f"Could not load the model for '{target_language}': {err}")
        return ""
    if tokenizer is None or model is None:
        return ""

    # Tokenize the input; truncation keeps over-long text within the
    # tokenizer's maximum length instead of failing inside the model.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Generate the translation token ids.
    translated = model.generate(**inputs)

    # Decode the first (only) sequence back to plain text.
    return tokenizer.decode(translated[0], skip_special_tokens=True)

def main():
    """Render the translator page and handle the Translate button.

    Shows a text area for English input and a selector listing every key of
    ``LANGUAGE_MODELS``. When the button is pressed, the text is translated
    and displayed; blank input produces a warning instead.
    """
    st.title("Language Translator")
    st.write("Translate English text to any language.")

    # Input text from the user
    source_text = st.text_area("Enter text in English:", "")

    # Select target language
    target_language = st.selectbox(
        "Select target language:",
        options=list(LANGUAGE_MODELS.keys())
    )

    if not st.button("Translate"):
        return

    # Treat whitespace-only input the same as no input.
    if not source_text.strip():
        st.warning("Please enter text to translate.")
        return

    translated_text = translate_text(source_text, target_language)
    if translated_text:
        st.write(f"Translated text ({target_language}):")
        st.write(translated_text)
    else:
        # Avoid printing a result header with nothing underneath it.
        st.warning("No translation was produced.")

# Script entry point: build the page only when this file is executed as the
# main script, not when it is imported as a module.
if __name__ == "__main__":
    main()