datascientist22 committed on
Commit
51d3578
·
verified ·
1 Parent(s): 4e07da5

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +149 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import MarianMTModel, MarianTokenizer
3
+
4
# (display name, OPUS-MT target-language code) pairs, in the order they are
# offered to the user. Each code is expanded below into the Hub identifier
# 'Helsinki-NLP/opus-mt-en-<code>' of an English -> <language> checkpoint.
# NOTE(review): these identifiers are not verified against the Hub here; some
# (e.g. the English -> English entry) look suspicious -- confirm they exist.
_LANGUAGE_CODES = (
    ('Afrikaans', 'af'),
    ('Amharic', 'am'),
    ('Arabic', 'ar'),
    ('Asturian', 'ast'),
    ('Azerbaijani', 'az'),
    ('Bashkir', 'ba'),
    ('Belarusian', 'be'),
    ('Bulgarian', 'bg'),
    ('Bengali', 'bn'),
    ('Breton', 'br'),
    ('Bosnian', 'bs'),
    ('Catalan', 'ca'),
    ('Cebuano', 'ceb'),
    ('Czech', 'cs'),
    ('Welsh', 'cy'),
    ('Danish', 'da'),
    ('German', 'de'),
    ('Greek', 'el'),
    ('English', 'en'),
    ('Spanish', 'es'),
    ('Estonian', 'et'),
    ('Persian', 'fa'),
    ('Fulah', 'ff'),
    ('Finnish', 'fi'),
    ('French', 'fr'),
    ('Western Frisian', 'fy'),
    ('Irish', 'ga'),
    ('Scottish Gaelic', 'gd'),
    ('Galician', 'gl'),
    ('Gujarati', 'gu'),
    ('Hausa', 'ha'),
    ('Hebrew', 'he'),
    ('Hindi', 'hi'),
    ('Croatian', 'hr'),
    ('Haitian Creole', 'ht'),
    ('Hungarian', 'hu'),
    ('Armenian', 'hy'),
    ('Indonesian', 'id'),
    ('Igbo', 'ig'),
    ('Iloko', 'ilo'),
    ('Icelandic', 'is'),
    ('Italian', 'it'),
    ('Japanese', 'ja'),
    ('Javanese', 'jv'),
    ('Georgian', 'ka'),
    ('Kazakh', 'kk'),
    ('Central Khmer', 'km'),
    ('Kannada', 'kn'),
    ('Korean', 'ko'),
    ('Luxembourgish', 'lb'),
    ('Ganda', 'lg'),
    ('Lingala', 'ln'),
    ('Lao', 'lo'),
    ('Lithuanian', 'lt'),
    ('Latvian', 'lv'),
    ('Malagasy', 'mg'),
    ('Macedonian', 'mk'),
    ('Malayalam', 'ml'),
    ('Mongolian', 'mn'),
    ('Marathi', 'mr'),
    ('Malay', 'ms'),
    ('Burmese', 'my'),
    ('Nepali', 'ne'),
    ('Dutch', 'nl'),
    ('Norwegian', 'no'),
    ('Northern Sotho', 'ns'),
    ('Occitan', 'oc'),
    ('Oriya', 'or'),
    ('Panjabi', 'pa'),
    ('Polish', 'pl'),
    ('Pushto', 'ps'),
    ('Portuguese', 'pt'),
    ('Romanian', 'ro'),
    ('Russian', 'ru'),
    ('Sindhi', 'sd'),
    ('Sinhala', 'si'),
    ('Slovak', 'sk'),
    ('Slovenian', 'sl'),
    ('Somali', 'so'),
    ('Albanian', 'sq'),
    ('Serbian', 'sr'),
    ('Swati', 'ss'),
    ('Sundanese', 'su'),
    ('Swedish', 'sv'),
    ('Swahili', 'sw'),
    ('Tamil', 'ta'),
    ('Thai', 'th'),
    ('Tagalog', 'tl'),
    ('Tswana', 'tn'),
    ('Turkish', 'tr'),
    ('Ukrainian', 'uk'),
    ('Urdu', 'ur'),
    ('Uzbek', 'uz'),
    ('Vietnamese', 'vi'),
    ('Wolof', 'wo'),
    ('Xhosa', 'xh'),
    ('Yiddish', 'yi'),
    ('Yoruba', 'yo'),
    ('Chinese', 'zh'),
    ('Zulu', 'zu'),
)

# Language name -> model identifier, preserving the table order above so the
# language picker lists entries in the same sequence.
models = {
    name: f'Helsinki-NLP/opus-mt-en-{code}'
    for name, code in _LANGUAGE_CODES
}
107
+
108
# Streamlit re-executes the whole script on every interaction, so without a
# cache each "Translate" click would re-instantiate the model. max_entries
# bounds how many checkpoints stay resident at once.
@st.cache_resource(max_entries=4)
def _load_checkpoint(model_name):
    """Instantiate the model and tokenizer for one checkpoint identifier."""
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def load_model(language):
    """Load the model and tokenizer for the specified target language.

    Args:
        language: Display name of the target language; looked up in the
            module-level ``models`` mapping.

    Returns:
        A ``(model, tokenizer)`` tuple, or ``(None, None)`` when the language
        has no mapping entry or the checkpoint cannot be loaded. In both
        failure cases an error message is shown with ``st.error``.
    """
    model_name = models.get(language)
    if not model_name:
        st.error(f"Model for {language} not found.")
        return None, None

    try:
        return _load_checkpoint(model_name)
    except OSError as err:
        # from_pretrained raises OSError when the identifier does not resolve
        # to a checkpoint or the files cannot be fetched; report it the same
        # way as an unknown language instead of crashing the page.
        st.error(f"Could not load model for {language} ({model_name}): {err}")
        return None, None
118
+
119
def translate_text(text, model, tokenizer):
    """Translate text using the provided model and tokenizer.

    Args:
        text: Source text to translate.
        model: Object exposing ``generate(**encoded_inputs)``.
        tokenizer: Callable tokenizer that also exposes ``decode``.

    Returns:
        The decoded first generated sequence with special tokens removed, or
        an empty string when ``text`` is empty or only whitespace.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True caps the input at the tokenizer's maximum length instead
    # of feeding the model a longer sequence than it was configured for.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    # Unpack the full encoding so the attention mask reaches generate() too.
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
125
+
126
def main():
    """Render the translator page and handle a translation request.

    Shows a target-language picker and an English text box. When the
    "Translate" button is pressed, the text is validated, the model for the
    chosen language is loaded, and the translation is written to the page.
    """
    st.title("🌐 Multilingual Translator")
    st.markdown("Created by: [**Engr. Hamesh Raj**](https://www.linkedin.com/in/datascientisthameshraj/)")

    # Target language selection
    target_language = st.selectbox("Select target language:", list(models.keys()))

    # Input text area
    text_to_translate = st.text_area("Enter text in English:")

    if not st.button("Translate"):
        return

    # Whitespace-only input counts as empty.
    if not text_to_translate or not text_to_translate.strip():
        st.warning("Please enter text to translate.")
        return

    # Loading a model can be slow, so give visible feedback while working.
    with st.spinner("Translating..."):
        model, tokenizer = load_model(target_language)
        # load_model signals failure with (None, None) after reporting the
        # error itself; compare against None rather than relying on the
        # truthiness of the model/tokenizer objects.
        if model is None or tokenizer is None:
            return
        translated_text = translate_text(text_to_translate, model, tokenizer)

    st.write(f"**Translation in {target_language}:**")
    st.write(translated_text)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ sentencepiece