Spaces:
Sleeping
Sleeping
Delete app.py
Browse files
app.py
DELETED
@@ -1,143 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from transformers import MarianMTModel, MarianTokenizer
|
3 |
-
|
4 |
-
# Supported target languages. Each display name maps to a
# (language code, model name) pair. Every model name is the shared
# "Helsinki-NLP/opus-mt-en-" prefix followed by the language code, so the
# table below lists only the name/code pairs and the mapping is derived.
LANGUAGE_MODELS = {
    language: (code, f"Helsinki-NLP/opus-mt-en-{code}")
    for language, code in (
        ("Afrikaans", "af"),
        ("Albanian", "sq"),
        ("Amharic", "am"),
        ("Arabic", "ar"),
        ("Armenian", "hy"),
        ("Bengali", "bn"),
        ("Bosnian", "bs"),
        ("Catalan", "ca"),
        ("Croatian", "hr"),
        ("Czech", "cs"),
        ("Danish", "da"),
        ("Dutch", "nl"),
        ("Esperanto", "eo"),
        ("Estonian", "et"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("German", "de"),
        ("Greek", "el"),
        ("Gujarati", "gu"),
        ("Haitian Creole", "ht"),
        ("Hausa", "ha"),
        ("Hawaiian", "haw"),
        ("Hebrew", "he"),
        ("Hindi", "hi"),
        ("Hungarian", "hu"),
        ("Icelandic", "is"),
        ("Igbo", "ig"),
        ("Indonesian", "id"),
        ("Irish", "ga"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Javanese", "jw"),
        ("Kannada", "kn"),
        ("Khmer", "km"),
        ("Korean", "ko"),
        ("Latin", "la"),
        ("Latvian", "lv"),
        ("Lithuanian", "lt"),
        ("Luxembourgish", "lb"),
        ("Macedonian", "mk"),
        ("Malagasy", "mg"),
        ("Malayalam", "ml"),
        ("Maltese", "mt"),
        ("Maori", "mi"),
        ("Marathi", "mr"),
        ("Myanmar", "my"),
        ("Nepali", "ne"),
        ("Norwegian", "no"),
        ("Nyanja", "ny"),
        ("Odia", "or"),
        ("Oromo", "om"),
        ("Pashto", "ps"),
        ("Persian", "fa"),
        ("Polish", "pl"),
        ("Portuguese", "pt"),
        ("Punjabi", "pa"),
        ("Quechua", "qu"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Samoan", "sm"),
        ("Scots Gaelic", "gd"),
        ("Serbian", "sr"),
        ("Sesotho", "st"),
        ("Shona", "sn"),
        ("Sindhi", "sd"),
        ("Sinhala", "si"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Somali", "so"),
        ("Spanish", "es"),
        ("Sundanese", "su"),
        ("Swahili", "sw"),
        ("Swedish", "sv"),
        ("Tajik", "tg"),
        ("Tamil", "ta"),
        ("Telugu", "te"),
        ("Thai", "th"),
        ("Turkmen", "tk"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Urdu", "ur"),
        ("Vietnamese", "vi"),
        ("Welsh", "cy"),
        ("Xhosa", "xh"),
        ("Yiddish", "yi"),
        ("Yoruba", "yo"),
        ("Zulu", "zu"),
    )
}
|
94 |
-
|
95 |
-
@st.cache_resource
def load_model(target_language):
    """Return the tokenizer and model registered for ``target_language``.

    Args:
        target_language: Display name used as a key of ``LANGUAGE_MODELS``.

    Returns:
        A ``(tokenizer, model)`` pair built with ``from_pretrained`` from the
        registered model name, or ``(None, None)`` after reporting an error
        in the UI when the language has no entry.
    """
    registered = LANGUAGE_MODELS.get(target_language, (None, None))
    model_name = registered[1]

    if model_name:
        # Tokenizer first, then model, both from the same model name.
        return (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )

    st.error(f"Model for language '{target_language}' not found.")
    return None, None
|
105 |
-
|
106 |
-
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language``.

    Args:
        text: Source text to translate.
        target_language: Display name used as a key of ``LANGUAGE_MODELS``.

    Returns:
        The decoded translation, or ``""`` when ``text`` is blank or the
        tokenizer/model for ``target_language`` could not be loaded.
    """
    # Blank input has nothing to translate; skip loading a model for it.
    if not text or not text.strip():
        return ""

    try:
        tokenizer, model = load_model(target_language)
    except OSError as err:
        # from_pretrained raises OSError when the model files cannot be
        # found or downloaded; report it in the UI and use the same ""
        # result as the "no model" path below instead of crashing.
        st.error(f"Could not load the model for '{target_language}': {err}")
        return ""
    if tokenizer is None or model is None:
        return ""

    # truncation=True caps the encoded input at the tokenizer's maximum
    # length so over-long text is shortened rather than passed to the
    # model at a length it does not support.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Generate the translation and decode the single returned sequence.
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
|
120 |
-
|
121 |
-
def main():
    """Render the translator page and handle the "Translate" button.

    Shows a text area for the source text and a select box listing the keys
    of ``LANGUAGE_MODELS``. When the button is pressed, the text is passed
    to ``translate_text`` and the result is written to the page.
    """
    st.title("Language Translator")
    st.write("Translate English text to any language.")

    # Input text from the user
    source_text = st.text_area("Enter text in English:", "")

    # Select target language
    target_language = st.selectbox(
        "Select target language:",
        options=list(LANGUAGE_MODELS),
    )

    if not st.button("Translate"):
        return

    # Treat whitespace-only input like empty input so no model is loaded
    # for text that contains nothing to translate.
    if not source_text.strip():
        st.warning("Please enter text to translate.")
        return

    translated_text = translate_text(source_text, target_language)
    if translated_text:
        st.write(f"Translated text ({target_language}):")
        st.write(translated_text)
    else:
        # translate_text returns "" on failure; avoid showing a result
        # header with nothing under it.
        st.warning("No translation could be produced.")
|
141 |
-
|
142 |
-
# Run the app only when this file is executed as the main module.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|