# PolyTranslate / app.py
# Author: AreesaAshfaq
# Last commit: "Update app.py" (a316b7b, verified)
# File size: 3.32 kB
import streamlit as st
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
# Maps the language name shown in the UI to the language code handed to the
# M2M100 tokenizer. Insertion order is the order shown in the dropdown.
#
# NOTE(review): this table looks like it was taken from a Google-Translate
# style code list. Javanese has been corrected from 'jw' to 'jv', which is the
# code M2M100 uses. The following codes do not appear in the published M2M100
# language list as far as I can tell and will be rejected by the tokenizer:
# eo, haw, la, mt, mi, ny, om, qu, sm, st, sn, tg, te, tk -- confirm against
# the tokenizer's language list and prune or remap them.
LANGUAGE_MODELS = {
    'Afrikaans': 'af',
    'Albanian': 'sq',
    'Amharic': 'am',
    'Arabic': 'ar',
    'Armenian': 'hy',
    'Bengali': 'bn',
    'Bosnian': 'bs',
    'Catalan': 'ca',
    'Croatian': 'hr',
    'Czech': 'cs',
    'Danish': 'da',
    'Dutch': 'nl',
    'Esperanto': 'eo',
    'Estonian': 'et',
    'Finnish': 'fi',
    'French': 'fr',
    'German': 'de',
    'Greek': 'el',
    'Gujarati': 'gu',
    'Haitian Creole': 'ht',
    'Hausa': 'ha',
    'Hawaiian': 'haw',
    'Hebrew': 'he',
    'Hindi': 'hi',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Igbo': 'ig',
    'Indonesian': 'id',
    'Irish': 'ga',
    'Italian': 'it',
    'Japanese': 'ja',
    'Javanese': 'jv',  # M2M100 uses 'jv'; 'jw' is not a known language code there.
    'Kannada': 'kn',
    'Khmer': 'km',
    'Korean': 'ko',
    'Latin': 'la',
    'Latvian': 'lv',
    'Lithuanian': 'lt',
    'Luxembourgish': 'lb',
    'Macedonian': 'mk',
    'Malagasy': 'mg',
    'Malayalam': 'ml',
    'Maltese': 'mt',
    'Maori': 'mi',
    'Marathi': 'mr',
    'Myanmar': 'my',
    'Nepali': 'ne',
    'Norwegian': 'no',
    'Nyanja': 'ny',
    'Odia': 'or',
    'Oromo': 'om',
    'Pashto': 'ps',
    'Persian': 'fa',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Punjabi': 'pa',
    'Quechua': 'qu',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Samoan': 'sm',
    'Scots Gaelic': 'gd',
    'Serbian': 'sr',
    'Sesotho': 'st',
    'Shona': 'sn',
    'Sindhi': 'sd',
    'Sinhala': 'si',
    'Slovak': 'sk',
    'Slovenian': 'sl',
    'Somali': 'so',
    'Spanish': 'es',
    'Sundanese': 'su',
    'Swahili': 'sw',
    'Swedish': 'sv',
    'Tajik': 'tg',
    'Tamil': 'ta',
    'Telugu': 'te',
    'Thai': 'th',
    'Turkmen': 'tk',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Urdu': 'ur',
    'Vietnamese': 'vi',
    'Welsh': 'cy',
    'Xhosa': 'xh',
    'Yiddish': 'yi',
    'Yoruba': 'yo',
    'Zulu': 'zu',
}
@st.cache_resource
def load_model():
    """Load the M2M100 tokenizer and model.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the loaded objects instead of reloading them on every call.

    Returns:
        A ``(tokenizer, model)`` tuple for the ``facebook/m2m100_418M``
        checkpoint.
    """
    checkpoint = "facebook/m2m100_418M"
    return (
        M2M100Tokenizer.from_pretrained(checkpoint),
        M2M100ForConditionalGeneration.from_pretrained(checkpoint),
    )
def translate(text: str, target_language: str) -> str:
    """Translate English *text* into *target_language* using M2M100.

    Args:
        text: English source text.
        target_language: Display name of the target language; expected to be
            a key of ``LANGUAGE_MODELS``.

    Returns:
        The translated text. Returns ``""`` when *text* is empty or only
        whitespace, and the message ``"Target language not supported."`` when
        the language is missing from ``LANGUAGE_MODELS`` or its code is not
        known to the tokenizer.
    """
    # Resolve the language first so an unknown name never triggers a model load.
    target_lang_code = LANGUAGE_MODELS.get(target_language)
    if not target_lang_code:
        return "Target language not supported."

    # Nothing to translate; avoid running generation on blank input.
    if not text or not text.strip():
        return ""

    tokenizer, model = load_model()

    # get_lang_id raises KeyError for codes the checkpoint was not trained on;
    # report that with the same message used for unknown display names.
    try:
        target_lang_id = tokenizer.get_lang_id(target_lang_code)
    except KeyError:
        return "Target language not supported."

    tokenizer.src_lang = "en"
    encoded_input = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded_input,
        forced_bos_token_id=target_lang_id,  # makes the decoder emit the target language
    )
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
# --- Streamlit page: collect input, run the translation, show the result ---
st.title('English to Any Language Translator')

text_input = st.text_area("Enter text in English:", "Hello, how are you?")
target_language = st.selectbox(
    'Select the target language:',
    list(LANGUAGE_MODELS),
)

if st.button('Translate'):
    if not text_input.strip():
        # Blank input has nothing to translate; tell the user instead of
        # sending it to the model.
        st.warning("Please enter some text to translate.")
    else:
        with st.spinner('Translating...'):
            # Top-level UI boundary: surface any failure in the page rather
            # than letting the script crash.
            try:
                translation = translate(text_input, target_language)
                st.write(f'Translation ({target_language}):')
                st.write(translation)
            except Exception as e:
                st.error(f"Error: {e}")