import streamlit as st
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration

# Map language names to the language codes expected by M2M100
LANGUAGE_MODELS = {
    'Afrikaans': 'af',
    'Albanian': 'sq',
    'Amharic': 'am',
    'Arabic': 'ar',
    'Armenian': 'hy',
    'Bengali': 'bn',
    'Bosnian': 'bs',
    'Catalan': 'ca',
    'Croatian': 'hr',
    'Czech': 'cs',
    'Danish': 'da',
    'Dutch': 'nl',
    'Esperanto': 'eo',
    'Estonian': 'et',
    'Finnish': 'fi',
    'French': 'fr',
    'German': 'de',
    'Greek': 'el',
    'Gujarati': 'gu',
    'Haitian Creole': 'ht',
    'Hausa': 'ha',
    'Hawaiian': 'haw',
    'Hebrew': 'he',
    'Hindi': 'hi',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Igbo': 'ig',
    'Indonesian': 'id',
    'Irish': 'ga',
    'Italian': 'it',
    'Japanese': 'ja',
    'Javanese': 'jv',
    'Kannada': 'kn',
    'Khmer': 'km',
    'Korean': 'ko',
    'Latin': 'la',
    'Latvian': 'lv',
    'Lithuanian': 'lt',
    'Luxembourgish': 'lb',
    'Macedonian': 'mk',
    'Malagasy': 'mg',
    'Malayalam': 'ml',
    'Maltese': 'mt',
    'Maori': 'mi',
    'Marathi': 'mr',
    'Myanmar': 'my',
    'Nepali': 'ne',
    'Norwegian': 'no',
    'Nyanja': 'ny',
    'Odia': 'or',
    'Oromo': 'om',
    'Pashto': 'ps',
    'Persian': 'fa',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Punjabi': 'pa',
    'Quechua': 'qu',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Samoan': 'sm',
    'Scots Gaelic': 'gd',
    'Serbian': 'sr',
    'Sesotho': 'st',
    'Shona': 'sn',
    'Sindhi': 'sd',
    'Sinhala': 'si',
    'Slovak': 'sk',
    'Slovenian': 'sl',
    'Somali': 'so',
    'Spanish': 'es',
    'Sundanese': 'su',
    'Swahili': 'sw',
    'Swedish': 'sv',
    'Tajik': 'tg',
    'Tamil': 'ta',
    'Telugu': 'te',
    'Thai': 'th',
    'Turkmen': 'tk',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Urdu': 'ur',
    'Vietnamese': 'vi',
    'Welsh': 'cy',
    'Xhosa': 'xh',
    'Yiddish': 'yi',
    'Yoruba': 'yo',
    'Zulu': 'zu',
}
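
# Note: a few of the codes above (e.g. 'haw', 'la', 'eo') are not among the
# 100 languages covered by facebook/m2m100_418M; selecting them surfaces an
# error at translation time instead of a translation.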

# Load the tokenizer and model once; st.cache_resource reuses them across reruns
@st.cache_resource
def load_model():
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
    return tokenizer, model

def translate(text, target_language):
    tokenizer, model = load_model()
    # Look up the M2M100 code for the selected target language
    target_lang_code = LANGUAGE_MODELS.get(target_language)
    if not target_lang_code:
        return "Target language not supported."
    # The input is English, so set the source language before encoding
    tokenizer.src_lang = "en"
    encoded_input = tokenizer(text, return_tensors="pt")
    # Force the decoder to start with the target language token;
    # get_lang_id raises a KeyError for codes M2M100 does not cover,
    # which the caller's try/except reports as an error.
    generated_tokens = model.generate(
        **encoded_input,
        forced_bos_token_id=tokenizer.get_lang_id(target_lang_code),
    )
    translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
    return translation

st.title('English to Any Language Translator')
text_input = st.text_area("Enter text in English:", "Hello, how are you?")
target_language = st.selectbox(
    'Select the target language:',
    list(LANGUAGE_MODELS.keys())
)

if st.button('Translate'):
    with st.spinner('Translating...'):
        try:
            translation = translate(text_input, target_language)
            st.write(f'Translation ({target_language}):')
            st.write(translation)
        except Exception as e:
            st.error(f"Error: {e}")
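
# To run this app locally (a sketch; the filename and dependency list are
# assumptions, not taken from this Space's own config):
#   pip install streamlit transformers torch sentencepiece
#   streamlit run app.py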