Spaces:
Sleeping
Sleeping
File size: 4,907 Bytes
9d96850 fcfd7f5 a47d303 9d96850 9cb40d6 9d96850 18e757e 9d96850 18e757e 9d96850 d0b5518 c2e4f74 d0b5518 fcfd7f5 9cb40d6 fcfd7f5 9cb40d6 fcfd7f5 c2e4f74 fcfd7f5 9cb40d6 fcfd7f5 c2e4f74 b853b65 31ed4a8 2d10fd6 9d96850 18e757e c2e4f74 18e757e 9cb40d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import html
import importlib
import io
import os
import subprocess
import sys

import streamlit as st
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer
# SentencePiece is treated as a hard requirement of this app; if it cannot be
# imported, try to install it on the fly before continuing.
try:
    import sentencepiece  # noqa: F401  (imported only to verify availability)
except ImportError:
    st.warning("The SentencePiece library is required but not installed. Installing now...")
    try:
        # Invoke pip through the running interpreter so the package lands in
        # the environment this app executes in; a bare "pip" found on PATH may
        # belong to a different Python installation.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece"])
    except (OSError, subprocess.CalledProcessError) as install_error:
        # Report the failure in the page and halt this script run instead of
        # crashing on the retry import below.
        st.error(f"Could not install SentencePiece automatically: {install_error}")
        st.stop()
    # Let the import system notice the package that was installed after
    # interpreter start-up.
    importlib.invalidate_caches()
    import sentencepiece  # noqa: F401
# Streamlit app: render the page heading, then the box the user types the
# source text into. `user_text` is read further down to decide whether there
# is anything to translate.
_PAGE_TITLE = "Text Translator with Voice"
_INPUT_PROMPT = "Enter the text you want to translate:"

st.title(_PAGE_TITLE)
user_text = st.text_input(_INPUT_PROMPT)
# Maps each language's display name (shown in the target-language selector)
# to the short language code used when translating and synthesising speech.
# Entries are kept in alphabetical order by display name, one initial letter
# per row.
language_names = {
    "Afrikaans": "af", "Albanian": "sq", "Amharic": "am", "Arabic": "ar", "Armenian": "hy", "Azerbaijani": "az",
    "Basque": "eu", "Belarusian": "be", "Bengali": "bn", "Bosnian": "bs", "Bulgarian": "bg",
    "Catalan": "ca", "Cebuano": "ceb", "Chichewa": "ny", "Chinese (Simplified)": "zh-cn", "Corsican": "co",
    "Croatian": "hr", "Czech": "cs",
    "Danish": "da", "Dutch": "nl",
    "English": "en", "Esperanto": "eo", "Estonian": "et",
    "Filipino": "tl", "Finnish": "fi", "French": "fr", "Frisian": "fy",
    "Galician": "gl", "Georgian": "ka", "German": "de", "Greek": "el", "Gujarati": "gu",
    "Haitian Creole": "ht", "Hausa": "ha", "Hawaiian": "haw", "Hebrew": "iw", "Hindi": "hi", "Hmong": "hmn",
    "Hungarian": "hu",
    "Icelandic": "is", "Igbo": "ig", "Indonesian": "id", "Irish": "ga", "Italian": "it",
    "Japanese": "ja", "Javanese": "jw",
    "Kannada": "kn", "Kazakh": "kk", "Khmer": "km", "Korean": "ko", "Kurdish": "ku", "Kyrgyz": "ky",
    "Lao": "lo", "Latin": "la", "Latvian": "lv", "Lithuanian": "lt", "Luxembourgish": "lb",
    "Macedonian": "mk", "Malagasy": "mg", "Malay": "ms", "Malayalam": "ml", "Maltese": "mt", "Maori": "mi",
    "Marathi": "mr", "Mongolian": "mn", "Myanmar (Burmese)": "my",
    "Nepali": "ne", "Norwegian": "no",
    "Odia": "or",
    "Pashto": "ps", "Persian": "fa", "Polish": "pl", "Portuguese": "pt", "Punjabi": "pa",
    "Romanian": "ro", "Russian": "ru",
    "Samoan": "sm", "Scots Gaelic": "gd", "Serbian": "sr", "Sesotho": "st", "Shona": "sn", "Sindhi": "sd",
    "Sinhala": "si", "Slovak": "sk", "Slovenian": "sl", "Somali": "so", "Spanish": "es", "Sundanese": "su",
    "Swahili": "sw", "Swedish": "sv",
    "Tajik": "tg", "Tamil": "ta", "Tatar": "tt", "Telugu": "te", "Thai": "th", "Turkish": "tr", "Turkmen": "tk",
    "Ukrainian": "uk", "Urdu": "ur", "Uyghur": "ug", "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi", "Yoruba": "yo",
    "Zulu": "zu",
}
# Functions to load the translation model and tokenizer
@st.cache_resource
def _load_marian_pair(language_pair):
    """Load the Marian model and tokenizer for ``language_pair``.

    This is the cached part of the loader. It deliberately lets exceptions
    propagate: only successful return values are stored by the resource
    cache, so a failed load is retried on the next call instead of being
    remembered.

    Args:
        language_pair: Suffix of the ``Helsinki-NLP/opus-mt-*`` checkpoint
            name, e.g. ``"en-fr"``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model_name = f'Helsinki-NLP/opus-mt-{language_pair}'
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    return model, tokenizer


def load_translation_model(language_pair):
    """Return the cached ``(model, tokenizer)`` for ``language_pair``.

    Args:
        language_pair: Suffix of the ``Helsinki-NLP/opus-mt-*`` checkpoint
            name, e.g. ``"en-fr"``.

    Returns:
        ``(model, tokenizer)`` on success. If loading fails for any reason,
        an error is shown in the Streamlit page and ``(None, None)`` is
        returned; the failure itself is not cached.
    """
    try:
        return _load_marian_pair(language_pair)
    except Exception as e:
        # Broad on purpose: this is the UI boundary, and any loading problem
        # should be surfaced to the user rather than crash the page.
        st.error(f"Failed to load model for language pair {language_pair}: {str(e)}")
        return None, None
# Function to translate text
def translate_text(text, target_language_code):
    """Translate English ``text`` into the language given by ``target_language_code``.

    The model for the ``en-<target_language_code>`` pair is obtained through
    ``load_translation_model``.

    Args:
        text: The source text to translate.
        target_language_code: Code appended to ``"en-"`` to form the
            language pair.

    Returns:
        The translated string. This function never raises: on any failure it
        returns a message that begins with ``"Translation failed"``.

    NOTE(review): the codes in ``language_names`` look like gTTS-style codes
    (e.g. ``'zh-cn'``, ``'iw'``); confirm that a Marian checkpoint exists
    under each resulting ``en-<code>`` name.
    """
    try:
        marian_model, marian_tokenizer = load_translation_model(f'en-{target_language_code}')

        if marian_model is not None and marian_tokenizer is not None:
            # Encode the source text as tensors, let the model generate the
            # target-language tokens, then decode the first (only) sequence.
            encoded = marian_tokenizer(text, return_tensors="pt", padding=True)
            generated = marian_model.generate(**encoded)
            return marian_tokenizer.decode(generated[0], skip_special_tokens=True)

        return "Translation failed: Model or tokenizer not loaded."
    except Exception as err:
        return f"Translation failed: {str(err)}"
# Target language selection
st.subheader("Select the target language for translation:")
selected_target_language = st.selectbox("Select a target language:", list(language_names))

# Display the translated text and speak it
if user_text:
    target_language_code = language_names[selected_target_language]
    translated_text = translate_text(user_text, target_language_code)

    # translate_text reports errors in-band as strings that begin with this
    # prefix. Test the prefix rather than a substring so a genuine translation
    # that merely contains the phrase is not mistaken for an error.
    if translated_text.startswith("Translation failed"):
        st.error(translated_text)
    else:
        # Show the translation in a bordered box. The text is model output
        # derived from user input, so escape it before embedding it in markup
        # that is rendered with unsafe_allow_html=True.
        styled_text = (
            '<div style="border: 2px solid #0072B5; padding: 10px; background-color: #E5E5E5; '
            'border-radius: 5px; color: black;">'
            f'{html.escape(translated_text)}</div>'
        )
        st.markdown(styled_text, unsafe_allow_html=True)

        # Text-to-Speech (TTS) conversion and playback. The audio is rendered
        # into memory instead of a fixed file name in the working directory,
        # so concurrent sessions cannot overwrite each other's audio and
        # nothing is left on disk if synthesis fails midway.
        try:
            audio_buffer = io.BytesIO()
            gTTS(translated_text, lang=target_language_code).write_to_fp(audio_buffer)
        except Exception as tts_error:
            # Speech is optional: keep the translation on screen even if the
            # language is unsupported by gTTS or the request fails.
            st.warning(f"Text-to-speech is unavailable for this translation: {tts_error}")
        else:
            st.audio(audio_buffer.getvalue(), format="audio/mp3")