Spaces:
Sleeping
Sleeping
import html
import subprocess
import sys
import time

import pyttsx3
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
# SentencePiece is a runtime requirement of this app; install it on the fly
# when it is missing, then import it again.
try:
    import sentencepiece
except ImportError:
    st.warning("The SentencePiece library is required but not installed. Installing now...")
    # Invoke pip through the running interpreter so the package is installed
    # into the environment this script actually uses. The exit status is not
    # checked: if the install failed, the import below raises ImportError.
    subprocess.run([sys.executable, "-m", "pip", "install", "sentencepiece"])
    import sentencepiece
# Streamlit app title (plain text; no decorative glyphs)
st.title("Advanced Text Translator with Voice")
# Maps each language's display name (shown in the target-language selectbox)
# to the language code used to build the translation model name.
# NOTE(review): some codes (e.g. 'zh-cn', 'iw', 'jw') look like
# Google-Translate-style identifiers; confirm that a matching
# Helsinki-NLP/opus-mt-en-<code> checkpoint exists for each entry.
language_names = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Azerbaijani": "az",
    "Basque": "eu",
    "Belarusian": "be",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Bulgarian": "bg",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "Chichewa": "ny",
    "Chinese (Simplified)": "zh-cn",
    "Corsican": "co",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Esperanto": "eo",
    "Estonian": "et",
    "Filipino": "tl",
    "Finnish": "fi",
    "French": "fr",
    "Frisian": "fy",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Haitian Creole": "ht",
    "Hausa": "ha",
    "Hawaiian": "haw",
    "Hebrew": "iw",
    "Hindi": "hi",
    "Hmong": "hmn",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Igbo": "ig",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jw",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Khmer": "km",
    "Korean": "ko",
    "Kurdish": "ku",
    "Kyrgyz": "ky",
    "Lao": "lo",
    "Latin": "la",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Maori": "mi",
    "Marathi": "mr",
    "Mongolian": "mn",
    "Myanmar (Burmese)": "my",
    "Nepali": "ne",
    "Norwegian": "no",
    "Odia": "or",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Samoan": "sm",
    "Scots Gaelic": "gd",
    "Serbian": "sr",
    "Sesotho": "st",
    "Shona": "sn",
    "Sindhi": "sd",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tajik": "tg",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Turkmen": "tk",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uyghur": "ug",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Zulu": "zu",
}
def load_translation_model(language_pair):
    """Load the Marian model and tokenizer for one language pair.

    Args:
        language_pair: Suffix appended to ``Helsinki-NLP/opus-mt-`` to form
            the checkpoint name (callers in this file pass ``en-<code>``).

    Returns:
        A ``(model, tokenizer)`` tuple. If anything raises while loading,
        the error is shown via ``st.error`` and ``(None, None)`` is returned.
    """
    try:
        checkpoint = f'Helsinki-NLP/opus-mt-{language_pair}'
        marian_model = MarianMTModel.from_pretrained(checkpoint)
        marian_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
    except Exception as exc:
        st.error(f"Failed to load model for language pair {language_pair}: {exc!s}")
        return None, None
    else:
        return marian_model, marian_tokenizer
def translate_text(text, target_language_code):
    """Translate ``text`` from English into the given target language.

    Args:
        text: The text to translate.
        target_language_code: Language code appended to ``en-`` to select
            the translation model.

    Returns:
        The decoded translation of the first generated sequence. On any
        failure a string starting with ``"Translation failed:"`` is returned
        instead of raising.
    """
    try:
        marian, tok = load_translation_model(f'en-{target_language_code}')
        if marian is None or tok is None:
            return "Translation failed: Model or tokenizer not loaded."

        # Encode as PyTorch tensors, generate, then decode the first sequence.
        encoded = tok(text, return_tensors="pt", padding=True)
        generated = marian.generate(**encoded)
        return tok.decode(generated[0], skip_special_tokens=True)
    except Exception as exc:
        return f"Translation failed: {exc!s}"
def init_tts_engine():
    """Create a pyttsx3 engine configured with the app's speech settings.

    Returns:
        The initialized engine with ``rate`` set to 150 (speed of speech)
        and ``volume`` set to 1.0 (volume level, 0.0 to 1.0).
    """
    tts = pyttsx3.init()
    speech_settings = (
        ('rate', 150),
        ('volume', 1.0),
    )
    for prop_name, prop_value in speech_settings:
        tts.setProperty(prop_name, prop_value)
    return tts
# Streamlit UI: collect the input text and the target language.
st.subheader("Enter the text you want to translate:")
user_text = st.text_input("Input text:", placeholder="Type your text here...")
st.subheader("Select the target language for translation:")
selected_target_language = st.selectbox("Target language:", list(language_names))

# Translate and speak the text once the user has entered something.
if user_text:
    with st.spinner("Translating text..."):
        translated_text = translate_text(user_text, language_names[selected_target_language])

    # translate_text() signals errors by returning a message that begins with
    # "Translation failed:". Test the prefix rather than a substring so that a
    # genuine translation merely containing that phrase is not treated as an
    # error.
    if not translated_text.startswith("Translation failed:"):
        st.success("Translation successful!")

        # The box is rendered as raw HTML, so escape the translation first;
        # otherwise characters such as '<' or '&' in the text would be
        # interpreted as markup.
        safe_text = html.escape(translated_text)
        st.markdown(
            f'<div style="border: 2px solid #0072B5; padding: 10px; background-color: #E5E5E5; border-radius: 5px; color: black;">'
            f'<b>Translated Text:</b> {safe_text}'
            f'</div>',
            unsafe_allow_html=True,
        )

        # Convert the (unescaped) translated text to speech.
        with st.spinner("Generating audio..."):
            try:
                engine = init_tts_engine()
                engine.say(translated_text)
                engine.runAndWait()
                st.success("Audio playback complete!")
            except Exception as e:
                # Best-effort feature: report the failure instead of crashing the app.
                st.error(f"Text-to-speech failed: {str(e)}")
    else:
        st.error(translated_text)