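# Streamlit text-translation demo: translates English input with MarianMT
# (Helsinki-NLP opus-mt checkpoints) and reads the result aloud via pyttsx3.
# Assuming this script is saved as app.py, it can be run locally with:
#     streamlit run app.py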
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
import pyttsx3
import time
import os
# Install required libraries if not already installed
try:
    import sentencepiece
except ImportError:
    st.warning("The SentencePiece library is required but not installed. Installing now...")
    os.system("pip install sentencepiece")
    import sentencepiece
# Streamlit app title
st.title("🌍 Advanced Text Translator with Voice 🔊")
# Map full language names to their ISO codes. Note that not every code below
# has a matching Helsinki-NLP/opus-mt-en-<code> checkpoint; unsupported pairs
# are reported as load errors by load_translation_model() below.
language_names = {
'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu',
'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chichewa': 'ny',
'Chinese (Simplified)': 'zh-cn', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en',
'Esperanto': 'eo', 'Estonian': 'et', 'Filipino': 'tl', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl',
'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw',
'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id',
'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km',
'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt',
'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi',
'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Odia': 'or',
'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru',
'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala': 'si',
'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv',
'Tajik': 'tg', 'Tamil': 'ta', 'Tatar': 'tt', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Turkmen': 'tk', 'Ukrainian': 'uk',
'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo',
'Zulu': 'zu'
}
# Function to load the translation model and tokenizer
@st.cache_resource
def load_translation_model(language_pair):
    try:
        model_name = f'Helsinki-NLP/opus-mt-{language_pair}'
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        return model, tokenizer
    except Exception as e:
        st.error(f"Failed to load model for language pair {language_pair}: {str(e)}")
        return None, None
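# Usage sketch (illustrative pair): load_translation_model('en-fr') returns a
# (MarianMTModel, MarianTokenizer) tuple on success, or (None, None) when the
# corresponding Helsinki-NLP/opus-mt-en-fr checkpoint cannot be loaded.
# @st.cache_resource caches the loaded model per language pair, so repeated
# translations reuse it instead of re-downloading.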
# Function to translate text
def translate_text(text, target_language_code):
    try:
        # Load model and tokenizer for the selected language
        language_pair = f'en-{target_language_code}'
        model, tokenizer = load_translation_model(language_pair)
        if model is None or tokenizer is None:
            return "Translation failed: Model or tokenizer not loaded."
        # Tokenize the input text
        inputs = tokenizer(text, return_tensors="pt", padding=True)
        # Translate the text
        translated_tokens = model.generate(**inputs)
        translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
        return translated_text
    except Exception as e:
        return f"Translation failed: {str(e)}"
# Function to initialize the TTS engine
def init_tts_engine():
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)  # Speed of speech
    engine.setProperty('volume', 1.0)  # Volume level (0.0 to 1.0)
    return engine
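# Note: pyttsx3 is an offline TTS wrapper around the host OS speech engine
# (SAPI5 on Windows, NSSpeechSynthesizer on macOS, eSpeak on Linux), so audio
# playback only works where such an engine is available; on a headless server
# the try/except around runAndWait() below reports the failure instead.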
# Streamlit UI
st.subheader("Enter the text you want to translate:")
user_text = st.text_input("Input text:", placeholder="Type your text here...")
st.subheader("Select the target language for translation:")
selected_target_language = st.selectbox("Target language:", list(language_names.keys()))
# Translate and speak the text
if user_text:
    with st.spinner("Translating text..."):
        translated_text = translate_text(user_text, language_names[selected_target_language])
    if "Translation failed" not in translated_text:
        # Display translated text in a styled box
        st.success("Translation successful! 🎉")
        st.markdown(
            f'<div style="border: 2px solid #0072B5; padding: 10px; background-color: #E5E5E5; border-radius: 5px; color: black;">'
            f'<b>Translated Text:</b> {translated_text}'
            f'</div>',
            unsafe_allow_html=True
        )
        # Convert translated text to speech
        with st.spinner("Generating audio..."):
            try:
                engine = init_tts_engine()
                engine.say(translated_text)
                engine.runAndWait()
                st.success("Audio playback complete! 🔊")
            except Exception as e:
                st.error(f"Text-to-speech failed: {str(e)}")
    else:
        st.error(translated_text)