import os  # needed for the runtime pip install fallback below
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
import pyttsx3
import time

# Install required libraries if not already installed
try:
    import sentencepiece
except ImportError:
    st.warning("The SentencePiece library is required but not installed. Installing now...")
    os.system("pip install sentencepiece")
    import sentencepiece
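# Note: installing at runtime via os.system is only a fallback for local use; for a
# deployed app it is more reliable to list sentencepiece (along with streamlit,
# transformers, torch and pyttsx3) in requirements.txt so the environment is built
# up front.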

# Streamlit app title
st.title("🌍 Advanced Text Translator with Voice �")

# Define a dictionary of language codes and their full names
language_names = {
    'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu',
    'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chichewa': 'ny',
    'Chinese (Simplified)': 'zh-cn', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en',
    'Esperanto': 'eo', 'Estonian': 'et', 'Filipino': 'tl', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl',
    'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw',
    'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id',
    'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km',
    'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt',
    'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi',
    'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Odia': 'or',
    'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru',
    'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala': 'si',
    'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv',
    'Tajik': 'tg', 'Tamil': 'ta', 'Tatar': 'tt', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Turkmen': 'tk', 'Ukrainian': 'uk',
    'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo',
    'Zulu': 'zu'
}
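
# These codes follow the common two-letter (mostly ISO 639-1) conventions. Not every
# 'en-<code>' pair is published as a Helsinki-NLP opus-mt model on the Hugging Face
# Hub, and a few entries (e.g. 'zh-cn') do not match opus-mt naming, so an
# unavailable pair simply surfaces as the error handled in load_translation_model
# below.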

# Function to load the translation model and tokenizer
@st.cache_resource
def load_translation_model(language_pair):
    try:
        model_name = f'Helsinki-NLP/opus-mt-{language_pair}'
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        return model, tokenizer
    except Exception as e:
        st.error(f"Failed to load model for language pair {language_pair}: {str(e)}")
        return None, None
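
# Example usage (assuming the requested pair is published on the Hub, as it is for
# most major languages):
#   model, tokenizer = load_translation_model("en-fr")  # loads Helsinki-NLP/opus-mt-en-fr
# The @st.cache_resource decorator keeps each loaded model in memory across Streamlit
# reruns, so switching back to a previously used language avoids a fresh download.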

# Function to translate text
def translate_text(text, target_language_code):
    try:
        # Load model and tokenizer for the selected language
        language_pair = f'en-{target_language_code}'
        model, tokenizer = load_translation_model(language_pair)
        
        if model is None or tokenizer is None:
            return "Translation failed: Model or tokenizer not loaded."
        
        # Tokenize the input text
        inputs = tokenizer(text, return_tensors="pt", padding=True)
        
        # Translate the text
        translated_tokens = model.generate(**inputs)
        translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
        
        return translated_text
    except Exception as e:
        return f"Translation failed: {str(e)}"

# Function to initialize the TTS engine
def init_tts_engine():
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)  # Speed of speech
    engine.setProperty('volume', 1.0)  # Volume level (0.0 to 1.0)
    return engine
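
# Caveat: pyttsx3 plays audio through the sound device of the machine running this
# script, so speech output works when the app is run locally but is not heard by
# remote users of a hosted deployment.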

# Streamlit UI
st.subheader("Enter the text you want to translate:")
user_text = st.text_input("Input text:", placeholder="Type your text here...")

st.subheader("Select the target language for translation:")
selected_target_language = st.selectbox("Target language:", list(language_names.keys()))

# Translate and speak the text
if user_text:
    with st.spinner("Translating text..."):
        translated_text = translate_text(user_text, language_names[selected_target_language])
    
    if "Translation failed" not in translated_text:
        # Display translated text in a styled box
        st.success("Translation successful! πŸŽ‰")
        st.markdown(
            f'<div style="border: 2px solid #0072B5; padding: 10px; background-color: #E5E5E5; border-radius: 5px; color: black;">'
            f'<b>Translated Text:</b> {translated_text}'
            f'</div>',
            unsafe_allow_html=True
        )
        
        # Convert translated text to speech
        with st.spinner("Generating audio..."):
            try:
                engine = init_tts_engine()
                engine.say(translated_text)
                engine.runAndWait()
                st.success("Audio playback complete! πŸ”Š")
            except Exception as e:
                st.error(f"Text-to-speech failed: {str(e)}")
    else:
        st.error(translated_text)
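
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py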