import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
from gtts import gTTS
import os

# Ensure SentencePiece is available (required by MarianTokenizer)
try:
    import sentencepiece  # noqa: F401
except ImportError:
    st.warning("The SentencePiece library is required but not installed. Installing it now...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece"])
    import importlib
    importlib.invalidate_caches()
    import sentencepiece  # noqa: F401

# Streamlit app
st.title("Text Translator with Voice")

# Input text
user_text = st.text_input("Enter the text you want to translate:")

# Map language display names to their Google Translate-style language codes
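# Note: these codes follow Google Translate conventions (which gTTS also uses); not every
# code has a matching Helsinki-NLP/opus-mt-en-<code> translation model, and unsupported
# pairs are reported by the model loader below.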
language_names = {
    'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu',
    'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chichewa': 'ny',
    'Chinese (Simplified)': 'zh-cn', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en',
    'Esperanto': 'eo', 'Estonian': 'et', 'Filipino': 'tl', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl',
    'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw',
    'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id',
    'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km',
    'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt',
    'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi',
    'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Odia': 'or',
    'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru',
    'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala': 'si',
    'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv',
    'Tajik': 'tg', 'Tamil': 'ta', 'Tatar': 'tt', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Turkmen': 'tk', 'Ukrainian': 'uk',
    'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo',
    'Zulu': 'zu'
}

# Function to load the translation model and tokenizer
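# st.cache_resource keeps each loaded model/tokenizer in memory across Streamlit reruns,
# so a given language pair is only downloaded and loaded once per session.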
@st.cache_resource
def load_translation_model(language_pair):
    try:
        model_name = f'Helsinki-NLP/opus-mt-{language_pair}'
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        return model, tokenizer
    except Exception as e:
        st.error(f"Failed to load model for language pair {language_pair}: {str(e)}")
        return None, None

# Function to translate text
def translate_text(text, target_language_code):
    try:
        # Load model and tokenizer for the selected language
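        # opus-mt models are directional (named <source>-<target>); this app assumes the
        # input text is English, so it always requests an en-<target> pair.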
        language_pair = f'en-{target_language_code}'
        model, tokenizer = load_translation_model(language_pair)
        
        if model is None or tokenizer is None:
            return "Translation failed: Model or tokenizer not loaded."
        
        # Tokenize the input text
        inputs = tokenizer(text, return_tensors="pt", padding=True)
        
        # Translate the text
        translated_tokens = model.generate(**inputs)
        translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
        
        return translated_text
    except Exception as e:
        return f"Translation failed: {str(e)}"

# Target language selection
st.subheader("Select the target language for translation:")
selected_target_language = st.selectbox("Select a target language:", list(language_names.keys()))

# Display the translated text and speak it
if user_text:
    translated_text = translate_text(user_text, language_names[selected_target_language])
    
    if "Translation failed" not in translated_text:
        # Apply advanced CSS for better display in a bordered box
        styled_text = f'<div style="border: 2px solid #0072B5; padding: 10px; background-color: #E5E5E5; border-radius: 5px; color: black;">{translated_text}</div>'
        st.markdown(styled_text, unsafe_allow_html=True)
        
        # Text-to-Speech (TTS) conversion and playback.
        # gTTS may not support every language code in the table above, so fail gracefully.
        try:
            tts = gTTS(translated_text, lang=language_names[selected_target_language])
            tts_file_path = "translated_audio.mp3"
            tts.save(tts_file_path)

            # Read the audio into memory so the temporary file can be removed right away
            with open(tts_file_path, "rb") as audio_file:
                st.audio(audio_file.read(), format="audio/mp3")

            # Clean up the TTS audio file
            os.remove(tts_file_path)
        except Exception as e:
            st.warning(f"Text-to-speech is unavailable for this language: {e}")
    else:
        st.error(translated_text)