OCR-Translator / translate_speak.py
Sarath0x8f's picture
Upload 9 files
d69917c verified
raw
history blame
1.59 kB
import os
from gtts import gTTS
from deep_translator import GoogleTranslator
import soundfile as sf
import tempfile
import numpy as np
import gtts
# Fixed location where audio_streaming() saves the synthesized speech when it
# is called with to == 1.
output_path = 'Audio/output.wav'
# Fixed location where audio_streaming() saves the synthesized speech for any
# other value of `to` (translate_txt() passes to=2).
translate_path = 'Audio/translate.wav'
def audio_streaming(txt=None, lang='en', to=None):
    """Synthesize ``txt`` with gTTS and return the path of a playable file.

    The speech is first saved to one of the module's fixed paths
    (``output_path`` when ``to == 1``, ``translate_path`` otherwise), then
    read back, converted to float32 samples and re-written to a fresh
    temporary ``.wav`` file whose path is returned.

    Args:
        txt: Text to speak. It is passed to gTTS unchanged.
        lang: Language identifier handed to gTTS.
        to: Selects the intermediate file: ``1`` uses ``output_path``, any
            other value uses ``translate_path``.

    Returns:
        Path of a temporary ``.wav`` file. The file is created with
        ``delete=False``, so removing it is left to the caller.

    NOTE(review): gTTS is understood to emit MP3 data even though the
    intermediate path ends in ``.wav``; reading it back relies on the
    installed libsndfile being able to decode that data - confirm.
    NOTE(review): the intermediate paths are shared by every call, so
    concurrent calls may overwrite each other's file - confirm whether the
    app can serve requests in parallel.
    """
    speak = gTTS(text=txt, lang=lang, slow=False)

    # Pick the fixed on-disk location for the raw gTTS output.
    audio = output_path if to == 1 else translate_path

    # save() opens the target for writing, which fails when the parent
    # directory is missing, so make sure it exists first.
    target_dir = os.path.dirname(audio)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    speak.save(audio)

    # Load the synthesized audio and normalize the samples to float32.
    data, samplerate = sf.read(audio)
    data = np.array(data, dtype=np.float32)

    # Reserve a unique temp-file name, then close the handle before writing
    # so soundfile is the only writer of that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        temp_audio_path = tmp_file.name
    sf.write(temp_audio_path, data, samplerate)

    # Return the file path for Gradio to play back.
    return temp_audio_path
def _resolve_tts_lang(lang, fallback='en'):
    """Map a translator language (name or code) to a code gTTS can speak."""
    tts_codes = gtts.lang.tts_langs()
    if lang in tts_codes:
        return lang
    # GoogleTranslator also accepts full language names; look up the code.
    name_to_code = GoogleTranslator().get_supported_languages(as_dict=True)
    code = name_to_code.get(str(lang).lower())
    if code in tts_codes:
        return code
    return fallback


def translate_txt(lang, text):
    """Translate English ``text`` into ``lang`` and synthesize it as speech.

    Args:
        lang: Target language passed to ``GoogleTranslator`` as ``target``.
        text: English source text (the translator is created with
            ``source="en"``).

    Returns:
        A ``(translated_text, audio_path)`` tuple. ``audio_path`` is the
        value returned by ``audio_streaming`` or ``None`` when the
        translation came back empty and there is nothing to speak.
    """
    translator = GoogleTranslator(source="en", target=lang)
    translated_text = translator.translate(text)

    # gTTS rejects empty input, so skip synthesis instead of crashing.
    if not translated_text:
        return translated_text, None

    # Speak the translation in the target language when gTTS supports it;
    # previously the English voice was always used, mispronouncing or
    # skipping non-English text. Unsupported languages keep the 'en' voice.
    speech_lang = _resolve_tts_lang(lang)
    audio_path = audio_streaming(translated_text, lang=speech_lang, to=2)
    return translated_text, audio_path
if __name__ == "__main__":
    # Manual smoke tests kept for reference:
    # print(audio_streaming("hello world"))
    # os.system(f"start {audio_streaming('hello world!')}")

    # Report which translator languages gTTS cannot speak.
    # The translator dict maps language name -> code, while tts_langs() maps
    # code -> name, so the comparison must be done on codes; comparing the
    # raw key sets would match names against codes and flag almost everything.
    translate = GoogleTranslator().get_supported_languages(as_dict=True)
    speak = set(gtts.lang.tts_langs())
    not_speak = {name for name, code in translate.items() if code not in speak}
    print(not_speak, len(not_speak))