File size: 2,526 Bytes
9a8d919 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import gradio as gr
import requests
import numpy as np
import io
import soundfile as sf # To read audio data into NumPy array
# Service endpoints used by the translation and TTS helpers below.
# The original note marks these as placeholders to be replaced with the
# actual endpoints for your deployment.
# NOTE(review): the translation endpoint uses https while the TTS endpoint
# uses plain http -- confirm this difference is intentional.
TRANSLATION_API_URL = "https://twcc2.eztalking.ai/nantrans/inference"
TTS_API_URL = "http://twcc2.eztalking.ai/mtts/tts"
def fetch_translation(text, target_language, timeout=30):
    """Translate *text* by POSTing it to the translation service.

    Args:
        text: Source text to translate.
        target_language: Accepted for interface compatibility but currently
            unused -- the request always asks for "zh" -> "tw".
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The raw response body on HTTP 200, otherwise the literal string
        "Translation failed." (also returned when the request itself fails).
    """
    payload = {
        "input_text": text,
        "id": "1",
        "src_lang": "zh",
        "tgt_lang": "tw",
    }
    try:
        # Without a timeout a stalled service would block the UI worker forever.
        response = requests.post(TRANSLATION_API_URL, json=payload, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 status so callers only need to handle one failure value.
        return "Translation failed."
    if response.status_code == 200:
        return response.text
    return "Translation failed."
def fetch_tts_audio(translated_text, spk, timeout=60):
    """Synthesize speech for *translated_text* via the TTS service.

    Args:
        translated_text: Text to be spoken.
        spk: Speaker identifier forwarded to the service as "speaker".
        timeout: Seconds to wait for the service before giving up.

    Returns:
        ``(audio_data, sample_rate)`` as decoded by ``soundfile.read`` on
        success, or ``(None, None)`` when the request fails, the service
        answers with a non-200 status, or the body cannot be decoded.
    """
    payload = {
        "text": translated_text,
        "b64enc": False,
        "speaker": spk,
        "speed": 0.9,
    }
    try:
        # Without a timeout a stalled service would block the UI worker forever.
        response = requests.post(TTS_API_URL, json=payload, timeout=timeout)
    except requests.RequestException:
        return None, None
    if response.status_code != 200:
        return None, None
    try:
        # Decode the raw audio bytes from the response into a NumPy array.
        audio_data, sample_rate = sf.read(io.BytesIO(response.content))
    except RuntimeError:
        # soundfile reports unreadable or unsupported data with RuntimeError
        # (or a subclass); treat it like any other TTS failure.
        return None, None
    return audio_data, sample_rate
def translate_and_speak(text, target_language, spk):
    """Translate *text* and synthesize speech for the translation.

    Args:
        text: Source text entered by the user.
        target_language: Language selection passed through to
            ``fetch_translation``.
        spk: Speaker identifier passed through to ``fetch_tts_audio``.

    Returns:
        ``(translated_text, (sample_rate, audio_data))`` on success,
        ``(translated_text, None)`` when only the TTS step fails, or
        ``("Translation failed.", None)`` when translation fails.
    """
    translated_text = fetch_translation(text, target_language)
    # fetch_translation signals failure with a non-empty sentinel string, so a
    # plain truthiness check would let the error message be sent to TTS.
    if not translated_text or translated_text == "Translation failed.":
        return "Translation failed.", None
    audio_data, sample_rate = fetch_tts_audio(translated_text, spk)
    if audio_data is None:
        return translated_text, None
    # Order is (sample_rate, audio_data), as returned to the gr.Audio output.
    return translated_text, (sample_rate, audio_data)
# Fetch the available speakers once at startup. A timeout keeps the app from
# hanging forever if the service is unreachable, and raise_for_status() turns
# an HTTP error into a clear failure instead of a confusing JSON decode error.
_speakers_response = requests.get(
    "http://twcc2.eztalking.ai/mtts/list_speakers", timeout=30
)
_speakers_response.raise_for_status()
spk_list = _speakers_response.json()

# Gradio Interface
iface = gr.Interface(
    fn=translate_and_speak,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=["en", "es", "fr", "de"], label="Target Language", value="en"),
        # Guard the default so an empty speaker list does not raise IndexError.
        gr.Dropdown(
            choices=spk_list,
            label="Select Speaker",
            value=spk_list[0] if spk_list else None,
        ),
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
        gr.Audio(label="TTS Audio", type="numpy"),
    ],
    title="Text Translator with TTS",
    description="Translate text to a selected language and generate TTS audio.",
)
iface.launch()
|