# TEXT-SPEECH / app.py
# Author avatar: lelafav502's picture
# Commit message: Create app.py
# Commit: a854603 (verified)
import edge_tts
import gradio as gr
import tempfile
# Maps the human-readable label offered in the voice dropdown to the
# voice identifier handed to edge_tts.Communicate.
language_dict = {
    "English-Jenny (Female)": "en-US-JennyNeural",
}
# Voice used when the selected label is missing or unknown. It must be a real
# Edge voice name; edge-tts rejects arbitrary placeholder strings.
_DEFAULT_VOICE = "en-US-JennyNeural"


def _signed_offset(value, unit):
    """Format *value* as an explicitly signed whole number followed by *unit*.

    edge-tts expects offsets such as ``+0%``, ``-25%`` or ``+10Hz``; slider
    values may arrive as floats, so they are rounded to an integer first.
    """
    return f"{int(round(value)):+d}{unit}"


async def text_to_speech_edge(text, language_code, rate, volume, pitch):
    """Synthesize *text* with Edge TTS and save the audio to a temp file.

    Args:
        text: The text to speak.
        language_code: A key of ``language_dict`` (the dropdown label). An
            unknown or missing label falls back to ``_DEFAULT_VOICE``.
        rate: Speaking-rate offset in percent (negative slows down).
        volume: Volume offset in percent.
        pitch: Pitch offset in Hz.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        when there is no text to synthesize.
    """
    # Nothing to send to the service: report it instead of failing remotely.
    if not text or not text.strip():
        return "No text provided; nothing to synthesize.", None

    voice = language_dict.get(language_code, _DEFAULT_VOICE)
    communicate = edge_tts.Communicate(
        text,
        voice,
        rate=_signed_offset(rate, "%"),
        volume=_signed_offset(volume, "%"),
        pitch=_signed_offset(pitch, "Hz"),
        proxy=None,
    )

    # edge-tts produces MP3 data, so the file gets a matching extension.
    # delete=False keeps the file on disk after the handle is closed so that
    # the caller can read it back by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    # Write only after our own handle is closed, so the path is not held open
    # twice while edge-tts writes to it.
    await communicate.save(tmp_path)
    return f"Speech synthesis completed for: {text}", tmp_path
# --- Gradio UI wiring -------------------------------------------------------
# Input components, in the same order as text_to_speech_edge's parameters.
input_text = gr.Textbox(lines=5, label="Input Text")
language = gr.Dropdown(
    choices=list(language_dict),
    # Preselect the first voice so the handler never receives an empty
    # selection on the first submit.
    value=next(iter(language_dict), None),
    label="Choose the Voice Model",
)
rate = gr.Slider(-100, 100, step=1, value=0, label="Rate", info="Rate", interactive=True)
volume = gr.Slider(-100, 100, step=1, value=0, label="Volume", info="Volume", interactive=True)
pitch = gr.Slider(-100, 100, step=1, value=0, label="Pitch", info="Pitch", interactive=True)

# Output components, matching the (status message, audio file path) tuple
# returned by text_to_speech_edge.
output_text = gr.Textbox(label="Output Text")
output_audio = gr.Audio(type="filepath", label="Exported Audio")

interface = gr.Interface(
    fn=text_to_speech_edge,
    inputs=[input_text, language, rate, volume, pitch],
    outputs=[output_text, output_audio],
    title="Edge-TTS",
    description="Microsoft Edge Text-To-Speech (Created by Yash Chouhan)",
)

# Start the server only when run as a script, so importing this module
# (e.g. to reuse `interface`) does not launch it as a side effect.
if __name__ == "__main__":
    interface.launch(share=True)