File size: 3,410 Bytes
6712d1e e203fd6 6712d1e e203fd6 6712d1e e203fd6 203d73d e203fd6 203d73d 6712d1e e203fd6 6712d1e e203fd6 203d73d 6712d1e 203d73d 6712d1e 654df10 e203fd6 6712d1e e203fd6 6712d1e e203fd6 47bc9a6 e203fd6 654df10 e203fd6 ce17cdb e203fd6 203d73d e203fd6 203d73d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import sys
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from edge_tts import SubMaker
async def get_voices():
    """Fetch the Edge TTS voice list and map display labels to short names.

    Returns:
        A dict whose keys are labels of the form
        "<ShortName> - <Locale> (<Gender>)" and whose values are the
        corresponding bare ShortName strings.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with Edge TTS into a temp MP3 plus a temp SRT file.

    Args:
        text: Text to convert; ``None``, empty, or whitespace-only is rejected.
        voice: Voice label of the form "<ShortName> - <Locale> (<Gender>)";
            only the part before the first " - " is passed to Edge TTS.
        rate: Speech-rate adjustment in percent (sent as e.g. "+10%").
        pitch: Pitch adjustment in Hz (sent as e.g. "-5Hz").

    Returns:
        A 3-tuple ``(audio_path, subtitle_path, error_message)``. On success
        the two paths name the generated ``.mp3`` and ``.srt`` files and the
        message is ``None``. On validation or synthesis failure both paths
        are ``None`` and the message describes the problem.
    """
    # Validation failures use the same 3-tuple shape as every other return,
    # so callers can always unpack three values.
    if not text or not text.strip():
        return None, None, "Please enter text to convert."
    if not voice:
        return None, None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # The "d" format code rejects floats, which a slider may deliver.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"

    # Reserve the output paths. delete=False keeps the files after the
    # handles close so they can be reopened below and returned to the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_tmp:
        audio_path = audio_tmp.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as sub_tmp:
        sub_path = sub_tmp.name

    try:
        # Constructed inside the try so a rejected argument is reported
        # through the error tuple and the temp files are still cleaned up.
        communicate = edge_tts.Communicate(
            text, voice_short_name, rate=rate_str, pitch=pitch_str
        )
        submaker = SubMaker()
        with open(audio_path, "wb") as audio_file:
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_file.write(chunk["data"])
                elif chunk["type"] == "WordBoundary":
                    submaker.feed(chunk)
        # NOTE(review): 12 is presumably the number of words per subtitle
        # cue; confirm against the installed edge-tts version.
        submaker.merge_cues(12)
        with open(sub_path, "w", encoding="utf-8") as sub_file:
            sub_file.write(submaker.get_srt())
    except Exception as e:
        # Both files are already closed here (the with-blocks have exited),
        # so removal also works on platforms that refuse to delete open files.
        for leftover in (audio_path, sub_path):
            if os.path.exists(leftover):
                os.remove(leftover)
        # Fall back to repr() so an exception with an empty message still
        # yields a non-empty error string.
        return None, None, str(e) or repr(e)
    return audio_path, sub_path, None
async def tts_interface(text, voice, rate, pitch):
    """Gradio callback: run the synthesis and surface any error as a warning.

    Returns:
        A 3-tuple of (audio path, subtitle path, third output value). The
        third value is whatever ``gr.Warning`` returns when an error message
        was produced, otherwise ``None``.
    """
    audio_path, srt_path, message = await text_to_speech(text, voice, rate, pitch)
    # gr.Warning is only invoked when there is a non-empty message to show.
    notice = gr.Warning(message) if message else None
    return audio_path, srt_path, notice
async def create_demo():
    """Assemble the Gradio interface for the text-to-speech tool.

    The voice list is fetched first so the dropdown can be populated. A blank
    entry is placed first and used as the default, so no voice is
    pre-selected.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    voice_map = await get_voices()
    voice_labels = [""] + list(voice_map)

    input_components = [
        gr.Textbox(label="Input Text", lines=5),
        gr.Dropdown(choices=voice_labels, label="选择配音员", value=""),
        gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
        gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
    ]
    output_components = [
        gr.Audio(label="Generated Audio", type="filepath"),
        # Offer the SRT subtitle file for direct download.
        gr.File(label="下载 SRT 字幕文件"),
        # Hidden slot that receives the callback's third return value.
        gr.Markdown(label="Warning", visible=False),
    ]

    return gr.Interface(
        fn=tts_interface,
        inputs=input_components,
        outputs=output_components,
        title="Edge TTS Text-to-Speech",
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name=None,
    )
async def main():
    """Build the interface, enable its request queue, and launch it."""
    app = await create_demo()
    # Queue requests with a default concurrency limit of 5.
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)
if __name__ == "__main__":
    # Only start the app when executed as a script, not when imported.
    entry_point = main()
    asyncio.run(entry_point)
|