File size: 3,410 Bytes
6712d1e
 
e203fd6
 
 
 
 
 
6712d1e
 
 
e203fd6
 
 
 
6712d1e
e203fd6
 
203d73d
e203fd6
203d73d
6712d1e
 
 
 
 
 
 
 
 
 
e203fd6
 
 
 
6712d1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e203fd6
203d73d
6712d1e
203d73d
6712d1e
 
654df10
e203fd6
 
 
6712d1e
e203fd6
 
 
 
6712d1e
e203fd6
 
 
 
 
47bc9a6
e203fd6
 
 
654df10
e203fd6
ce17cdb
 
e203fd6
 
 
203d73d
 
 
 
 
e203fd6
203d73d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import sys

import gradio as gr
import edge_tts
import asyncio
import tempfile
import os

from edge_tts import SubMaker


async def get_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        A dict mapping ``"<ShortName> - <Locale> (<Gender>)"`` labels to the
        voice's ``ShortName``, in the order returned by
        ``edge_tts.list_voices()``.
    """
    catalog = await edge_tts.list_voices()
    label_to_short_name = {}
    for entry in catalog:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        label_to_short_name[label] = short_name
    return label_to_short_name


async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` with Edge TTS into an MP3 file plus SRT subtitles.

    Args:
        text: Text to convert. ``None``, empty, or whitespace-only input is
            rejected with a message instead of being sent to the service.
        voice: Voice label; everything before the first ``" - "`` is used as
            the voice short name passed to ``edge_tts.Communicate``.
        rate: Speech-rate offset, sent as a signed percentage (e.g. ``+10%``).
        pitch: Pitch offset, sent as a signed Hz value (e.g. ``-5Hz``).

    Returns:
        Always a 3-tuple ``(audio_path, sub_path, message)``:

        * success: paths of the two temporary files and ``None``. The files
          are created with ``delete=False`` and are not removed here.
        * failure: ``(None, None, message)`` where ``message`` is either a
          validation prompt or the text of the exception that was raised.
    """
    # Every exit path returns three values, because the caller unpacks three.
    if not text or not text.strip():
        return None, None, "Please enter text to convert."
    if not voice:
        return None, None, "Please select a voice."

    # Reserve two temp file names; delete=False keeps the files on disk after
    # the handles are closed so they can be reopened for writing below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_tmp:
        audio_path = audio_tmp.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as sub_tmp:
        sub_path = sub_tmp.name

    try:
        voice_short_name = voice.split(" - ")[0]
        # int() keeps the ":+d" format valid if the values arrive as floats.
        rate_str = f"{int(rate):+d}%"
        pitch_str = f"{int(pitch):+d}Hz"
        # Constructed inside the try so a rejected voice/rate/pitch is
        # reported as a message and the temp files are cleaned up.
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
        submaker = SubMaker()

        with open(audio_path, "wb") as audio_file:
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_file.write(chunk["data"])
                elif chunk["type"] == "WordBoundary":
                    submaker.feed(chunk)

        submaker.merge_cues(12)
        with open(sub_path, "w", encoding="utf-8") as sub_file:
            sub_file.write(submaker.get_srt())
    except Exception as e:
        # Both files are already closed here (the with-blocks have exited),
        # so removal also works on platforms that refuse to delete open files.
        for leftover in (audio_path, sub_path):
            try:
                os.remove(leftover)
            except OSError:
                # Best-effort cleanup: the original error is what matters.
                pass
        return None, None, str(e)

    return audio_path, sub_path, None

async def tts_interface(text, voice, rate, pitch):
    """Gradio callback: run ``text_to_speech`` and surface any message.

    Returns:
        ``(audio, srt, third)`` for the three output components. When
        ``text_to_speech`` reports a message, ``gr.Warning`` is called with it
        and whatever that call returns is used as the third value; otherwise
        the third value is ``None``.
    """
    audio, srt, warning = await text_to_speech(text, voice, rate, pitch)
    notice = gr.Warning(warning) if warning else None
    return audio, srt, notice

async def create_demo():
    """Build the Gradio ``Interface`` for the text-to-speech app.

    Awaits ``get_voices()`` to populate the voice dropdown, then wires
    ``tts_interface`` to four inputs (text, voice, rate, pitch) and three
    outputs (audio file, SRT file, hidden Markdown).

    Returns:
        The constructed ``gr.Interface`` (not yet queued or launched).
    """
    voice_labels = await get_voices()

    text_box = gr.Textbox(label="Input Text", lines=5)
    # The leading empty choice is also the default, so no voice is preselected.
    voice_picker = gr.Dropdown(choices=["", *voice_labels], label="选择配音员", value="")
    rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
    pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)

    audio_out = gr.Audio(label="Generated Audio", type="filepath")
    # Offer the SRT subtitles as a direct file download.
    srt_out = gr.File(label="下载 SRT 字幕文件")
    warning_out = gr.Markdown(label="Warning", visible=False)

    return gr.Interface(
        fn=tts_interface,
        inputs=[text_box, voice_picker, rate_slider, pitch_slider],
        outputs=[audio_out, srt_out, warning_out],
        title="Edge TTS Text-to-Speech",
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name=None,
    )

async def main():
    """Create the demo, enable its queue, and launch it.

    The queue is configured with a default concurrency limit of 5 and the
    app is launched with ``show_api=False``.
    """
    app = await create_demo()
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)

# Script entry point: run the async main() only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())