File size: 4,556 Bytes
9e41260
889e1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909d366
889e1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from moviepy.editor import AudioFileClip
from wand.image import Image
from wand.drawing import Drawing
from wand.color import Color

# Fetch every voice that Edge TTS makes available
async def get_voices():
    """Return a dict mapping a display label to the voice's short name.

    Each label is built as ``"<ShortName> - <Locale> (<Gender>)"`` from the
    entries returned by ``edge_tts.list_voices()``.
    """
    catalog = {}
    for entry in await edge_tts.list_voices():
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        catalog[label] = entry['ShortName']
    return catalog

# Text-to-speech conversion
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with Edge TTS and save it to a temporary MP3 file.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input is
            rejected.
        voice: The selected voice; only the part before the first ``" - "``
            is passed to Edge TTS, so both a display label and a bare short
            name work.
        rate: Speaking-rate adjustment in percent (rounded to a whole number).
        pitch: Pitch adjustment in Hz (rounded to a whole number).

    Returns:
        ``(path, None)`` on success, where *path* is the MP3 file written.
        ``(None, message)`` when validation fails; the message is also
        surfaced through ``gr.Warning``.
    """
    # gr.Warning is called for its side effect only, so the message itself is
    # returned separately to let callers detect the failure.
    if not text or not text.strip():
        message = "Please enter the text to convert."
        gr.Warning(message)
        return None, message
    if not voice:
        message = "Please select a voice."
        gr.Warning(message)
        return None, message

    voice_short_name = voice.split(" - ")[0]
    # The "+d" format spec rejects floats, which a slider may deliver.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique file name, then release the handle before writing so
    # the save does not compete with our own open handle.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise
    return tmp_path, None

# SRT file generation
def generate_srt(pages, audio_clips):
    """Write an SRT subtitle file with one cue per page and return its path.

    Args:
        pages: Subtitle texts, one per cue.
        audio_clips: Objects exposing a ``duration`` attribute (seconds), one
            per page. Cues are laid out back to back in this order; extra
            items in the longer of the two sequences are ignored.

    Returns:
        Path of the written ``output_subtitles.srt`` file.
    """
    # A fresh directory per call keeps simultaneous requests from overwriting
    # each other's subtitles while preserving the file name.
    srt_path = os.path.join(tempfile.mkdtemp(), "output_subtitles.srt")
    elapsed = 0  # running end time of the previous cue
    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for index, (page, audio_clip) in enumerate(zip(pages, audio_clips), start=1):
            start_time = elapsed
            end_time = start_time + audio_clip.duration

            # Convert to SRT format
            start_time_str = format_srt_time(start_time)
            end_time_str = format_srt_time(end_time)
            srt_file.write(f"{index}\n{start_time_str} --> {end_time_str}\n{page}\n\n")
            elapsed = end_time

    return srt_path

def format_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond first and then split with
    integer arithmetic, so float representation error (e.g. ``1.001``) cannot
    drop a millisecond and a rounded-up fraction carries into the seconds.
    """
    total_millis = int(round(float(seconds) * 1000))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

def _paginate_text(text, max_chars_per_line, max_lines_per_page):
    """Greedily wrap *text* into lines and group the lines into pages."""
    pages = []
    lines = []
    current_line = ""
    for word in text.split():
        # Break only when there is a line to flush: an over-long first word
        # gets a line of its own instead of leaving an empty line behind.
        if current_line and len(current_line) + len(word) + 1 > max_chars_per_line:
            lines.append(current_line)
            current_line = word
            if len(lines) == max_lines_per_page:
                pages.append("\n".join(lines))
                lines = []
        else:
            current_line = f"{current_line} {word}".strip()

    if current_line:
        lines.append(current_line)
    if lines:
        pages.append("\n".join(lines))
    return pages


# Text to audio plus SRT subtitles
async def text_to_audio_and_srt(text, voice, rate, pitch):
    """Convert *text* to speech page by page and build matching subtitles.

    The text is wrapped into pages; each page is synthesized separately so
    its audio duration can time the corresponding subtitle cue.

    Args:
        text: Text to convert. ``None`` or blank text yields a warning.
        voice, rate, pitch: Passed through to ``text_to_speech``.

    Returns:
        ``(audio_path, srt_path, None)`` on success. For a single page,
        *audio_path* is that page's file; for several pages it is a new MP3
        containing all pages in order. ``(None, None, warning)`` when no
        audio could be produced.
    """
    max_chars_per_line = 60  # maximum characters per subtitle line
    max_lines_per_page = 5   # maximum lines per page (one page = one cue)

    # Split the text into pages
    pages = _paginate_text(text or "", max_chars_per_line, max_lines_per_page)
    if not pages:
        message = "Please enter the text to convert."
        gr.Warning(message)
        return None, None, message

    page_paths = []
    audio_clips = []
    final_audio = None
    try:
        # Generate separate audio for every page
        for page in pages:
            audio_text = page.replace("\n", " ")  # drop line breaks so TTS does not pause
            audio, warning = await text_to_speech(audio_text, voice, rate, pitch)
            # A missing path means synthesis was refused even if no warning
            # object came back, so never hand None to AudioFileClip.
            if warning or audio is None:
                return None, None, warning
            page_paths.append(audio)
            audio_clips.append(AudioFileClip(audio))

        # Generate the SRT file
        srt_path = generate_srt(pages, audio_clips)

        if len(page_paths) == 1:
            final_audio = page_paths[0]
        else:
            # Join all pages so the returned audio covers the whole text,
            # not just the last page.
            from moviepy.editor import concatenate_audioclips

            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                merged_path = tmp_file.name
            concatenate_audioclips(audio_clips).write_audiofile(merged_path)
            final_audio = merged_path
    finally:
        # Release the readers, then drop per-page files that are not the
        # returned result.
        for clip in audio_clips:
            clip.close()
        for path in page_paths:
            if path != final_audio:
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup of temporary files

    return final_audio, srt_path, None

# Gradio callback
def tts_interface(text, voice, rate, pitch):
    """Synchronous entry point for Gradio.

    Runs the ``text_to_audio_and_srt`` coroutine to completion and returns
    its ``(audio_path, srt_path, warning)`` triple.
    """
    job = text_to_audio_and_srt(text, voice, rate, pitch)
    outputs = asyncio.run(job)
    audio_file, subtitle_file, notice = outputs
    return audio_file, subtitle_file, notice

# Build the Gradio application
async def create_demo():
    """Create the Gradio interface for the text-to-speech/subtitle tool.

    Awaits ``get_voices()`` to populate the voice dropdown, then wires
    ``tts_interface`` to four inputs (text, voice, rate, pitch) and three
    outputs (audio file, SRT file, warning text).

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    voices = await get_voices()

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # The leading empty choice lets the app start with no voice selected.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.File(label="Generated SRT", file_count="single"),
            gr.Markdown(label="Warning", visible=False)
        ],
        title="Edge TTS Text to Speech with Subtitles",
        description="Convert text to speech and generate subtitles (SRT) using Microsoft Edge TTS.",
        analytics_enabled=False,
        # The documented values are "never"/"manual"/"auto"; a bare boolean
        # is not one of them.
        allow_flagging="never",
    )

    return demo

# Run the application
if __name__ == "__main__":
    # create_demo() is a coroutine (it awaits the voice list), so run it to
    # completion to obtain the interface object.
    demo = asyncio.run(create_demo())
    # Launch the assembled Gradio interface.
    demo.launch()