wenkai26 committed on
Commit
6712d1e
·
1 Parent(s): 5367683
Files changed (2) hide show
  1. app.py +55 -27
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,68 +1,96 @@
 
 
1
  import gradio as gr
2
  import edge_tts
3
  import asyncio
4
  import tempfile
5
  import os
6
 
 
 
 
7
  async def get_voices():
8
  voices = await edge_tts.list_voices()
9
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
10
 
 
11
  async def text_to_speech(text, voice, rate, pitch):
12
  if not text.strip():
13
  return None, "Please enter text to convert."
14
  if not voice:
15
  return None, "Please select a voice."
16
-
 
 
 
 
 
 
 
 
 
17
  voice_short_name = voice.split(" - ")[0]
18
  rate_str = f"{rate:+d}%"
19
  pitch_str = f"{pitch:+d}Hz"
20
  communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
21
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
22
- tmp_path = tmp_file.name
23
- await communicate.save(tmp_path)
24
- return tmp_path, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  async def tts_interface(text, voice, rate, pitch):
27
- audio, warning = await text_to_speech(text, voice, rate, pitch)
28
  if warning:
29
- return audio, gr.Warning(warning)
30
- return audio, None
31
 
32
  async def create_demo():
33
  voices = await get_voices()
34
 
35
- description = """
36
- Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
37
-
38
- 🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥
39
-
40
- Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
41
- Transform your words into stunning, professional-quality videos in just a few clicks.
42
-
43
- ✨ Features:
44
- • Convert text to engaging videos with customizable visuals
45
- • Choose from 40+ languages and 300+ voices
46
- • Perfect for creating audiobooks, storytelling, and language learning materials
47
- • Ideal for educators, content creators, and language enthusiasts
48
-
49
- Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
50
- """
51
-
52
  demo = gr.Interface(
53
  fn=tts_interface,
54
  inputs=[
55
  gr.Textbox(label="Input Text", lines=5),
56
- gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
57
  gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
58
  gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
59
  ],
60
  outputs=[
61
  gr.Audio(label="Generated Audio", type="filepath"),
 
62
  gr.Markdown(label="Warning", visible=False)
63
  ],
64
  title="Edge TTS Text-to-Speech",
65
- description=description,
66
  article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
67
  analytics_enabled=False,
68
  allow_flagging="manual",
 
1
+ import sys
2
+
3
  import gradio as gr
4
  import edge_tts
5
  import asyncio
6
  import tempfile
7
  import os
8
 
9
+ from edge_tts import SubMaker
10
+
11
+
12
  async def get_voices():
13
  voices = await edge_tts.list_voices()
14
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
15
 
16
+
17
  async def text_to_speech(text, voice, rate, pitch):
18
  if not text.strip():
19
  return None, "Please enter text to convert."
20
  if not voice:
21
  return None, "Please select a voice."
22
+
23
+ # 创建临时文件
24
+ audio_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
25
+ audio_path = audio_tmp.name
26
+ audio_tmp.close() # 关闭文件句柄以便后续重新打开
27
+
28
+ sub_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
29
+ sub_path = sub_tmp.name
30
+ sub_tmp.close()
31
+
32
  voice_short_name = voice.split(" - ")[0]
33
  rate_str = f"{rate:+d}%"
34
  pitch_str = f"{pitch:+d}Hz"
35
  communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
36
+ submaker = SubMaker()
37
+
38
+ audio_file = None
39
+ sub_file = None
40
+ try:
41
+ # 打开临时文件进行写入
42
+ audio_file = open(audio_path, "wb")
43
+ sub_file = open(sub_path, "w", encoding="utf-8")
44
+
45
+ async for chunk in communicate.stream():
46
+ if chunk["type"] == "audio":
47
+ audio_file.write(chunk["data"])
48
+ elif chunk["type"] == "WordBoundary":
49
+ submaker.feed(chunk)
50
+ submaker.merge_cues(12)
51
+ # 写入字幕内容
52
+ sub_file.write(submaker.get_srt())
53
+
54
+ except Exception as e:
55
+ # 清理临时文件
56
+ if os.path.exists(audio_path):
57
+ os.remove(audio_path)
58
+ if os.path.exists(sub_path):
59
+ os.remove(sub_path)
60
+ return None, None, str(e)
61
+ finally:
62
+ # 确保文件正确关闭
63
+ if audio_file:
64
+ audio_file.close()
65
+ if sub_file:
66
+ sub_file.close()
67
+
68
+ return audio_path, sub_path, None
69
 
70
  async def tts_interface(text, voice, rate, pitch):
71
+ audio, srt, warning = await text_to_speech(text, voice, rate, pitch)
72
  if warning:
73
+ return audio, srt, gr.Warning(warning)
74
+ return audio, srt, None
75
 
76
  async def create_demo():
77
  voices = await get_voices()
78
 
79
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  demo = gr.Interface(
81
  fn=tts_interface,
82
  inputs=[
83
  gr.Textbox(label="Input Text", lines=5),
84
+ gr.Dropdown(choices=[""] + list(voices.keys()), label="选择配音员", value=""),
85
  gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
86
  gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
87
  ],
88
  outputs=[
89
  gr.Audio(label="Generated Audio", type="filepath"),
90
+ gr.Text(label="Generated Srt", type="text"),
91
  gr.Markdown(label="Warning", visible=False)
92
  ],
93
  title="Edge TTS Text-to-Speech",
 
94
  article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
95
  analytics_enabled=False,
96
  allow_flagging="manual",
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- edge_tts==6.1.12
2
  gradio==4.36.1
 
1
+ edge_tts==7.0.0
2
  gradio==4.36.1