HoneyTian committed on
Commit
44edd1d
1 Parent(s): 4d0a327
data/e_book/confucianism/{the_analects.txt → the_analects/the_analects.txt} RENAMED
File without changes
main.py CHANGED
@@ -7,12 +7,10 @@ import json
7
  import logging
8
  from pathlib import Path
9
  import platform
10
- import re
11
- import tempfile
12
  from typing import List
13
  import uuid
14
 
15
- from project_settings import project_path, log_directory, edge_tts_record_file, edge_tts_temp_directory
16
  import log
17
 
18
  log.setup(log_directory=log_directory)
@@ -21,6 +19,9 @@ import aiofiles
21
  import anyio
22
  import edge_tts
23
  import gradio as gr
 
 
 
24
 
25
  from toolbox.os.command import Command
26
 
@@ -48,19 +49,57 @@ async def edge_tts_get_speakers() -> List[str]:
48
  return edge_tts_speakers_choices
49
 
50
 
51
- async def edge_tts_text_to_speech(text: str, speaker: str):
 
 
52
  communicate = edge_tts.Communicate(text, speaker)
53
 
54
- filename = edge_tts_temp_directory / "{}.wav".format(uuid.uuid4())
55
- async with aiofiles.open(edge_tts_record_file.as_posix(), "a+", encoding="utf-8") as f:
56
- await f.write(json.dumps({
57
- "text": text,
58
- "speaker": speaker,
59
- "filename": filename.as_posix(),
60
- }, ensure_ascii=False))
 
 
61
 
62
- await communicate.save(filename)
63
- return filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
 
66
  def shell(cmd: str):
@@ -80,15 +119,6 @@ def main():
80
  gr.Markdown(value=title)
81
 
82
  with gr.Tabs():
83
- with gr.TabItem("Ebook Reading"):
84
- e_book_reading_file = gr.File(
85
- value=(project_path / "data/e_book/confucianism/the_analects.txt").as_posix(),
86
- label="txt"
87
- )
88
- e_book_reading_tts_engine = gr.Dropdown(
89
- choices=["Edge TTS"], value="Edge TTS", label="tts_engine"
90
- )
91
- e_book_reading_button = gr.Button(value="e_book_reading", variant="primary")
92
  with gr.TabItem("Edge TTS"):
93
  edge_tts_text = gr.Textbox(value="学而时习之,不亦悦乎。", lines=4, max_lines=50, label="text")
94
  edge_tts_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")
@@ -96,30 +126,52 @@ def main():
96
  edge_tts_audio = gr.Audio(type="filepath", label="audio", autoplay=True)
97
 
98
  edge_tts_button = gr.Button(value="edge_tts", variant="primary")
99
- edge_tts_button.click(
100
- edge_tts_text_to_speech,
101
- inputs=[
102
- edge_tts_text,
103
- edge_tts_speaker,
104
- ],
105
- outputs=[
106
- edge_tts_audio
107
- ],
108
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  with gr.TabItem("shell"):
110
  shell_text = gr.Textbox(label="cmd")
111
  shell_button = gr.Button("run")
112
  shell_output = gr.Textbox(label="output")
113
 
114
- shell_button.click(
115
- shell,
116
- inputs=[
117
- shell_text,
118
- ],
119
- outputs=[
120
- shell_output
121
- ],
122
- )
123
 
124
  launch = partial(
125
  blocks.queue().launch,
 
7
  import logging
8
  from pathlib import Path
9
  import platform
 
 
10
  from typing import List
11
  import uuid
12
 
13
+ from project_settings import project_path, log_directory, temp_directory, edge_tts_temp_directory
14
  import log
15
 
16
  log.setup(log_directory=log_directory)
 
19
  import anyio
20
  import edge_tts
21
  import gradio as gr
22
+ import librosa
23
+ from scipy.io import wavfile
24
+ import spacy
25
 
26
  from toolbox.os.command import Command
27
 
 
49
  return edge_tts_speakers_choices
50
 
51
 
52
async def edge_tts_text_to_speech(text: str, speaker: str, audio_dir: Path = edge_tts_temp_directory):
    """
    Synthesize ``text`` with Edge TTS and append a record of the request.

    The audio is saved as ``<audio_dir>/<uuid4>.wav`` and one JSON line with the
    keys ``text``, ``speaker`` and ``filename`` is appended to
    ``<audio_dir>/edge_tts.jsonl``. The record is written even when no audio
    was produced; in that case ``filename`` is ``null``.

    :param text: text to synthesize.
    :param speaker: voice name handed to ``edge_tts.Communicate``.
    :param audio_dir: directory that receives the audio file and the record
        file; it is created when missing.
    :return: POSIX path of the saved audio file as ``str``, or ``None`` when
        ``edge_tts.exceptions.NoAudioReceived`` was raised.
    """
    # tts
    main_logger.info(f"edge tts; speaker: {speaker}; text: {text}")
    communicate = edge_tts.Communicate(text, speaker)

    # save audio
    audio_dir.mkdir(parents=True, exist_ok=True)
    audio_path = audio_dir / "{}.wav".format(uuid.uuid4())
    record_file = audio_dir / "edge_tts.jsonl"

    audio_file = audio_path.as_posix()
    try:
        await communicate.save(audio_file)
    except edge_tts.exceptions.NoAudioReceived:
        main_logger.warning(f"edge tts; no audio received; speaker: {speaker}; text: {text}")
        # save() may already have created the output file before failing;
        # remove it so callers never find an unusable audio file on disk.
        audio_path.unlink(missing_ok=True)
        audio_file = None

    # save record
    row = json.dumps(
        {
            "text": text,
            "speaker": speaker,
            "filename": audio_file,
        },
        ensure_ascii=False,
    )
    async with aiofiles.open(record_file.as_posix(), "a+", encoding="utf-8") as f:
        await f.write("{}\n".format(row))
    return audio_file
77
+
78
+
79
+ spacy_model = spacy.load("zh_core_web_sm")
80
+
81
+
82
async def e_book_reading(txt_file: str, speaker: str):
    """
    Read a text file aloud sentence by sentence, repeating the book in a loop.

    The file is split into sentences with ``spacy_model``. Each non-empty
    sentence is synthesized through ``edge_tts_text_to_speech`` into
    ``temp_directory / "e_book_reading" / <file stem> / <speaker>``; the audio
    path is yielded and the generator then sleeps for the clip's duration
    before moving on to the next sentence.

    :param txt_file: path of the UTF-8 text file to read.
    :param speaker: voice name forwarded to ``edge_tts_text_to_speech``.
    :return: async generator yielding the path of each synthesized audio file.
        The generator ends when the file has no non-empty sentence, or when a
        full pass over the book produced no audio at all.
    """
    txt_file = Path(txt_file)

    audio_dir = temp_directory / "e_book_reading" / txt_file.stem / speaker

    # Read and segment the book once; the sentences do not change between passes.
    async with aiofiles.open(txt_file.as_posix(), "r", encoding="utf-8") as f:
        data = await f.read()
    doc = spacy_model(data)
    sentences = [text for text in (sentence.text.strip() for sentence in doc.sents) if text]

    if not sentences:
        # Without this guard the loop below would spin forever yielding nothing.
        main_logger.warning(f"e book reading; no sentence found; txt_file: {txt_file.as_posix()}")
        return

    while True:
        produced_audio = False
        for text in sentences:
            filename = await edge_tts_text_to_speech(text=text, speaker=speaker, audio_dir=audio_dir)
            if filename is None:
                # No audio was produced for this sentence; there is nothing to load or play.
                continue

            signal, sample_rate = librosa.load(filename)
            duration = len(signal) / sample_rate
            produced_audio = True

            yield filename
            # Wait for the clip to finish before sending the next one.
            await asyncio.sleep(duration)

        if not produced_audio:
            # Every sentence failed; stop rather than retry the whole book forever.
            main_logger.warning(f"e book reading; no audio produced; txt_file: {txt_file.as_posix()}")
            return
103
 
104
 
105
  def shell(cmd: str):
 
119
  gr.Markdown(value=title)
120
 
121
  with gr.Tabs():
 
 
 
 
 
 
 
 
 
122
  with gr.TabItem("Edge TTS"):
123
  edge_tts_text = gr.Textbox(value="学而时习之,不亦悦乎。", lines=4, max_lines=50, label="text")
124
  edge_tts_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")
 
126
  edge_tts_audio = gr.Audio(type="filepath", label="audio", autoplay=True)
127
 
128
  edge_tts_button = gr.Button(value="edge_tts", variant="primary")
129
+ edge_tts_button.click(
130
+ edge_tts_text_to_speech,
131
+ inputs=[
132
+ edge_tts_text,
133
+ edge_tts_speaker,
134
+ ],
135
+ outputs=[
136
+ edge_tts_audio
137
+ ],
138
+ )
139
+
140
+ with gr.TabItem("Ebook Reading"):
141
+ e_book_reading_file = gr.File(
142
+ value=(project_path / "data/e_book/confucianism/the_analects/the_analects.txt").as_posix(),
143
+ label="txt"
144
+ )
145
+ e_book_reading_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")
146
+
147
+ e_book_reading_audio = gr.Audio(type="filepath", label="audio", streaming=True)
148
+ e_book_reading_button = gr.Button(value="e_book_reading", variant="primary")
149
+
150
+ e_book_reading_button.click(
151
+ e_book_reading,
152
+ inputs=[
153
+ e_book_reading_file,
154
+ e_book_reading_speaker,
155
+ ],
156
+ outputs=[
157
+ e_book_reading_audio
158
+ ],
159
+ )
160
+
161
  with gr.TabItem("shell"):
162
  shell_text = gr.Textbox(label="cmd")
163
  shell_button = gr.Button("run")
164
  shell_output = gr.Textbox(label="output")
165
 
166
+ shell_button.click(
167
+ shell,
168
+ inputs=[
169
+ shell_text,
170
+ ],
171
+ outputs=[
172
+ shell_output
173
+ ],
174
+ )
175
 
176
  launch = partial(
177
  blocks.queue().launch,
project_settings.py CHANGED
@@ -18,8 +18,6 @@ temp_directory.mkdir(parents=True, exist_ok=True)
18
  edge_tts_temp_directory = temp_directory / "edge_tts"
19
  edge_tts_temp_directory.mkdir(parents=True, exist_ok=True)
20
 
21
- edge_tts_record_file = edge_tts_temp_directory / "edge_tts.jsonl"
22
-
23
  environment = EnvironmentManager(
24
  path=os.path.join(project_path, "dotenv"),
25
  env=os.environ.get("environment", "dev"),
 
18
  edge_tts_temp_directory = temp_directory / "edge_tts"
19
  edge_tts_temp_directory.mkdir(parents=True, exist_ok=True)
20
 
 
 
21
  environment = EnvironmentManager(
22
  path=os.path.join(project_path, "dotenv"),
23
  env=os.environ.get("environment", "dev"),
requirements.txt CHANGED
@@ -2,3 +2,5 @@ gradio==4.38.1
2
  python-dotenv==1.0.1
3
  spacy==3.7.5
4
  edge-tts==6.1.12
 
 
 
2
  python-dotenv==1.0.1
3
  spacy==3.7.5
4
  edge-tts==6.1.12
5
+ scipy==1.14.0
6
+ librosa==0.10.2.post1