Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,8 @@ import librosa
|
|
9 |
import numpy as np
|
10 |
import soundfile
|
11 |
from scipy.io import wavfile
|
|
|
|
|
12 |
|
13 |
from inference.infer_tool import Svc
|
14 |
|
@@ -20,10 +22,10 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
|
20 |
sampling_rate = 44100
|
21 |
|
22 |
tts_voice = {
|
23 |
-
"
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
}
|
28 |
|
29 |
|
@@ -56,15 +58,22 @@ def create_fn(model, spk):
|
|
56 |
input_text = re.sub(r"[\n\,\(\) ]", "", input_text)
|
57 |
voice = tts_voice[gender]
|
58 |
ratestr = "+{:.0%}".format(tts_rate) if tts_rate >= 0 else "{:.0%}".format(tts_rate)
|
59 |
-
temp_path = "temp.wav"
|
60 |
-
p = subprocess.Popen("edge-tts " +
|
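61 |
-
" --text " + input_text +
|
62 |
-
" --write-media " + temp_path +
|
63 |
-
" --voice " + voice +
|
64 |
-
" --rate=" + ratestr, shell=True,
|
65 |
-
stdout=subprocess.PIPE,
|
66 |
-
stdin=subprocess.PIPE)
|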
67 |
-
p.wait()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
audio, sr = librosa.load(temp_path)
|
69 |
audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
|
70 |
os.remove(temp_path)
|
@@ -93,7 +102,7 @@ if __name__ == '__main__':
|
|
93 |
gr.Markdown(
|
94 |
"# <center> 游戏角色语音生成\n"
|
95 |
"## <center> 模型作者:B站[Cyber蝈蝈总](https://space.bilibili.com/37706580)\n"
|
96 |
-
"<center> 罪恶都市人物AI语音请移步[GTAVC_SOVITS](https://huggingface.co/spaces/GroveStreet/GTAVC_SOVITS)\n"
|
97 |
"<center> 使用此资源创作的作品请标明出处,CJ有两个模型,carl1更清晰,carl2音域广\n"
|
98 |
)
|
99 |
with gr.Tabs():
|
@@ -117,8 +126,8 @@ if __name__ == '__main__':
|
|
117 |
tts_input = gr.Textbox(label='说话内容', value='',
|
118 |
placeholder='已支持无限长内容,处理时间约为说完原内容时间的5倍')
|
119 |
with gr.Row():
|
120 |
-
gender = gr.Radio(label='说话人性别 (男音调低,女音调高)', value='
|
121 |
-
choices=['
|
122 |
tts_rate = gr.Number(label='语速 (正负, 单位百分比)', value=0)
|
123 |
tts_submit = gr.Button("生成", variant="primary")
|
124 |
|
|
|
9 |
import numpy as np
|
10 |
import soundfile
|
11 |
from scipy.io import wavfile
|
12 |
+
import tempfile
|
13 |
+
import edge_tts
|
14 |
|
15 |
from inference.infer_tool import Svc
|
16 |
|
|
|
22 |
sampling_rate = 44100
|
23 |
|
24 |
tts_voice = {
|
25 |
+
"中文男": "zh-CN-YunxiNeural",
|
26 |
+
"中文女": "zh-CN-XiaoyiNeural",
|
27 |
+
"英文男": "en-US-EricNeural",
|
28 |
+
"英文女": "en-US-AnaNeural"
|
29 |
}
|
30 |
|
31 |
|
|
|
58 |
input_text = re.sub(r"[\n\,\(\) ]", "", input_text)
|
59 |
voice = tts_voice[gender]
|
60 |
ratestr = "+{:.0%}".format(tts_rate) if tts_rate >= 0 else "{:.0%}".format(tts_rate)
|
61 |
+
# temp_path = "temp.wav"
|
62 |
+
# p = subprocess.Popen("edge-tts " +
|
63 |
+
# " --text " + input_text +
|
64 |
+
# " --write-media " + temp_path +
|
65 |
+
# " --voice " + voice +
|
66 |
+
# " --rate=" + ratestr, shell=True,
|
67 |
+
# stdout=subprocess.PIPE,
|
68 |
+
# stdin=subprocess.PIPE)
|
69 |
+
# p.wait()
|
70 |
+
communicate = edge_tts.Communicate(text=input_text,
|
71 |
+
voice=voice,
|
72 |
+
rate=ratestr)
|
73 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
|
74 |
+
temp_path = temp_file.name
|
75 |
+
await communicate.save(temp_path)
|
76 |
+
|
77 |
audio, sr = librosa.load(temp_path)
|
78 |
audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
|
79 |
os.remove(temp_path)
|
|
|
102 |
gr.Markdown(
|
103 |
"# <center> 游戏角色语音生成\n"
|
104 |
"## <center> 模型作者:B站[Cyber蝈蝈总](https://space.bilibili.com/37706580)\n"
|
105 |
+
"<center> 罪恶都市人物AI语音请移步[GTAVC_SOVITS](https://huggingface.co/spaces/GroveStreet/GTAVC_SOVITS) \n"
|
106 |
"<center> 使用此资源创作的作品请标明出处,CJ有两个模型,carl1更清晰,carl2音域广\n"
|
107 |
)
|
108 |
with gr.Tabs():
|
|
|
126 |
tts_input = gr.Textbox(label='说话内容', value='',
|
127 |
placeholder='已支持无限长内容,处理时间约为说完原内容时间的5倍')
|
128 |
with gr.Row():
|
129 |
+
gender = gr.Radio(label='说话人性别 (男音调低,女音调高)', value='中文男',
|
130 |
+
choices=['中文男', '中文女', '英文男', '英文女'])
|
131 |
tts_rate = gr.Number(label='语速 (正负, 单位百分比)', value=0)
|
132 |
tts_submit = gr.Button("生成", variant="primary")
|
133 |
|