Spaces:

GroveStreet
/

GTA_SOVITS

Running

App Files Community

Katock committed on Nov 18, 2023

Commit

96a9860

1 Parent(s): 88d3449

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -16

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ import librosa
 import numpy as np
 import soundfile
 from scipy.io import wavfile
 from inference.infer_tool import Svc
@@ -20,10 +22,10 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
 sampling_rate = 44100
 tts_voice = {
-    "中文_男": "zh-CN-YunxiNeural",
-    "中文_女": "zh-CN-XiaoyiNeural",
-    "英文_男": "en-US-GuyNeural",
-    "英文_女": "en-US-AnaNeural"
 }
@@ -56,15 +58,22 @@ def create_fn(model, spk):
         input_text = re.sub(r"[\n\,\(\) ]", "", input_text)
         voice = tts_voice[gender]
         ratestr = "+{:.0%}".format(tts_rate) if tts_rate >= 0 else "{:.0%}".format(tts_rate)
-        temp_path = "temp.wav"
-        p = subprocess.Popen("edge-tts " +
-                             " --text " + input_text +
-                             " --write-media " + temp_path +
-                             " --voice " + voice +
-                             " --rate=" + ratestr, shell=True,
-                             stdout=subprocess.PIPE,
-                             stdin=subprocess.PIPE)
-        p.wait()
         audio, sr = librosa.load(temp_path)
         audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
         os.remove(temp_path)
@@ -93,7 +102,7 @@ if __name__ == '__main__':
         gr.Markdown(
             "# <center> 游戏角色语音生成\n"
             "## <center> 模型作者：B站[Cyber蝈蝈总](https://space.bilibili.com/37706580)\n"
-            "<center> 罪恶都市人物AI语音请移步[GTAVC_SOVITS](https://huggingface.co/spaces/GroveStreet/GTAVC_SOVITS)\n"
             "<center> 使用此资源创作的作品请标明出处，CJ有两个模型，carl1更清晰，carl2音域广\n"
         )
         with gr.Tabs():
@@ -117,8 +126,8 @@ if __name__ == '__main__':
                                     tts_input = gr.Textbox(label='说话内容', value='',
                                                            placeholder='已支持无限长内容，处理时间约为说完原内容时间的5倍')
                                     with gr.Row():
-                                        gender = gr.Radio(label='说话人性别 (男音调低，女音调高)', value='男',
-                                                          choices=['中文_男', '中文_女', '英文_男', '英文_女'])
                                         tts_rate = gr.Number(label='语速 (正负, 单位百分比)', value=0)
                                     tts_submit = gr.Button("生成", variant="primary")

 import numpy as np
 import soundfile
 from scipy.io import wavfile
+import tempfile
+import edge_tts
 from inference.infer_tool import Svc
 sampling_rate = 44100
 tts_voice = {
+    "中文男": "zh-CN-YunxiNeural",
+    "中文女": "zh-CN-XiaoyiNeural",
+    "英文男": "en-US-EricNeural",
+    "英文女": "en-US-AnaNeural"
 }
         input_text = re.sub(r"[\n\,\(\) ]", "", input_text)
         voice = tts_voice[gender]
         ratestr = "+{:.0%}".format(tts_rate) if tts_rate >= 0 else "{:.0%}".format(tts_rate)
+        # temp_path = "temp.wav"
+        # p = subprocess.Popen("edge-tts " +
+        #                      " --text " + input_text +
+        #                      " --write-media " + temp_path +
+        #                      " --voice " + voice +
+        #                      " --rate=" + ratestr, shell=True,
+        #                      stdout=subprocess.PIPE,
+        #                      stdin=subprocess.PIPE)
+        # p.wait()
+        communicate = edge_tts.Communicate(text=input_text,
+                                           voice=voice,
+                                           rate=ratestr)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
+            temp_path = temp_file.name
+        await communicate.save(temp_path)
         audio, sr = librosa.load(temp_path)
         audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
         os.remove(temp_path)
         gr.Markdown(
             "# <center> 游戏角色语音生成\n"
             "## <center> 模型作者：B站[Cyber蝈蝈总](https://space.bilibili.com/37706580)\n"
+            "<center> 罪恶都市人物AI语音请移步[GTAVC_SOVITS](https://huggingface.co/spaces/GroveStreet/GTAVC_SOVITS) \n"
             "<center> 使用此资源创作的作品请标明出处，CJ有两个模型，carl1更清晰，carl2音域广\n"
         )
         with gr.Tabs():
                                     tts_input = gr.Textbox(label='说话内容', value='',
                                                            placeholder='已支持无限长内容，处理时间约为说完原内容时间的5倍')
                                     with gr.Row():
+                                        gender = gr.Radio(label='说话人性别 (男音调低，女音调高)', value='中文男',
+                                                          choices=['中文男', '中文女', '英文男', '英文女'])
                                         tts_rate = gr.Number(label='语速 (正负, 单位百分比)', value=0)
                                     tts_submit = gr.Button("生成", variant="primary")