Katock committed on
Commit
a73a5ce
·
1 Parent(s): 054c3da
Files changed (1) hide show
  1. app.py +54 -35
app.py CHANGED
@@ -2,7 +2,7 @@ import argparse
2
  import io
3
  import logging
4
  import os
5
-
6
  import gradio as gr
7
  import gradio.processing_utils as gr_processing_utils
8
  import librosa
@@ -19,36 +19,16 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
19
  limitation = os.getenv("SYSTEM") == "spaces" # limit audio length in huggingface spaces
20
 
21
 
22
- # audio_postprocess_ori = gr.Audio.postprocess
23
- #
24
- #
25
- # def audio_postprocess(self, y):
26
- # data = audio_postprocess_ori(self, y)
27
- # if data is None:
28
- # return None
29
- # return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
30
-
31
-
32
- # gr.Audio.postprocess = audio_postprocess
33
-
34
-
35
- def create_vc_fn(model, spk):
36
- def vc_fn(input_audio, vc_transform, auto_f0, f0p):
37
  if input_audio is None:
38
  return "请先上传音频", None
39
  sampling_rate, audio = input_audio
40
-
41
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
42
  if len(audio.shape) > 1:
43
  audio = librosa.to_mono(audio.transpose(1, 0))
44
-
45
- # raw_audio_path = io.BytesIO()
46
- # soundfile.write(raw_audio_path, audio, sampling_rate, format="wav")
47
- # raw_audio_path.seek(0)
48
-
49
  temp_path = "temp.wav"
50
  soundfile.write(temp_path, audio, sampling_rate, format="wav")
51
-
52
  out_audio = model.slice_inference(raw_audio_path=temp_path,
53
  spk=spk,
54
  slice_db=-40,
@@ -58,12 +38,38 @@ def create_vc_fn(model, spk):
58
  tran=vc_transform,
59
  f0_predictor=f0p,
60
  auto_predict_f0=auto_f0)
61
-
62
  os.remove(temp_path)
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  return 44100, out_audio
65
 
66
- return vc_fn
67
 
68
 
69
  if __name__ == '__main__':
@@ -78,24 +84,36 @@ if __name__ == '__main__':
78
  name = f
79
  model = Svc(fr"models/{f}/{f}.pth", f"models/{f}/config_{f}.json", device=args.device)
80
  cover = f"models/{f}/cover.png" if os.path.exists(f"models/{f}/cover.png") else None
81
- models.append((name, cover, create_vc_fn(model, name)))
82
  with gr.Blocks() as app:
83
  gr.Markdown(
84
  "# <center> GTASA人物SOVITS4.1(升级中,可能有bug)\n"
85
  "## <center> 模型作者:B站Cyber蝈蝈总\n"
86
- "<center> 使用此处资源创作的作品,请显著标明模型出处(B站Cyber蝈蝈总),这是唯一的要求\n"
87
  )
88
  with gr.Tabs():
89
- for (name, cover, vc_fn) in models:
90
  with gr.TabItem(name):
91
  with gr.Row():
92
  with gr.Column():
93
- vc_input = gr.Audio(label="上传干声 (已支持长音频)" if limitation else '')
94
- vc_transform = gr.Number(label="音高调整 (支持正负半音,12为一个八度)", value=0)
95
- auto_f0 = gr.Checkbox(label="自动音高预测 (正常说话可选)", value=False)
96
- f0_predictor = gr.Radio(label="f0预测器 (对电音有影响)",
97
- choices=['crepe', 'harvest', 'dio', 'pm'], value='crepe')
98
- vc_submit = gr.Button("生成", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  with gr.Column():
101
  gr.Markdown(
@@ -104,5 +122,6 @@ if __name__ == '__main__':
104
  '</div>'
105
  )
106
  vc_output = gr.Audio(label="输出音频")
107
- vc_submit.click(vc_fn, [vc_input, vc_transform, auto_f0, f0_predictor], vc_output)
 
108
  app.queue(concurrency_count=1, api_open=args.api).launch(share=args.share)
 
2
  import io
3
  import logging
4
  import os
5
+ import subprocess
6
  import gradio as gr
7
  import gradio.processing_utils as gr_processing_utils
8
  import librosa
 
19
  limitation = os.getenv("SYSTEM") == "spaces" # limit audio length in huggingface spaces
20
 
21
 
22
+ def create_fn(model, spk):
23
+ def svc_fn(input_audio, vc_transform, auto_f0, f0p):
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  if input_audio is None:
25
  return "请先上传音频", None
26
  sampling_rate, audio = input_audio
 
27
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
28
  if len(audio.shape) > 1:
29
  audio = librosa.to_mono(audio.transpose(1, 0))
 
 
 
 
 
30
  temp_path = "temp.wav"
31
  soundfile.write(temp_path, audio, sampling_rate, format="wav")
 
32
  out_audio = model.slice_inference(raw_audio_path=temp_path,
33
  spk=spk,
34
  slice_db=-40,
 
38
  tran=vc_transform,
39
  f0_predictor=f0p,
40
  auto_predict_f0=auto_f0)
 
41
  os.remove(temp_path)
42
+ return 44100, out_audio
43
 
44
+ def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
45
+ voice = "zh-CN-XiaoyiNeural" if gender == '女' else "zh-CN-YunxiNeural"
46
+ output_file = "temp.wav"
47
+ if tts_rate >= 0:
48
+ ratestr = "+{:.0%}".format(tts_rate)
49
+ elif tts_rate < 0:
50
+ ratestr = "{:.0%}".format(tts_rate) # 减号自带
51
+
52
+ p = subprocess.Popen("edge-tts " +
53
+ " --text " + input_text +
54
+ " --write-media " + output_file +
55
+ " --voice " + voice +
56
+ " --rate=" + ratestr, shell=True,
57
+ stdout=subprocess.PIPE,
58
+ stdin=subprocess.PIPE)
59
+ p.wait()
60
+ out_audio = model.slice_inference(raw_audio_path=output_file,
61
+ spk=spk,
62
+ slice_db=-40,
63
+ cluster_infer_ratio=0,
64
+ noice_scale=0.4,
65
+ clip_seconds=20,
66
+ tran=vc_transform,
67
+ f0_predictor=f0p,
68
+ auto_predict_f0=auto_f0)
69
+ os.remove(output_file)
70
  return 44100, out_audio
71
 
72
+ return svc_fn, tts_fn
73
 
74
 
75
  if __name__ == '__main__':
 
84
  name = f
85
  model = Svc(fr"models/{f}/{f}.pth", f"models/{f}/config_{f}.json", device=args.device)
86
  cover = f"models/{f}/cover.png" if os.path.exists(f"models/{f}/cover.png") else None
87
+ models.append((name, cover, create_fn(model, name)))
88
  with gr.Blocks() as app:
89
  gr.Markdown(
90
  "# <center> GTASA人物SOVITS4.1(升级中,可能有bug)\n"
91
  "## <center> 模型作者:B站Cyber蝈蝈总\n"
92
+ "<center> 使用此处资源创作的作品,请显著标明模型出处(B站Cyber蝈蝈总)\n"
93
  )
94
  with gr.Tabs():
95
+ for (name, cover, svc_fn, tts_fn) in models:
96
  with gr.TabItem(name):
97
  with gr.Row():
98
  with gr.Column():
99
+ mode = gr.Radio(label='模式', value='音频转音频', choices=['文字转音频', '音频转音频'])
100
+ if mode == '音频转音频':
101
+ svc_input = gr.Audio(label="上传干声 (已支持无限长音频,处理时间约为原音频时间的5倍)")
102
+ vc_transform = gr.Number(label="音高调整 (支持正负半音,12为一个八度)", value=0)
103
+ auto_f0 = gr.Checkbox(label="自动音高预测 (正常说话可选)", value=False)
104
+ f0_predictor = gr.Radio(label="f0预测器 (对电音有影响)",
105
+ choices=['crepe', 'harvest', 'dio', 'pm'], value='crepe')
106
+ vc_submit = gr.Button("生成", variant="primary")
107
+ else:
108
+ text_input = gr.Textbox(label='说话内容', value='',
109
+ placeholder='请输入说话内容,(已支持无限长内容,处理时间约为说完原内容时间的5倍)')
110
+ gender = gr.Radio(label='说话人性别', value='男', choices=['男', '女'])
111
+ tts_rate = gr.Number(label='语速(正负百分比)', value=0)
112
+ vc_transform = gr.Number(label="音高调整 (支持正负半音,12为一个八度)", value=0)
113
+ auto_f0 = gr.Checkbox(label="自动音高预测 (正常说话可选)", value=False)
114
+ f0_predictor = gr.Radio(label="f0预测器 (对电音有影响)",
115
+ choices=['crepe', 'harvest', 'dio', 'pm'], value='crepe')
116
+ tts_submit = gr.Button("生成", variant="primary")
117
 
118
  with gr.Column():
119
  gr.Markdown(
 
122
  '</div>'
123
  )
124
  vc_output = gr.Audio(label="输出音频")
125
+ vc_submit.click(svc_fn, [svc_input, vc_transform, auto_f0, f0_predictor], vc_output)
126
+ tts_submit.click(tts_fn, [text_input, gender, tts_rate, vc_transform, auto_f0, f0_predictor], vc_output)
127
  app.queue(concurrency_count=1, api_open=args.api).launch(share=args.share)