Vladimir Alabov committed on
Commit
01a3172
·
1 Parent(s): ed3e5df

Fix vc_fn #3

Browse files
app.py CHANGED
@@ -15,6 +15,8 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
15
 
16
  limitation = os.getenv("SYSTEM") == "spaces" # limit audio length in huggingface spaces
17
 
 
 
18
  audio_postprocess_ori = gr.Audio.postprocess
19
 
20
  def audio_postprocess(self, y):
@@ -44,23 +46,49 @@ def vc_fn(input_audio, vc_transform, voice):
44
  )
45
  return "Success", (44100, out_audio.cpu().numpy())
46
 
47
def get_speakers(models_dir="/models"):
    """Collect the speakers that have a generator checkpoint on disk.

    Every direct sub-folder of ``models_dir`` holding at least one
    ``G_*.pth`` file counts as one speaker.

    Args:
        models_dir: Directory whose immediate sub-folders are inspected.
            Defaults to ``/models``.

    Returns:
        A list of dicts with the keys ``model_path`` (one matching
        checkpoint file), ``model_folder`` and ``name`` (both the folder
        name), sorted case-insensitively by ``name``. The list is empty
        when ``models_dir`` cannot be listed.
    """
    try:
        entries = os.listdir(models_dir)
    except OSError:
        # A missing or unreadable models directory simply means "no speakers".
        return []

    speakers = []
    # Only direct children are inspected: walking the tree recursively would
    # join nested folder names onto ``models_dir`` and look in the wrong place.
    for folder in entries:
        folder_path = os.path.join(models_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        # Look for G_****.pth; sort so the chosen checkpoint is deterministic.
        checkpoints = sorted(
            glob.glob(os.path.join(glob.escape(folder_path), "G_*.pth"))
        )
        if not checkpoints:
            continue
        speakers.append(
            {
                "model_path": checkpoints[0],
                "model_folder": folder,
                "name": folder,
            }
        )

    return sorted(speakers, key=lambda speaker: speaker["name"].lower())
 
 
 
 
 
 
 
64
 
65
  if __name__ == '__main__':
66
  parser = argparse.ArgumentParser()
@@ -69,8 +97,7 @@ if __name__ == '__main__':
69
  parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
70
  args = parser.parse_args()
71
 
72
- speakers = get_speakers()
73
- speaker_list = [x["name"] for x in speakers]
74
 
75
  models = []
76
  voices = []
@@ -80,20 +107,16 @@ if __name__ == '__main__':
80
  with gr.Blocks() as app:
81
  gr.Markdown(
82
  "# <center> Sovits Chapay\n"
83
- "## <center> The input audio should be clean and pure voice without background music.\n"
84
  )
85
 
86
  with gr.Row():
87
  with gr.Column():
88
  vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
89
-
90
- vc_transform = gr.Number(label="vc_transform", value=0)
91
-
92
- voice = gr.Dropdown(choices=speaker_list, visible=True)
93
 
94
  vc_submit = gr.Button("Generate", variant="primary")
95
  with gr.Column():
96
  vc_output1 = gr.Textbox(label="Output Message")
97
- vc_output2 = gr.Audio(label="Output Audio")
98
- vc_submit.click(vc_fn, [vc_input, vc_transform, voice], [vc_output1, vc_output2])
99
  app.queue(concurrency_count=1, api_open=args.api).launch(share=args.share)
 
15
 
16
  limitation = os.getenv("SYSTEM") == "spaces" # limit audio length in huggingface spaces
17
 
18
+ INFERENCE_OUTPUT_DIRNAME = '/output/'
19
+
20
  audio_postprocess_ori = gr.Audio.postprocess
21
 
22
  def audio_postprocess(self, y):
 
46
  )
47
  return "Success", (44100, out_audio.cpu().numpy())
48
 
 
 
49
 
50
def run_inference(input_audio, speaker):
    """Convert ``input_audio`` to the voice of ``speaker`` via the ``svc infer`` CLI.

    Args:
        input_audio: ``(sampling_rate, samples)`` tuple, or ``None`` when no
            audio was provided.
        speaker: Name of the model folder under ``/models``. The checkpoint is
            read from ``/models/<speaker>/<speaker>.pth`` and the config from
            ``/models/<speaker>/config.json``.

    Returns:
        ``(message, audio)``. ``audio`` is ``(sampling_rate, samples)`` read
        back from the file written by the CLI, or ``None`` when the request
        was rejected or inference failed.
    """
    import tempfile

    if input_audio is None:
        return "You need to upload an audio", None
    # The speaker name becomes part of file paths and of the command line, so
    # reject anything that is not a plain folder name (nothing selected, path
    # separators, "."/"..", or a value that would be parsed as an option).
    if (
        not isinstance(speaker, str)
        or not speaker
        or speaker in (".", "..")
        or speaker.startswith("-")
        or os.path.basename(speaker) != speaker
    ):
        return "You need to select a speaker", None

    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > 20 and limitation:
        return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None

    # Integer PCM is scaled by the dtype's maximum; np.iinfo rejects float
    # dtypes, so float input is only cast (assumed to be normalized already).
    if np.issubdtype(audio.dtype, np.integer):
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        audio = audio.astype(np.float32)
    if audio.ndim > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)

    # TODO: make these editable from the GUI
    cluster_ratio = 1
    noise_scale = 2
    is_pitch_prediction_enabled = True
    f0_method = "dio"
    transpose = 0

    model_path = f"/models/{speaker}/{speaker}.pth"
    config_path = f"/models/{speaker}/config.json"
    cluster_path = ""

    # The CLI takes a file path, so the input has to live on disk; an in-memory
    # buffer has no path to hand over.
    with tempfile.TemporaryDirectory() as work_dir:
        raw_path = os.path.join(work_dir, "input.wav")
        soundfile.write(raw_path, audio, 16000, format="wav")
        # The temp dir name is unique per call, so outputs never collide.
        out_path = os.path.join(
            INFERENCE_OUTPUT_DIRNAME, f"{os.path.basename(work_dir)}.wav"
        )

        # Build the argument list directly instead of splitting a string, so
        # values containing whitespace stay single arguments.
        inference_cmd = ["svc", "infer", raw_path, "-m", model_path, "-c", config_path]
        if cluster_path != "" and cluster_ratio > 0:
            inference_cmd += ["-k", cluster_path, "-r", str(cluster_ratio)]
        inference_cmd += [
            "-t", str(transpose),
            "--f0-method", f0_method,
            "-n", str(noise_scale),
            "-o", out_path,
        ]
        if not is_pitch_prediction_enabled:
            inference_cmd.append("--no-auto-predict-f0")

        result = subprocess.run(
            inference_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        print(result)

    if result.returncode != 0 or not os.path.exists(out_path):
        return "Inference failed, see the application log for details", None

    out_audio, out_sr = soundfile.read(out_path)
    return "Success", (out_sr, out_audio)
92
 
93
  if __name__ == '__main__':
94
  parser = argparse.ArgumentParser()
 
97
  parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
98
  args = parser.parse_args()
99
 
100
+ speakers = ["chapaev", "petka", "anka"]
 
101
 
102
  models = []
103
  voices = []
 
107
  with gr.Blocks() as app:
108
  gr.Markdown(
109
  "# <center> Sovits Chapay\n"
 
110
  )
111
 
112
  with gr.Row():
113
  with gr.Column():
114
  vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
115
+ speaker = gr.Dropdown(choices=speakers, visible=True)
 
 
 
116
 
117
  vc_submit = gr.Button("Generate", variant="primary")
118
  with gr.Column():
119
  vc_output1 = gr.Textbox(label="Output Message")
120
+ # vc_output2 = gr.Audio(label="Output Audio")
121
+ vc_submit.click(run_inference, [vc_input, speaker], [vc_output1, vc_output2])
122
  app.queue(concurrency_count=1, api_open=args.api).launch(share=args.share)
models/chapaev/{G_5400.pth → chapaev.pth} RENAMED
File without changes