None1145 committed on
Commit
d090336
·
verified ·
1 Parent(s): e723ecd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -29
app.py CHANGED
@@ -1,6 +1,3 @@
1
- model = None
2
- sid = ""
3
-
4
  import io
5
  import gradio as gr
6
  import librosa
@@ -31,6 +28,10 @@ for model_id in models_id.split("\n"):
31
  snapshot_download(repo_id=model_id, local_dir=f"./Models/{model_id}")
32
  print(f"{model_id}!!!")
33
  print("Models!!!")
 
 
 
 
34
  list_files_tree("./")
35
 
36
  import re
@@ -68,17 +69,10 @@ print(models_info)
68
  print(speakers)
69
 
70
  def load(speaker):
71
- global sid
72
- global model
73
- sid = speaker
74
- model = Svc(models_info[sid]["model"], models_info[sid]["config"], cluster_model_path=models_info[sid]["cluster"], feature_retrieval=models_info[sid]["feature_retrieval"])
75
- return "Model loaded successfully", sid
76
- sid = speakers[0]
77
- # load(sid)
78
 
79
- def vc_fn(input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale):
80
- global sid
81
- load(sid)
82
  if input_audio is None:
83
  return "You need to upload an audio", None
84
  sampling_rate, audio = input_audio
@@ -98,20 +92,21 @@ def vc_fn(input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scal
98
  app = gr.Blocks()
99
  with app:
100
  with gr.Tabs():
101
- with gr.TabItem("Inference"):
102
- speaker = gr.Dropdown(label="Speaker", choices=speakers, value=speakers[0])
103
- model_submit = gr.Button("Load Model", variant="primary")
104
- model_output1 = gr.Textbox(label="Output Message")
105
- model_output2 = gr.Textbox(label="Speaker", value=sid)
106
- vc_input3 = gr.Audio(label="Upload Audio")
107
- vc_transform = gr.Number(label="Pitch Shift (integer, can be positive or negative, number of semitones, raising an octave is +12)", value=0)
108
- cluster_ratio = gr.Number(label="Cluster Model Mixing Ratio (0-1): Defaults to 0 (clustering disabled). Improves timbre similarity but may reduce articulation clarity. Recommended value: ~0.5 if used", value=0)
109
- auto_f0 = gr.Checkbox(label="Auto f0 Prediction: Works better with the cluster model for f0 prediction but disables the pitch shift feature. (For voice conversion only; do not enable this for singing voices, as it will result in extreme off-pitch issues)", value=False)
110
- slice_db = gr.Number(label="Slicing Threshold", value=-40)
111
- noise_scale = gr.Number(label="noise_scale", value=0.4)
112
- vc_submit = gr.Button("Convert", variant="primary")
113
- vc_output1 = gr.Textbox(label="Output Message")
114
- vc_output2 = gr.Audio(label="Output Audio")
115
- model_submit.click(load, [speaker], [model_output1, model_output2])
116
- vc_submit.click(vc_fn, [vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale], [vc_output1, vc_output2])
 
117
  app.launch()
 
 
 
 
1
  import io
2
  import gradio as gr
3
  import librosa
 
28
  snapshot_download(repo_id=model_id, local_dir=f"./Models/{model_id}")
29
  print(f"{model_id}!!!")
30
  print("Models!!!")
31
+ print("PretrainedModels...")
32
+ base_model_id = "None1145/So-VITS-SVC-Base"
33
+ snapshot_download(repo_id=base_model_id, local_dir=f"./PretrainedModels/{base_model_id}")
34
+ print("PretrainedModels!!!")
35
  list_files_tree("./")
36
 
37
  import re
 
69
  print(speakers)
70
 
71
  def load(speaker):
72
+ return Svc(models_info[speaker]["model"], models_info[speaker]["config"], cluster_model_path=models_info[speaker]["cluster"], feature_retrieval=models_info[speaker]["feature_retrieval"])
 
 
 
 
 
 
73
 
74
+ def vc_fn(speaker, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale):
75
+ model = load(speaker)
 
76
  if input_audio is None:
77
  return "You need to upload an audio", None
78
  sampling_rate, audio = input_audio
 
92
  app = gr.Blocks()
93
  with app:
94
  with gr.Tabs():
95
+ for speaker in speakers:
96
+ with gr.TabItem(speaker):
97
+ with gr.Row():
98
+ gr.Markdown(
99
+ '<div align="center">'
100
+ f'<a><strong>{speaker}</strong></a>'
101
+ '</div>')
102
+ vc_input3 = gr.Audio(label="Upload Audio")
103
+ vc_transform = gr.Number(label="Pitch Shift (integer, can be positive or negative, number of semitones, raising an octave is +12)", value=0)
104
+ cluster_ratio = gr.Number(label="Cluster Model Mixing Ratio (0-1): Defaults to 0 (clustering disabled). Improves timbre similarity but may reduce articulation clarity. Recommended value: ~0.5 if used", value=0)
105
+ auto_f0 = gr.Checkbox(label="Auto f0 Prediction: Works better with the cluster model for f0 prediction but disables the pitch shift feature. (For voice conversion only; do not enable this for singing voices, as it will result in extreme off-pitch issues)", value=False)
106
+ slice_db = gr.Number(label="Slicing Threshold", value=-40)
107
+ noise_scale = gr.Number(label="noise_scale", value=0.4)
108
+ vc_submit = gr.Button("Convert", variant="primary")
109
+ vc_output1 = gr.Textbox(label="Output Message")
110
+ vc_output2 = gr.Audio(label="Output Audio")
111
+ vc_submit.click(vc_fn, [speaker, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale], [vc_output1, vc_output2])
112
  app.launch()