Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Prediction interface for Cog ⚙️
|
2 |
+
# https://github.com/replicate/cog/blob/main/docs/python.md
|
3 |
+
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
import shutil
|
7 |
+
import zipfile
|
8 |
+
import urllib.request
|
9 |
+
from argparse import Namespace
|
10 |
+
from cog import BasePredictor, Input, Path as CogPath
|
11 |
+
|
12 |
+
sys.path.insert(0, os.path.abspath("src"))
|
13 |
+
|
14 |
+
import main as m
|
15 |
+
|
16 |
+
|
17 |
+
def download_online_model(url, dir_name):
    """Download a zipped voice model and unpack it into the RVC models directory.

    The archive is extracted flat: every file in the zip, whatever folder it
    sits in inside the archive, is written directly into
    ``<m.rvc_models_dir>/<dir_name>`` under its base name. If that folder
    already exists, nothing is downloaded.

    Args:
        url: Location of the model zip. URLs containing ``pixeldrain.com`` are
            rewritten to the pixeldrain file API using the last path segment.
        dir_name: Name of the folder to create under ``m.rvc_models_dir``.

    Raises:
        Any error raised by the download or by zip extraction is propagated
        after the partially written model folder has been removed.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import tempfile

    print(f"[~] Downloading voice model with name {dir_name}...")
    zip_name = url.split("/")[-1]
    extraction_folder = os.path.join(m.rvc_models_dir, dir_name)
    if os.path.exists(extraction_folder):
        print(f"Voice model directory {dir_name} already exists! Skipping download.")
        return

    if "pixeldrain.com" in url:
        url = f"https://pixeldrain.com/api/file/{zip_name}"

    # Download into a throwaway directory instead of the working directory:
    # the archive is removed automatically, and a URL-derived file name
    # (possibly empty or containing query characters) is never used on disk.
    with tempfile.TemporaryDirectory() as tmp_dir:
        archive_path = os.path.join(tmp_dir, "model.zip")
        urllib.request.urlretrieve(url, archive_path)

        print("[~] Extracting zip...")
        try:
            with zipfile.ZipFile(archive_path, "r") as zip_ref:
                for member in zip_ref.infolist():
                    # skip directories
                    if member.is_dir():
                        continue

                    # Created lazily so an archive without files leaves no
                    # empty model folder behind.
                    os.makedirs(extraction_folder, exist_ok=True)

                    # Using only the base name flattens the archive layout and
                    # keeps entries from escaping the extraction folder.
                    target_path = os.path.join(
                        extraction_folder, os.path.basename(member.filename)
                    )
                    with zip_ref.open(member) as source, open(
                        target_path, "wb"
                    ) as target:
                        shutil.copyfileobj(source, target)
        except BaseException:
            # The folder did not exist when we started, so anything in it is
            # ours. Remove it so a later call does not mistake a partial
            # extraction for an installed model, then re-raise unchanged.
            shutil.rmtree(extraction_folder, ignore_errors=True)
            raise

    print(f"[+] {dir_name} Model successfully downloaded!")
|
46 |
+
|
47 |
+
|
48 |
+
class Predictor(BasePredictor):
    """Cog predictor that produces an AI cover of a song using an RVC voice model."""

    # Numeric pitch shift handed to the pipeline for each `pitch_change` choice.
    _PITCH_SHIFT = {
        "no-change": 0,
        "male-to-female": 1,
        "female-to-male": -1,
    }

    def setup(self) -> None:
        """Cog setup hook; nothing is loaded here."""
        pass

    @staticmethod
    def _custom_model_name(url):
        """Return the model folder name derived from the last segment of `url`.

        The segment is percent-decoded and its file extension is dropped.
        """
        # Imported explicitly; the file only imports `urllib.request`.
        from urllib.parse import unquote

        archive_name = unquote(url.split("/")[-1])
        return os.path.splitext(archive_name)[0]

    def predict(
        self,
        song_input: CogPath = Input(
            description="Upload your audio file here.",
            default=None,
        ),
        rvc_model: str = Input(
            description="RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
            default="Squidward",
            choices=[
                "Squidward",
                "MrKrabs",
                "Plankton",
                "Drake",
                "Vader",
                "Trump",
                "Biden",
                "Obama",
                "Guitar",
                "Voilin",
                "CUSTOM",
                "SamA",  # TODO REMOVE THIS
            ],
        ),
        custom_rvc_model_download_url: str = Input(
            description="URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.",
            default=None,
        ),
        pitch_change: str = Input(
            description="Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
            default="no-change",
            choices=["no-change", "male-to-female", "female-to-male"],
        ),
        index_rate: float = Input(
            description="Control how much of the AI's accent to leave in the vocals.",
            default=0.5,
            ge=0,
            le=1,
        ),
        filter_radius: int = Input(
            description="If >=3: apply median filtering median filtering to the harvested pitch results.",
            default=3,
            ge=0,
            le=7,
        ),
        rms_mix_rate: float = Input(
            description="Control how much to use the original vocal's loudness (0) or a fixed loudness (1).",
            default=0.25,
            ge=0,
            le=1,
        ),
        pitch_detection_algorithm: str = Input(
            description="Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
            default="rmvpe",
            choices=["rmvpe", "mangio-crepe"],
        ),
        crepe_hop_length: int = Input(
            description="When `pitch_detection_algo` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.",
            default=128,
        ),
        protect: float = Input(
            description="Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.",
            default=0.33,
            ge=0,
            le=0.5,
        ),
        main_vocals_volume_change: float = Input(
            description="Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.",
            default=0,
        ),
        backup_vocals_volume_change: float = Input(
            description="Control volume of backup AI vocals.",
            default=0,
        ),
        instrumental_volume_change: float = Input(
            description="Control volume of the background music/instrumentals.",
            default=0,
        ),
        pitch_change_all: float = Input(
            description="Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.",
            default=0,
        ),
        reverb_size: float = Input(
            description="The larger the room, the longer the reverb time.",
            default=0.15,
            ge=0,
            le=1,
        ),
        reverb_wetness: float = Input(
            description="Level of AI vocals with reverb.",
            default=0.2,
            ge=0,
            le=1,
        ),
        reverb_dryness: float = Input(
            description="Level of AI vocals without reverb.",
            default=0.8,
            ge=0,
            le=1,
        ),
        reverb_damping: float = Input(
            description="Absorption of high frequencies in the reverb.",
            default=0.7,
            ge=0,
            le=1,
        ),
        output_format: str = Input(
            description="wav for best quality and large file size, mp3 for decent quality and small file size.",
            default="mp3",
            choices=["mp3", "wav"],
        ),
    ) -> CogPath:
        """Run a single prediction and return the generated cover.

        Args:
            song_input: Audio file to convert. Declared with a ``None``
                default, but a value is required.
            rvc_model: Name of the voice model folder under
                ``m.rvc_models_dir``. Replaced by the downloaded model's name
                when ``custom_rvc_model_download_url`` is given.
            custom_rvc_model_download_url: Optional URL of a zipped custom
                model. When set, the model is downloaded (unless its folder
                already exists) and used regardless of ``rvc_model``.
            pitch_change: One of ``no-change``, ``male-to-female`` or
                ``female-to-male``; passed to the pipeline as 0, 1 or -1.
            index_rate: How much of the AI's accent to leave in the vocals
                (0 to 1).
            filter_radius: If >= 3, median filtering is applied to the
                harvested pitch results (0 to 7).
            rms_mix_rate: Blend between the original vocal's loudness (0) and
                a fixed loudness (1).
            pitch_detection_algorithm: ``rmvpe`` or ``mangio-crepe``.
            crepe_hop_length: How often pitch changes are checked when using
                ``mangio-crepe``; lower values are slower but more accurate.
            protect: How much of the original breath and voiceless consonants
                to keep (0 to 0.5; 0.5 disables).
            main_vocals_volume_change: Volume change for the main AI vocals,
                in decibels.
            backup_vocals_volume_change: Volume change for the backup vocals.
            instrumental_volume_change: Volume change for the instrumentals.
            pitch_change_all: Pitch/key change, in semitones, applied to the
                music, backup vocals and AI vocals.
            reverb_size: Reverb room size (0 to 1).
            reverb_wetness: Level of AI vocals with reverb (0 to 1).
            reverb_dryness: Level of AI vocals without reverb (0 to 1).
            reverb_damping: Absorption of high frequencies in the reverb
                (0 to 1).
            output_format: ``mp3`` or ``wav``.

        Returns:
            CogPath: Path of the generated audio file.

        Raises:
            ValueError: If no ``song_input`` was supplied.
            FileNotFoundError: If the selected model folder does not exist.
        """
        # `str(None)` would otherwise reach the pipeline as the literal "None".
        if song_input is None:
            raise ValueError("`song_input` is required: upload an audio file.")

        if custom_rvc_model_download_url:
            # This notice belongs here: it describes what happens when a URL
            # IS provided, not when it is absent.
            print(
                "[!] Since URL was provided, we will try to download the model and use it (even if `rvc_model` is not set to 'CUSTOM')."
            )
            custom_rvc_model_download_name = self._custom_model_name(
                custom_rvc_model_download_url
            )
            print(
                f"[!] The model will be downloaded as '{custom_rvc_model_download_name}'."
            )
            download_online_model(
                url=custom_rvc_model_download_url,
                dir_name=custom_rvc_model_download_name,
            )
            rvc_model = custom_rvc_model_download_name

        # Any unrecognised value falls back to -1, as the original
        # if/elif/else chain did.
        pitch_shift = self._PITCH_SHIFT.get(pitch_change, -1)

        model_dir = os.path.join(m.rvc_models_dir, rvc_model)
        if not os.path.exists(model_dir):
            raise FileNotFoundError(f"The folder {model_dir} does not exist.")

        cover_path = m.song_cover_pipeline(
            str(song_input),
            rvc_model,
            pitch_shift,
            False,  # keep_files
            main_gain=main_vocals_volume_change,
            backup_gain=backup_vocals_volume_change,
            inst_gain=instrumental_volume_change,
            index_rate=index_rate,
            filter_radius=filter_radius,
            rms_mix_rate=rms_mix_rate,
            f0_method=pitch_detection_algorithm,
            crepe_hop_length=crepe_hop_length,
            protect=protect,
            pitch_change_all=pitch_change_all,
            reverb_rm_size=reverb_size,
            reverb_wet=reverb_wetness,
            reverb_dry=reverb_dryness,
            reverb_damping=reverb_damping,
            output_format=output_format,
        )
        print(f"[+] Cover generated at {cover_path}")

        # Return the output path
        return CogPath(cover_path)
|