Seqath committed on
Commit
5891805
·
verified ·
1 Parent(s): b4739bb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +276 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prediction interface for Cog ⚙️
2
+ # https://github.com/replicate/cog/blob/main/docs/python.md
3
+
4
+ import os
5
+ import sys
6
+ import shutil
7
+ import zipfile
8
+ import urllib.request
9
+ from argparse import Namespace
10
+ from cog import BasePredictor, Input, Path as CogPath
11
+
12
+ sys.path.insert(0, os.path.abspath("src"))
13
+
14
+ import main as m
15
+
16
+
17
def download_online_model(url, dir_name):
    """Download a zipped voice model and unpack it flat into the models dir.

    The archive is saved in the current working directory under the last
    path segment of ``url``, then every file it contains is written directly
    into ``<m.rvc_models_dir>/<dir_name>`` (any folder structure inside the
    archive is discarded; only each member's base name is kept).

    Args:
        url: Location of the zip archive. For pixeldrain.com links the URL is
            rewritten to the site's ``/api/file/<id>`` download endpoint.
        dir_name: Name of the sub-folder of ``m.rvc_models_dir`` to extract
            into. If that folder already exists nothing is downloaded.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``zipfile.ZipFile`` raise
        on a failed download or an invalid archive. In that case the
        partially written model folder is removed so a later call retries
        instead of skipping.
    """
    print(f"[~] Downloading voice model with name {dir_name}...")
    zip_name = url.split("/")[-1]
    extraction_folder = os.path.join(m.rvc_models_dir, dir_name)
    if os.path.exists(extraction_folder):
        print(f"Voice model directory {dir_name} already exists! Skipping download.")
        return

    if "pixeldrain.com" in url:
        url = f"https://pixeldrain.com/api/file/{zip_name}"

    try:
        urllib.request.urlretrieve(url, zip_name)

        print("[~] Extracting zip...")
        with zipfile.ZipFile(zip_name, "r") as zip_ref:
            # Directories are skipped: the archive is flattened on purpose.
            file_members = [
                member for member in zip_ref.infolist() if not member.is_dir()
            ]

            # Only create the folder when there is something to put in it.
            if file_members:
                os.makedirs(extraction_folder, exist_ok=True)

            for member in file_members:
                # basename() drops any path inside the archive, which also
                # keeps members from escaping extraction_folder.
                target_path = os.path.join(
                    extraction_folder, os.path.basename(member.filename)
                )
                with zip_ref.open(member) as source, open(target_path, "wb") as target:
                    shutil.copyfileobj(source, target)
    except BaseException:
        # A half-extracted folder would make the "already exists" check above
        # skip every future attempt, so remove it before propagating.
        shutil.rmtree(extraction_folder, ignore_errors=True)
        raise
    finally:
        # The archive is only a temporary artifact; do not leave it behind.
        if os.path.exists(zip_name):
            os.remove(zip_name)

    print(f"[+] {dir_name} Model successfully downloaded!")
46
+
47
+
48
class Predictor(BasePredictor):
    """Cog predictor that wraps ``m.song_cover_pipeline``."""

    def setup(self) -> None:
        """Prepare the predictor. Nothing is preloaded; all work happens in predict()."""

    def predict(
        self,
        song_input: CogPath = Input(
            description="Upload your audio file here.",
            default=None,
        ),
        rvc_model: str = Input(
            description="RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
            default="Squidward",
            choices=[
                "Squidward",
                "MrKrabs",
                "Plankton",
                "Drake",
                "Vader",
                "Trump",
                "Biden",
                "Obama",
                "Guitar",
                "Voilin",
                "CUSTOM",
                "SamA",  # TODO REMOVE THIS
            ],
        ),
        custom_rvc_model_download_url: str = Input(
            description="URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.",
            default=None,
        ),
        pitch_change: str = Input(
            description="Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
            default="no-change",
            choices=["no-change", "male-to-female", "female-to-male"],
        ),
        index_rate: float = Input(
            description="Control how much of the AI's accent to leave in the vocals.",
            default=0.5,
            ge=0,
            le=1,
        ),
        filter_radius: int = Input(
            description="If >=3: apply median filtering median filtering to the harvested pitch results.",
            default=3,
            ge=0,
            le=7,
        ),
        rms_mix_rate: float = Input(
            description="Control how much to use the original vocal's loudness (0) or a fixed loudness (1).",
            default=0.25,
            ge=0,
            le=1,
        ),
        pitch_detection_algorithm: str = Input(
            description="Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
            default="rmvpe",
            choices=["rmvpe", "mangio-crepe"],
        ),
        crepe_hop_length: int = Input(
            description="When `pitch_detection_algo` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.",
            default=128,
        ),
        protect: float = Input(
            description="Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.",
            default=0.33,
            ge=0,
            le=0.5,
        ),
        main_vocals_volume_change: float = Input(
            description="Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.",
            default=0,
        ),
        backup_vocals_volume_change: float = Input(
            description="Control volume of backup AI vocals.",
            default=0,
        ),
        instrumental_volume_change: float = Input(
            description="Control volume of the background music/instrumentals.",
            default=0,
        ),
        pitch_change_all: float = Input(
            description="Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.",
            default=0,
        ),
        reverb_size: float = Input(
            description="The larger the room, the longer the reverb time.",
            default=0.15,
            ge=0,
            le=1,
        ),
        reverb_wetness: float = Input(
            description="Level of AI vocals with reverb.",
            default=0.2,
            ge=0,
            le=1,
        ),
        reverb_dryness: float = Input(
            description="Level of AI vocals without reverb.",
            default=0.8,
            ge=0,
            le=1,
        ),
        reverb_damping: float = Input(
            description="Absorption of high frequencies in the reverb.",
            default=0.7,
            ge=0,
            le=1,
        ),
        output_format: str = Input(
            description="wav for best quality and large file size, mp3 for decent quality and small file size.",
            default="mp3",
            choices=["mp3", "wav"],
        ),
    ) -> CogPath:
        """Run a single prediction: generate an AI cover of the uploaded song.

        Args:
            song_input: Audio file to convert. Must be provided.
            rvc_model: Name of the voice model folder inside
                ``m.rvc_models_dir``. Overridden by the downloaded model's
                name when ``custom_rvc_model_download_url`` is given.
            custom_rvc_model_download_url: Optional URL of a zipped custom
                model. When set, the model is downloaded (unless its folder
                already exists) and used regardless of ``rvc_model``. The
                folder name is the URL's last path segment, URL-decoded and
                with its extension removed.
            pitch_change: One of ``"no-change"``, ``"male-to-female"`` or
                ``"female-to-male"``; passed to the pipeline as 0, 1 or -1
                respectively.
            index_rate: 0 <= value <= 1. Defaults to 0.5.
            filter_radius: 0 <= value <= 7. Defaults to 3.
            rms_mix_rate: 0 <= value <= 1. Defaults to 0.25.
            pitch_detection_algorithm: ``"rmvpe"`` or ``"mangio-crepe"``;
                forwarded as the pipeline's ``f0_method``.
            crepe_hop_length: Defaults to 128.
            protect: 0 <= value <= 0.5. Defaults to 0.33.
            main_vocals_volume_change: Forwarded as ``main_gain``.
            backup_vocals_volume_change: Forwarded as ``backup_gain``.
            instrumental_volume_change: Forwarded as ``inst_gain``.
            pitch_change_all: Forwarded unchanged. Defaults to 0.
            reverb_size: 0 <= value <= 1; forwarded as ``reverb_rm_size``.
            reverb_wetness: 0 <= value <= 1; forwarded as ``reverb_wet``.
            reverb_dryness: 0 <= value <= 1; forwarded as ``reverb_dry``.
            reverb_damping: 0 <= value <= 1. Defaults to 0.7.
            output_format: ``"mp3"`` or ``"wav"``.

        Returns:
            CogPath: Path of the generated audio file, as returned by
            ``m.song_cover_pipeline``.

        Raises:
            ValueError: If no ``song_input`` was supplied.
            FileNotFoundError: If the selected model folder does not exist
                under ``m.rvc_models_dir``.
        """
        # Without this guard str(None) == "None" would be handed to the
        # pipeline as if it were a real input path.
        if song_input is None:
            raise ValueError("No audio file was provided for 'song_input'.")

        if custom_rvc_model_download_url:
            print(
                "[!] Since URL was provided, we will try to download the model and use it (even if `rvc_model` is not set to 'CUSTOM')."
            )
            # urllib.parse is available because `import urllib.request`
            # loads it as a dependency.
            custom_rvc_model_download_name = urllib.parse.unquote(
                custom_rvc_model_download_url.split("/")[-1]
            )
            custom_rvc_model_download_name = os.path.splitext(
                custom_rvc_model_download_name
            )[0]
            print(
                f"[!] The model will be downloaded as '{custom_rvc_model_download_name}'."
            )
            download_online_model(
                url=custom_rvc_model_download_url,
                dir_name=custom_rvc_model_download_name,
            )
            rvc_model = custom_rvc_model_download_name

        # Convert pitch_change from string to numerical value for processing:
        # 0 for no change, 1 for male to female, -1 for female to male.
        pitch_shift = {"no-change": 0, "male-to-female": 1}.get(pitch_change, -1)

        model_folder = os.path.join(m.rvc_models_dir, rvc_model)
        if not os.path.exists(model_folder):
            raise FileNotFoundError(f"The folder {model_folder} does not exist.")

        cover_path = m.song_cover_pipeline(
            str(song_input),
            rvc_model,
            pitch_shift,
            False,  # keep_files: intermediate files are not kept
            main_gain=main_vocals_volume_change,
            backup_gain=backup_vocals_volume_change,
            inst_gain=instrumental_volume_change,
            index_rate=index_rate,
            filter_radius=filter_radius,
            rms_mix_rate=rms_mix_rate,
            f0_method=pitch_detection_algorithm,
            crepe_hop_length=crepe_hop_length,
            protect=protect,
            pitch_change_all=pitch_change_all,
            reverb_rm_size=reverb_size,
            reverb_wet=reverb_wetness,
            reverb_dry=reverb_dryness,
            reverb_damping=reverb_damping,
            output_format=output_format,
        )
        print(f"[+] Cover generated at {cover_path}")

        # Return the output path
        return CogPath(cover_path)