akshansh36 committed on
Commit
44b6616
·
verified ·
1 Parent(s): 3c335ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -300
app.py CHANGED
@@ -1,321 +1,76 @@
1
- import os
2
  import gradio as gr
3
- import spaces
4
- from infer_rvc_python import BaseLoader
5
- import random
6
- import logging
7
- import time
8
- import soundfile as sf
9
- from infer_rvc_python.main import download_manager
10
- import zipfile
11
- import librosa
12
- import traceback
13
- import soundfile as sf
14
- from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
15
- from pedalboard.io import AudioFile
16
- from pydub import AudioSegment
17
- import noisereduce as nr
18
  import numpy as np
19
- import urllib.request
20
- import shutil
21
- import threading
22
-
23
- logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
24
-
25
- # Ensure the correct path to the models directory
26
- model_dir = os.path.join(os.path.dirname(__file__), "models")
27
 
 
28
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
29
 
30
- title = "<center><strong><font size='7'>Vodex AI</font></strong></center>"
31
- theme = "aliabid94/new-theme"
32
-
33
- def find_files(directory):
34
- file_paths = []
35
- for filename in os.listdir(directory):
36
- if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
37
- file_paths.append(os.path.join(directory, filename))
38
- return file_paths
39
-
40
- def unzip_in_folder(my_zip, my_dir):
41
- with zipfile.ZipFile(my_zip) as zip:
42
- for zip_info in zip.infolist():
43
- if zip_info.is_dir():
44
- continue
45
- zip_info.filename = os.path.basename(zip_info.filename)
46
- zip.extract(zip_info, my_dir)
47
-
48
- def find_my_model(a_, b_):
49
- if a_ is None or a_.endswith(".pth"):
50
- return a_, b_
51
-
52
- txt_files = []
53
- for base_file in [a_, b_]:
54
- if base_file is not None and base_file.endswith(".txt"):
55
- txt_files.append(base_file)
56
-
57
- directory = os.path.dirname(a_)
58
-
59
- for txt in txt_files:
60
- with open(txt, 'r') as file:
61
- first_line = file.readline()
62
-
63
- download_manager(
64
- url=first_line.strip(),
65
- path=directory,
66
- extension="",
67
- )
68
-
69
- for f in find_files(directory):
70
- if f.endswith(".zip"):
71
- unzip_in_folder(f, directory)
72
-
73
- model = None
74
- index = None
75
- end_files = find_files(directory)
76
-
77
- for ff in end_files:
78
- if ff.endswith(".pth"):
79
- model = os.path.join(directory, ff)
80
- gr.Info(f"Model found: {ff}")
81
- if ff.endswith(".index"):
82
- index = os.path.join(directory, ff)
83
- gr.Info(f"Index found: {ff}")
84
-
85
- if not model:
86
- gr.Error(f"Model not found in: {end_files}")
87
-
88
- if not index:
89
- gr.Warning("Index not found")
90
-
91
- return model, index
92
-
93
- def get_file_size(url):
94
- if "huggingface" not in url:
95
- raise ValueError("Only downloads from Hugging Face are allowed")
96
-
97
- try:
98
- with urllib.request.urlopen(url) as response:
99
- info = response.info()
100
- content_length = info.get("Content-Length")
101
-
102
- file_size = int(content_length)
103
- if file_size > 500000000:
104
- raise ValueError("The file is too large. You can only download files up to 500 MB in size.")
105
-
106
- except Exception as e:
107
- raise e
108
-
109
- def clear_files(directory):
110
- time.sleep(15)
111
- print(f"Clearing files: {directory}.")
112
- shutil.rmtree(directory)
113
-
114
- def get_my_model(url_data):
115
- if not url_data:
116
- return None, None
117
-
118
- if "," in url_data:
119
- a_, b_ = url_data.split()
120
- a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
121
- else:
122
- a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
123
-
124
- out_dir = "downloads"
125
- folder_download = str(random.randint(1000, 9999))
126
- directory = os.path.join(out_dir, folder_download)
127
- os.makedirs(directory, exist_ok=True)
128
-
129
- try:
130
- get_file_size(a_)
131
- if b_:
132
- get_file_size(b_)
133
-
134
- valid_url = [a_] if not b_ else [a_, b_]
135
- for link in valid_url:
136
- download_manager(
137
- url=link,
138
- path=directory,
139
- extension="",
140
- )
141
-
142
- for f in find_files(directory):
143
- if f.endswith(".zip"):
144
- unzip_in_folder(f, directory)
145
-
146
- model = None
147
- index = None
148
- end_files = find_files(directory)
149
-
150
- for ff in end_files:
151
- if ff.endswith(".pth"):
152
- model = ff
153
- gr.Info(f"Model found: {ff}")
154
- if ff.endswith(".index"):
155
- index = ff
156
- gr.Info(f"Index found: {ff}")
157
-
158
- if not model:
159
- raise ValueError(f"Model not found in: {end_files}")
160
-
161
- if not index:
162
- gr.Warning("Index not found")
163
- else:
164
- index = os.path.abspath(index)
165
-
166
- return os.path.abspath(model), index
167
-
168
- except Exception as e:
169
- raise e
170
- finally:
171
- t = threading.Thread(target=clear_files, args=(directory,))
172
- t.start()
173
-
174
- def convert_now(audio_files, random_tag, converter):
175
- return converter(
176
- audio_files,
177
- random_tag,
178
- overwrite=False,
179
- parallel_workers=8
180
- )
181
-
182
- def apply_noisereduce(audio_list):
183
- print("Applying noise reduction")
184
-
185
- result = []
186
- for audio_path in audio_list:
187
- out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'
188
-
189
- try:
190
- # Load audio file
191
- audio = AudioSegment.from_file(audio_path)
192
-
193
- # Convert audio to numpy array
194
- samples = np.array(audio.get_array_of_samples())
195
-
196
- reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
197
-
198
- reduced_audio = AudioSegment(
199
- reduced_noise.tobytes(),
200
- frame_rate=audio.frame_rate,
201
- sample_width=audio.sample_width,
202
- channels=audio.channels
203
- )
204
-
205
- reduced_audio.export(out_path, format="wav")
206
- result.append(out_path)
207
-
208
- except Exception as e:
209
- traceback.print_exc()
210
- print(f"Error in noise reduction: {str(e)}")
211
- result.append(audio_path)
212
-
213
- return result
214
-
215
- def run(audio_files, file_m, file_index):
216
- if not audio_files:
217
- raise ValueError("Please provide an audio file.")
218
-
219
- if isinstance(audio_files, str):
220
- audio_files = [audio_files]
221
-
222
- try:
223
- duration_base = librosa.get_duration(filename=audio_files[0])
224
- print("Duration:", duration_base)
225
- except Exception as e:
226
- print(e)
227
-
228
- file_m = os.path.join(model_dir, file_m)
229
- file_index = os.path.join(model_dir, file_index) if file_index else None
230
-
231
- random_tag = "USER_" + str(random.randint(10000000, 99999999))
232
 
 
 
 
 
 
233
  converter.apply_conf(
234
- tag=random_tag,
235
- file_model=file_m,
236
  pitch_algo="rmvpe+",
237
  pitch_lvl=0,
238
- file_index=file_index,
239
  index_influence=0.75,
240
  respiration_median_filtering=3,
241
  envelope_ratio=0.25,
242
  consonant_breath_protection=0.5,
243
- resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
244
  )
245
- time.sleep(0.1)
246
-
247
- result = convert_now(audio_files, random_tag, converter)
248
- result = apply_noisereduce(result)
249
-
250
- return result[0] # Return the first (and only) file path
251
-
 
 
 
 
 
 
252
 
253
- def process_audio(audio_file, file_m, file_index):
254
- if audio_file is not None:
255
- result = run([audio_file], file_m, file_index)
256
  else:
257
- raise ValueError("No audio recorded.")
258
-
259
- # Return the processed audio file for playback and download
260
- return gr.update(value=result, visible=True), result
261
 
262
-
263
- def model_conf():
 
264
  model_files = [f for f in os.listdir(model_dir) if f.endswith(".pth")]
265
- return gr.Dropdown(
266
- label="Select Model File",
267
- choices=model_files,
268
- value=model_files[0] if model_files else None,
269
- interactive=True,
270
- )
271
-
272
- def index_conf():
273
  index_files = [f for f in os.listdir(model_dir) if f.endswith(".index")]
274
- return gr.Dropdown(
275
- label="Select Index File",
276
- choices=index_files,
277
- value=index_files[0] if index_files else None,
278
- interactive=True,
279
- )
280
-
281
- def button_conf():
282
- return gr.Button(
283
- "Inference",
284
- variant="primary",
285
- )
286
-
287
-
288
- def get_gui(theme):
289
- with gr.Blocks(theme=theme, delete_cache=(3200, 3200)) as app:
290
- gr.Markdown(title)
291
-
292
- # Only keep the microphone input option
293
- audio = gr.Audio(label="Record Audio", sources="microphone", type="filepath", visible=True)
294
-
295
- model = model_conf()
296
- indx = index_conf()
297
- button_base = button_conf()
298
-
299
- output_audio = gr.Audio(label="Play Processed Audio", visible=False, show_share_button=False)
300
- output_files = gr.File(label="Download Processed Audio", interactive=False)
301
-
302
- button_base.click(
303
- process_audio,
304
- inputs=[audio, model, indx], # Removed None, keeping only valid inputs
305
- outputs=[output_audio, output_files], # Only need to output the audio file for playback and download
306
- )
307
-
308
- return app
309
-
310
 
311
  if __name__ == "__main__":
312
- app = get_gui(theme)
313
- app.queue(default_concurrency_limit=40)
314
- app.launch(
315
- max_threads=40,
316
- share=False,
317
- show_error=True,
318
- quiet=False,
319
- debug=False,
320
- allowed_paths=["./downloads/"],
321
- )
 
 
1
  import gradio as gr
2
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import numpy as np
4
+ import torch
5
+ from infer_rvc_python import BaseLoader
 
 
 
 
 
 
6
 
7
# Initialize the audio transformation model; it is configured later by
# configure_model() once the user has picked a model file.
converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)

# Path to the models directory, resolved relative to this file so the app
# finds its models regardless of the current working directory.
model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Function to configure the model based on user selection
14
+ def configure_model(file_model, file_index):
15
+ model_path = os.path.join(model_dir, file_model)
16
+ index_path = os.path.join(model_dir, file_index) if file_index else None
17
+
18
  converter.apply_conf(
19
+ tag="live_transform",
20
+ file_model=model_path,
21
  pitch_algo="rmvpe+",
22
  pitch_lvl=0,
23
+ file_index=index_path,
24
  index_influence=0.75,
25
  respiration_median_filtering=3,
26
  envelope_ratio=0.25,
27
  consonant_breath_protection=0.5,
28
+ resample_sr=44100
29
  )
30
+ return "Model configured successfully."
31
+
32
+ # Function to process each audio chunk
33
+ def transform_audio_chunk(audio, instream):
34
+ if audio is None:
35
+ return None, instream
36
+
37
+ audio_data = torch.tensor(audio[1], dtype=torch.float32).unsqueeze(0) # Prepare audio for processing
38
+ with torch.no_grad():
39
+ transformed_audio, _ = converter.generate_from_cache(
40
+ audio_data=(audio[0], audio_data.numpy()),
41
+ tag="live_transform",
42
+ )
43
 
44
+ # Update the stream by concatenating the new transformed audio chunk
45
+ if instream is None:
46
+ return (audio[0], transformed_audio.squeeze(0).numpy()), (audio[0], transformed_audio.squeeze(0).numpy())
47
  else:
48
+ new_stream = np.concatenate((instream[1], transformed_audio.squeeze(0).numpy()))
49
+ return (audio[0], new_stream), (audio[0], new_stream)
 
 
50
 
51
# Gradio interface setup
with gr.Blocks() as demo:
    # List the available model and index files (sorted for a stable order).
    model_files = sorted(f for f in os.listdir(model_dir) if f.endswith(".pth"))
    index_files = sorted(f for f in os.listdir(model_dir) if f.endswith(".index"))

    # Dropdowns for model and index file selection
    model_file = gr.Dropdown(choices=model_files, label="Select Model File")
    index_file = gr.Dropdown(choices=index_files, label="Select Index File")
    configure_button = gr.Button("Configure Model")
    # Shows the message returned by configure_model so the user gets feedback.
    status = gr.Textbox(label="Status", interactive=False)

    # Audio input component with streaming enabled. The keyword is `sources`,
    # matching the Gradio version the previous revision of this app targeted.
    inp = gr.Audio(sources=["microphone"], streaming=True, type="numpy")
    # Audio output component to play back the transformed audio
    out = gr.Audio(streaming=True)
    # State to manage the ongoing audio stream
    stream = gr.State()

    # Link the input to the processing function and output
    inp.stream(transform_audio_chunk, [inp, stream], [out, stream])

    # Link the model configuration button to the configure_model function
    configure_button.click(
        configure_model,
        inputs=[model_file, index_file],
        outputs=[status],
    )


if __name__ == "__main__":
    demo.launch()