akshansh36 committed on
Commit ff6d226 · verified · 1 Parent(s): ce2ef70

Update app.py

Files changed (1)
  1. app.py +305 -95
app.py CHANGED
@@ -1,110 +1,320 @@
 
 import gradio as gr
- import torch
- import numpy as np
 import time
 import soundfile as sf
- import datetime
- from infer_rvc_python import BaseLoader

- # Initialize converter and other global variables
- converter = BaseLoader(only_cpu=False, hubert_path='./hubert_base.pt', rmvpe_path='./rmvpe.pt')
- now = datetime.datetime.now()
- timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
- random_tag = "USER_" + str(timestamp)
-
- converter.apply_conf(
-     tag=random_tag,
-     file_model="./model.pth",
-     pitch_algo="rmvpe+",
-     pitch_lvl=0,
-     file_index="./model.index",
-     index_influence=0.80,
-     respiration_median_filtering=3,
-     envelope_ratio=0.25,
-     consonant_breath_protection=0.5,
-     resample_sr=0,
- )
-
- # Constants and initializations
- chunk_sec = 0.1
- sr = 16000
- chunk_len = int(sr * chunk_sec)
- L = 16
-
- # Define the streaming function for Gradio
- def process_audio_stream(audio, instream):
-     global audio_buffer, start_time, first_output_latency, stop_recording
-
-     if audio is None:
-         return gr.update(), instream
-
-     if instream is None:
-         instream = torch.zeros(0, dtype=torch.float32)
-
-     # Convert audio data to torch tensor
-     audio_data = torch.tensor(audio[1], dtype=torch.float32)
-
-     # Append new data to audio buffer
-     audio_buffer = torch.cat((audio_buffer, audio_data))
-
-     if len(audio_buffer) >= chunk_len:
-         # Get the current chunk
-         buffer_chunk = audio_buffer[:chunk_len]
-         audio_buffer = audio_buffer[chunk_len:]
-
-         # Process the audio data (as per your existing logic)
-         input_chunk = torch.cat([instream[-L*2:], buffer_chunk])
-         data = (input_chunk.numpy().astype(np.int16), sr)
-
-         result_array, _ = converter.generate_from_cache(audio_data=data, tag=random_tag)
-         output = torch.tensor(result_array, dtype=torch.float32)
-
-         # Append the processed output to instream for continuous processing
-         instream = torch.cat((instream, output))
-
-         # Convert the output to a numpy array and return as a tuple with sample rate
-         return (instream.numpy(), sr), (instream.numpy(), sr)
     else:
-         return gr.update(), instream

- # Function to save audio to file
- def save_audio(audio, audio_path, sample_rate):
-     torchaudio.save(audio_path, torch.tensor(audio, dtype=torch.float32), sample_rate)

- # Function to list audio devices (for debugging or selecting specific devices)
- def list_audio_devices():
-     import pyaudio
-     audio = pyaudio.PyAudio()
-     device_count = audio.get_device_count()
-
-     print("Available audio devices:")
-     for i in range(device_count):
-         device_info = audio.get_device_info_by_index(i)
-         print(f"Index: {i}, Name: {device_info['name']}, Input Channels: {device_info['maxInputChannels']}, Output Channels: {device_info['maxOutputChannels']}")

- # Define Gradio interface
- with gr.Blocks() as demo:
-     inp = gr.Audio(sources="microphone", streaming=True)
-     out = gr.Audio(streaming=True)
-     stream = gr.State()

-     inp.stream(process_audio_stream, [inp, stream], [out, stream])
-
-     # Button to clear/reset the stream
-     clear = gr.Button("Clear")
-     clear.click(lambda: [None, torch.zeros(0, dtype=torch.float32)], None, [inp, out, stream])

- if __name__ == "__main__":
-     # Initialize global audio buffer
-     audio_buffer = torch.zeros(0, dtype=torch.float32)
-     start_time = time.time()
-     first_output_latency = 0
-     stop_recording = False

-     # Optionally list audio devices (can be commented out if not needed)
-     # list_audio_devices()

-     # Launch Gradio interface
-     demo.launch()

+ import os
 import gradio as gr
+ import spaces
+ from infer_rvc_python import BaseLoader
+ import random
+ import logging
 import time
 import soundfile as sf
+ from infer_rvc_python.main import download_manager
+ import zipfile
+ import librosa
+ import traceback
+ from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
+ from pedalboard.io import AudioFile
+ from pydub import AudioSegment
+ import noisereduce as nr
+ import numpy as np
+ import urllib.request
+ import shutil
+ import threading
+
+ logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
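+
+ # Overall flow: a recorded clip is voice-converted with infer_rvc_python and
+ # then denoised with noisereduce before being returned for playback/download.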
+
+ # Ensure the correct path to the models directory
+ model_dir = os.path.join(os.path.dirname(__file__), "models")
+
+ converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
+
+ title = "<center><strong><font size='7'>Vodex AI</font></strong></center>"
+ theme = "aliabid94/new-theme"
+
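+ # Collect model artifacts (.pth weights, .index files, .zip bundles) from a directory.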
+ def find_files(directory):
+     file_paths = []
+     for filename in os.listdir(directory):
+         if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
+             file_paths.append(os.path.join(directory, filename))
+     return file_paths
+
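+ # Extract an archive flat into my_dir, discarding any internal folder structure.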
+ def unzip_in_folder(my_zip, my_dir):
+     with zipfile.ZipFile(my_zip) as zip:
+         for zip_info in zip.infolist():
+             if zip_info.is_dir():
+                 continue
+             zip_info.filename = os.path.basename(zip_info.filename)
+             zip.extract(zip_info, my_dir)
+
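+ # Resolve an uploaded model: a .pth is used as-is; an uploaded .txt is expected
+ # to carry a download URL on its first line.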
+ def find_my_model(a_, b_):
+     if a_ is None or a_.endswith(".pth"):
+         return a_, b_
+
+     txt_files = []
+     for base_file in [a_, b_]:
+         if base_file is not None and base_file.endswith(".txt"):
+             txt_files.append(base_file)
+
+     directory = os.path.dirname(a_)
+
+     for txt in txt_files:
+         with open(txt, 'r') as file:
+             first_line = file.readline()
+
+         download_manager(
+             url=first_line.strip(),
+             path=directory,
+             extension="",
+         )

+     for f in find_files(directory):
+         if f.endswith(".zip"):
+             unzip_in_folder(f, directory)
+
+     model = None
+     index = None
+     end_files = find_files(directory)
+
+     for ff in end_files:
+         if ff.endswith(".pth"):
+             model = os.path.join(directory, ff)
+             gr.Info(f"Model found: {ff}")
+         if ff.endswith(".index"):
+             index = os.path.join(directory, ff)
+             gr.Info(f"Index found: {ff}")
+
+     if not model:
+         raise gr.Error(f"Model not found in: {end_files}")  # fix: gr.Error must be raised to surface in the UI
+
+     if not index:
+         gr.Warning("Index not found")
+
+     return model, index
+
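+ # Guard remote downloads: Hugging Face URLs only, capped at 500 MB.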
+ def get_file_size(url):
+     if "huggingface" not in url:
+         raise ValueError("Only downloads from Hugging Face are allowed")
+
+     with urllib.request.urlopen(url) as response:
+         info = response.info()
+         content_length = info.get("Content-Length")
+
+         file_size = int(content_length)
+         if file_size > 500000000:
+             raise ValueError("The file is too large. You can only download files up to 500 MB in size.")
+
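+ # Deferred cleanup: give the caller 15 s to consume the download, then remove it.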
+ def clear_files(directory):
+     time.sleep(15)
+     print(f"Clearing files: {directory}.")
+     shutil.rmtree(directory)
+
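+ # Download a model (and optional .index) from one or two comma-separated URLs
+ # into a random subfolder of downloads/; the folder is cleaned up afterwards.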
+ def get_my_model(url_data):
+     if not url_data:
+         return None, None
+
+     if "," in url_data:
+         a_, b_ = url_data.split(",")  # fix: split on the comma, not on whitespace
+         a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
     else:
+         a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None

+     out_dir = "downloads"
+     folder_download = str(random.randint(1000, 9999))
+     directory = os.path.join(out_dir, folder_download)
+     os.makedirs(directory, exist_ok=True)

+     try:
+         get_file_size(a_)
+         if b_:
+             get_file_size(b_)

+         valid_url = [a_] if not b_ else [a_, b_]
+         for link in valid_url:
+             download_manager(
+                 url=link,
+                 path=directory,
+                 extension="",
+             )

+         for f in find_files(directory):
+             if f.endswith(".zip"):
+                 unzip_in_folder(f, directory)

+         model = None
+         index = None
+         end_files = find_files(directory)

+         for ff in end_files:
+             if ff.endswith(".pth"):
+                 model = ff
+                 gr.Info(f"Model found: {ff}")
+             if ff.endswith(".index"):
+                 index = ff
+                 gr.Info(f"Index found: {ff}")

+         if not model:
+             raise ValueError(f"Model not found in: {end_files}")

+         if not index:
+             gr.Warning("Index not found")
+         else:
+             index = os.path.abspath(index)
+
+         return os.path.abspath(model), index
+
+     finally:
+         t = threading.Thread(target=clear_files, args=(directory,))
+         t.start()
+
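+ # Run the conversion batch under the given tag with 8 parallel workers.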
+ def convert_now(audio_files, random_tag, converter):
+     return converter(
+         audio_files,
+         random_tag,
+         overwrite=False,
+         parallel_workers=8
+     )
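+
+ # Post-process: partial spectral noise reduction (prop_decrease=0.6) on each rendered file.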
+ def apply_noisereduce(audio_list):
+     print("Applying noise reduction")
+
+     result = []
+     for audio_path in audio_list:
+         out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'
+
+         try:
+             # Load audio file
+             audio = AudioSegment.from_file(audio_path)
+
+             # Convert audio to numpy array
+             samples = np.array(audio.get_array_of_samples())
+
+             reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
+
+             reduced_audio = AudioSegment(
+                 reduced_noise.astype(samples.dtype).tobytes(),  # fix: reduce_noise returns floats; cast back to the source sample type
+                 frame_rate=audio.frame_rate,
+                 sample_width=audio.sample_width,
+                 channels=audio.channels
+             )
+
+             reduced_audio.export(out_path, format="wav")
+             result.append(out_path)
+
+         except Exception as e:
+             traceback.print_exc()
+             print(f"Error in noise reduction: {str(e)}")
+             result.append(audio_path)
+
+     return result
+
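+ # Full pipeline for one request: configure the shared converter under a fresh
+ # tag, convert, denoise, and return the resulting file path.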
+ def run(audio_files, file_m, file_index):
+     if not audio_files:
+         raise ValueError("Please provide an audio file.")
+
+     if isinstance(audio_files, str):
+         audio_files = [audio_files]
+
+     try:
+         duration_base = librosa.get_duration(filename=audio_files[0])
+         print("Duration:", duration_base)
+     except Exception as e:
+         print(e)
+
+     file_m = os.path.join(model_dir, file_m)
+     file_index = os.path.join(model_dir, file_index) if file_index else None
+
+     random_tag = "USER_" + str(random.randint(10000000, 99999999))
+
+     converter.apply_conf(
+         tag=random_tag,
+         file_model=file_m,
+         pitch_algo="rmvpe+",
+         pitch_lvl=0,
+         file_index=file_index,
+         index_influence=0.75,
+         respiration_median_filtering=3,
+         envelope_ratio=0.25,
+         consonant_breath_protection=0.5,
+         resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
+     )
+     time.sleep(0.1)
+
+     result = convert_now(audio_files, random_tag, converter)
+     result = apply_noisereduce(result)
+
+     return result[0]  # Return the first (and only) file path
+
+
+ def process_audio(audio_file, file_m, file_index):  # fix: drop the unused placeholder parameter
+     if audio_file is not None:
+         result = run([audio_file], file_m, file_index)
+     else:
+         raise ValueError("No audio recorded.")
+
+     # Return the processed audio file for playback and download
+     return gr.update(value=result, visible=True), result
+
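+ # The dropdowns below are populated from the local models/ directory at startup.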
+
+ def model_conf():
+     model_files = [f for f in os.listdir(model_dir) if f.endswith(".pth")]
+     return gr.Dropdown(
+         label="Select Model File",
+         choices=model_files,
+         value=model_files[0] if model_files else None,
+         interactive=True,
+     )
+
+ def index_conf():
+     index_files = [f for f in os.listdir(model_dir) if f.endswith(".index")]
+     return gr.Dropdown(
+         label="Select Index File",
+         choices=index_files,
+         value=index_files[0] if index_files else None,
+         interactive=True,
+     )
+
+ def button_conf():
+     return gr.Button(
+         "Inference",
+         variant="primary",
+     )
+
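+ # delete_cache=(3200, 3200) asks Gradio to sweep its file cache every 3200 s,
+ # removing cached files older than 3200 s (assuming Gradio 4.x (frequency, age) semantics).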
+ def get_gui(theme):
+     with gr.Blocks(theme=theme, delete_cache=(3200, 3200)) as app:
+         gr.Markdown(title)
+
+         # Only keep the microphone input option
+         audio = gr.Audio(label="Record Audio", sources="microphone", type="filepath", visible=True)
+
+         model = model_conf()
+         indx = index_conf()
+         button_base = button_conf()
+
+         output_audio = gr.Audio(label="Play Processed Audio", visible=False, show_share_button=False)
+         output_files = gr.File(label="Download Processed Audio", interactive=False)
+
+         button_base.click(
+             process_audio,
+             inputs=[audio, model, indx],  # fix: drop the stray None placeholder, which is not a valid input component
+             outputs=[output_audio, output_files],  # processed audio for playback and download
+         )
+
+     return app
+
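+ # queue() caps concurrent requests at 40; allowed_paths lets Gradio serve
+ # files from ./downloads/ for the download component.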
+ if __name__ == "__main__":
+     app = get_gui(theme)
+     app.queue(default_concurrency_limit=40)
+     app.launch(
+         max_threads=40,
+         share=False,
+         show_error=True,
+         quiet=False,
+         debug=False,
+         allowed_paths=["./downloads/"],
+     )