akshansh36 committed on
Commit ff6d226 · verified · 1 Parent(s): ce2ef70

Update app.py

Files changed (1)
  1. app.py +305 -95
app.py CHANGED
@@ -1,110 +1,320 @@
 
 import gradio as gr
- import torch
- import numpy as np
 import time
 import soundfile as sf
- import datetime
- from infer_rvc_python import BaseLoader

- # Initialize converter and other global variables
- converter = BaseLoader(only_cpu=False, hubert_path='./hubert_base.pt', rmvpe_path='./rmvpe.pt')
- now = datetime.datetime.now()
- timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
- random_tag = "USER_" + str(timestamp)
-
- converter.apply_conf(
-     tag=random_tag,
-     file_model="./model.pth",
-     pitch_algo="rmvpe+",
-     pitch_lvl=0,
-     file_index="./model.index",
-     index_influence=0.80,
-     respiration_median_filtering=3,
-     envelope_ratio=0.25,
-     consonant_breath_protection=0.5,
-     resample_sr=0,
- )
-
- # Constants and initializations
- chunk_sec = 0.1
- sr = 16000
- chunk_len = int(sr * chunk_sec)
- L = 16
-
- # Define the streaming function for Gradio
- def process_audio_stream(audio, instream):
-     global audio_buffer, start_time, first_output_latency, stop_recording
-
-     if audio is None:
-         return gr.update(), instream
-
-     if instream is None:
-         instream = torch.zeros(0, dtype=torch.float32)
-
-     # Convert audio data to torch tensor
-     audio_data = torch.tensor(audio[1], dtype=torch.float32)
-
-     # Append new data to audio buffer
-     audio_buffer = torch.cat((audio_buffer, audio_data))
-
-     if len(audio_buffer) >= chunk_len:
-         # Get the current chunk
-         buffer_chunk = audio_buffer[:chunk_len]
-         audio_buffer = audio_buffer[chunk_len:]
-
-         # Process the audio data (as per your existing logic)
-         input_chunk = torch.cat([instream[-L*2:], buffer_chunk])
-         data = (input_chunk.numpy().astype(np.int16), sr)
-
-         result_array, _ = converter.generate_from_cache(audio_data=data, tag=random_tag)
-         output = torch.tensor(result_array, dtype=torch.float32)
-
-         # Append the processed output to instream for continuous processing
-         instream = torch.cat((instream, output))
-
-         # Convert the output to a numpy array and return as a tuple with sample rate
-         return (instream.numpy(), sr), (instream.numpy(), sr)
     else:
-         return gr.update(), instream

- # Function to save audio to file
- def save_audio(audio, audio_path, sample_rate):
-     torchaudio.save(audio_path, torch.tensor(audio, dtype=torch.float32), sample_rate)

- # Function to list audio devices (for debugging or selecting specific devices)
- def list_audio_devices():
-     import pyaudio
-     audio = pyaudio.PyAudio()
-     device_count = audio.get_device_count()
-
-     print("Available audio devices:")
-     for i in range(device_count):
-         device_info = audio.get_device_info_by_index(i)
-         print(f"Index: {i}, Name: {device_info['name']}, Input Channels: {device_info['maxInputChannels']}, Output Channels: {device_info['maxOutputChannels']}")

- # Define Gradio interface
- with gr.Blocks() as demo:
-     inp = gr.Audio(sources="microphone", streaming=True)
-     out = gr.Audio(streaming=True)
-     stream = gr.State()

-     inp.stream(process_audio_stream, [inp, stream], [out, stream])
-
-     # Button to clear/reset the stream
-     clear = gr.Button("Clear")
-     clear.click(lambda: [None, torch.zeros(0, dtype=torch.float32)], None, [inp, out, stream])

- if __name__ == "__main__":
-     # Initialize global audio buffer
-     audio_buffer = torch.zeros(0, dtype=torch.float32)
-     start_time = time.time()
-     first_output_latency = 0
-     stop_recording = False

-     # Optionally list audio devices (can be commented out if not needed)
-     # list_audio_devices()

-     # Launch Gradio interface
-     demo.launch()

+ import os
 import gradio as gr
+ import spaces
+ from infer_rvc_python import BaseLoader
+ import random
+ import logging
 import time
 import soundfile as sf
+ from infer_rvc_python.main import download_manager
+ import zipfile
+ import librosa
+ import traceback
+ from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
+ from pedalboard.io import AudioFile
+ from pydub import AudioSegment
+ import noisereduce as nr
+ import numpy as np
+ import urllib.request
+ import shutil
+ import threading
+
+ logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
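+
+ # Overall flow: a recorded clip is voice-converted with infer_rvc_python and
+ # then denoised with noisereduce before being returned for playback/download.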
+
+ # Ensure the correct path to the models directory
+ model_dir = os.path.join(os.path.dirname(__file__), "models")
+
+ converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
+
+ title = "<center><strong><font size='7'>Vodex AI</font></strong></center>"
+ theme = "aliabid94/new-theme"
+
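+ # Collect model artifacts (.pth weights, .index files, .zip bundles) from a directory.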
+ def find_files(directory):
+     file_paths = []
+     for filename in os.listdir(directory):
+         if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
+             file_paths.append(os.path.join(directory, filename))
+     return file_paths
+
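+ # Extract an archive flat into my_dir, discarding any internal folder structure.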
+ def unzip_in_folder(my_zip, my_dir):
+     with zipfile.ZipFile(my_zip) as zip:
+         for zip_info in zip.infolist():
+             if zip_info.is_dir():
+                 continue
+             zip_info.filename = os.path.basename(zip_info.filename)
+             zip.extract(zip_info, my_dir)
+
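+ # Resolve an uploaded model: a .pth is used as-is; an uploaded .txt is expected
+ # to carry a download URL on its first line.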
+ def find_my_model(a_, b_):
+     if a_ is None or a_.endswith(".pth"):
+         return a_, b_
+
+     txt_files = []
+     for base_file in [a_, b_]:
+         if base_file is not None and base_file.endswith(".txt"):
+             txt_files.append(base_file)
+
+     directory = os.path.dirname(a_)
+
+     for txt in txt_files:
+         with open(txt, 'r') as file:
+             first_line = file.readline()
+
+         download_manager(
+             url=first_line.strip(),
+             path=directory,
+             extension="",
+         )

+     for f in find_files(directory):
+         if f.endswith(".zip"):
+             unzip_in_folder(f, directory)
+
+     model = None
+     index = None
+     end_files = find_files(directory)
+
+     for ff in end_files:
+         if ff.endswith(".pth"):
+             model = os.path.join(directory, ff)
+             gr.Info(f"Model found: {ff}")
+         if ff.endswith(".index"):
+             index = os.path.join(directory, ff)
+             gr.Info(f"Index found: {ff}")
+
+     if not model:
+         raise gr.Error(f"Model not found in: {end_files}")  # fix: gr.Error must be raised to surface in the UI
+
+     if not index:
+         gr.Warning("Index not found")
+
+     return model, index
+
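+ # Guard remote downloads: Hugging Face URLs only, capped at 500 MB.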
+ def get_file_size(url):
+     if "huggingface" not in url:
+         raise ValueError("Only downloads from Hugging Face are allowed")
+
+     with urllib.request.urlopen(url) as response:
+         info = response.info()
+         content_length = info.get("Content-Length")
+
+         file_size = int(content_length)
+         if file_size > 500000000:
+             raise ValueError("The file is too large. You can only download files up to 500 MB in size.")
+
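+ # Deferred cleanup: give the caller 15 s to consume the download, then remove it.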
+ def clear_files(directory):
+     time.sleep(15)
+     print(f"Clearing files: {directory}.")
+     shutil.rmtree(directory)
+
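+ # Download a model (and optional .index) from one or two comma-separated URLs
+ # into a random subfolder of downloads/; the folder is cleaned up afterwards.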
+ def get_my_model(url_data):
+     if not url_data:
+         return None, None
+
+     if "," in url_data:
+         a_, b_ = url_data.split(",")  # fix: split on the comma, not on whitespace
+         a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
     else:
+         a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None

+     out_dir = "downloads"
+     folder_download = str(random.randint(1000, 9999))
+     directory = os.path.join(out_dir, folder_download)
+     os.makedirs(directory, exist_ok=True)

+     try:
+         get_file_size(a_)
+         if b_:
+             get_file_size(b_)

+         valid_url = [a_] if not b_ else [a_, b_]
+         for link in valid_url:
+             download_manager(
+                 url=link,
+                 path=directory,
+                 extension="",
+             )

+         for f in find_files(directory):
+             if f.endswith(".zip"):
+                 unzip_in_folder(f, directory)

+         model = None
+         index = None
+         end_files = find_files(directory)

+         for ff in end_files:
+             if ff.endswith(".pth"):
+                 model = ff
+                 gr.Info(f"Model found: {ff}")
+             if ff.endswith(".index"):
+                 index = ff
+                 gr.Info(f"Index found: {ff}")

+         if not model:
+             raise ValueError(f"Model not found in: {end_files}")

+         if not index:
+             gr.Warning("Index not found")
+         else:
+             index = os.path.abspath(index)
+
+         return os.path.abspath(model), index
+
+     finally:
+         t = threading.Thread(target=clear_files, args=(directory,))
+         t.start()
+
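+ # Run the conversion batch under the given tag with 8 parallel workers.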
+ def convert_now(audio_files, random_tag, converter):
+     return converter(
+         audio_files,
+         random_tag,
+         overwrite=False,
+         parallel_workers=8
+     )
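+
+ # Post-process: partial spectral noise reduction (prop_decrease=0.6) on each rendered file.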
+ def apply_noisereduce(audio_list):
+     print("Applying noise reduction")
+
+     result = []
+     for audio_path in audio_list:
+         out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'
+
+         try:
+             # Load audio file
+             audio = AudioSegment.from_file(audio_path)
+
+             # Convert audio to numpy array
+             samples = np.array(audio.get_array_of_samples())
+
+             reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
+
+             reduced_audio = AudioSegment(
+                 reduced_noise.astype(samples.dtype).tobytes(),  # fix: reduce_noise returns floats; cast back to the source sample type
+                 frame_rate=audio.frame_rate,
+                 sample_width=audio.sample_width,
+                 channels=audio.channels
+             )
+
+             reduced_audio.export(out_path, format="wav")
+             result.append(out_path)
+
+         except Exception as e:
+             traceback.print_exc()
+             print(f"Error in noise reduction: {str(e)}")
+             result.append(audio_path)
+
+     return result
+
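+ # Full pipeline for one request: configure the shared converter under a fresh
+ # tag, convert, denoise, and return the resulting file path.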
+ def run(audio_files, file_m, file_index):
+     if not audio_files:
+         raise ValueError("Please provide an audio file.")
+
+     if isinstance(audio_files, str):
+         audio_files = [audio_files]
+
+     try:
+         duration_base = librosa.get_duration(filename=audio_files[0])
+         print("Duration:", duration_base)
+     except Exception as e:
+         print(e)
+
+     file_m = os.path.join(model_dir, file_m)
+     file_index = os.path.join(model_dir, file_index) if file_index else None
+
+     random_tag = "USER_" + str(random.randint(10000000, 99999999))
+
+     converter.apply_conf(
+         tag=random_tag,
+         file_model=file_m,
+         pitch_algo="rmvpe+",
+         pitch_lvl=0,
+         file_index=file_index,
+         index_influence=0.75,
+         respiration_median_filtering=3,
+         envelope_ratio=0.25,
+         consonant_breath_protection=0.5,
+         resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
+     )
+     time.sleep(0.1)
+
+     result = convert_now(audio_files, random_tag, converter)
+     result = apply_noisereduce(result)
+
+     return result[0]  # Return the first (and only) file path
+
+
+ def process_audio(audio_file, file_m, file_index):  # fix: drop the unused placeholder parameter
+     if audio_file is not None:
+         result = run([audio_file], file_m, file_index)
+     else:
+         raise ValueError("No audio recorded.")
+
+     # Return the processed audio file for playback and download
+     return gr.update(value=result, visible=True), result
+
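+ # The dropdowns below are populated from the local models/ directory at startup.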
+
+ def model_conf():
+     model_files = [f for f in os.listdir(model_dir) if f.endswith(".pth")]
+     return gr.Dropdown(
+         label="Select Model File",
+         choices=model_files,
+         value=model_files[0] if model_files else None,
+         interactive=True,
+     )
+
+ def index_conf():
+     index_files = [f for f in os.listdir(model_dir) if f.endswith(".index")]
+     return gr.Dropdown(
+         label="Select Index File",
+         choices=index_files,
+         value=index_files[0] if index_files else None,
+         interactive=True,
+     )
+
+ def button_conf():
+     return gr.Button(
+         "Inference",
+         variant="primary",
+     )
+
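+ # delete_cache=(3200, 3200) asks Gradio to sweep its file cache every 3200 s,
+ # removing cached files older than 3200 s (assuming Gradio 4.x (frequency, age) semantics).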
+ def get_gui(theme):
+     with gr.Blocks(theme=theme, delete_cache=(3200, 3200)) as app:
+         gr.Markdown(title)
+
+         # Only keep the microphone input option
+         audio = gr.Audio(label="Record Audio", sources="microphone", type="filepath", visible=True)
+
+         model = model_conf()
+         indx = index_conf()
+         button_base = button_conf()
+
+         output_audio = gr.Audio(label="Play Processed Audio", visible=False, show_share_button=False)
+         output_files = gr.File(label="Download Processed Audio", interactive=False)
+
+         button_base.click(
+             process_audio,
+             inputs=[audio, model, indx],  # fix: drop the stray None placeholder, which is not a valid input component
+             outputs=[output_audio, output_files],  # processed audio for playback and download
+         )
+
+     return app
+
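+ # queue() caps concurrent requests at 40; allowed_paths lets Gradio serve
+ # files from ./downloads/ for the download component.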
+ if __name__ == "__main__":
+     app = get_gui(theme)
+     app.queue(default_concurrency_limit=40)
+     app.launch(
+         max_threads=40,
+         share=False,
+         show_error=True,
+         quiet=False,
+         debug=False,
+         allowed_paths=["./downloads/"],
+     )