Spaces:

Seqath
/

Training-RVC-v2

Build error

App Files Files Community

Seqath commited on Dec 23, 2024

Commit

5891805

verified ·

1 Parent(s): b4739bb

Create app.py

Browse files

Files changed (1) hide show

app.py +276 -0

app.py ADDED Viewed

	@@ -0,0 +1,276 @@

+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+import os
+import sys
+import shutil
+import zipfile
+import urllib.request
+from argparse import Namespace
+from cog import BasePredictor, Input, Path as CogPath
+sys.path.insert(0, os.path.abspath("src"))
+import main as m
+def download_online_model(url, dir_name):
+    print(f"[~] Downloading voice model with name {dir_name}...")
+    zip_name = url.split("/")[-1]
+    extraction_folder = os.path.join(m.rvc_models_dir, dir_name)
+    if os.path.exists(extraction_folder):
+        print(f"Voice model directory {dir_name} already exists! Skipping download.")
+        return
+    if "pixeldrain.com" in url:
+        url = f"https://pixeldrain.com/api/file/{zip_name}"
+    urllib.request.urlretrieve(url, zip_name)
+    print("[~] Extracting zip...")
+    with zipfile.ZipFile(zip_name, "r") as zip_ref:
+        for member in zip_ref.infolist():
+            # skip directories
+            if member.is_dir():
+                continue
+            # create target directory if it does not exist
+            os.makedirs(extraction_folder, exist_ok=True)
+            # extract only files directly to extraction_folder
+            with zip_ref.open(member) as source, open(
+                os.path.join(extraction_folder, os.path.basename(member.filename)), "wb"
+            ) as target:
+                shutil.copyfileobj(source, target)
+    print(f"[+] {dir_name} Model successfully downloaded!")
+class Predictor(BasePredictor):
+    def setup(self) -> None:
+        """Load the model into memory to make running multiple predictions efficient"""
+        pass
+    def predict(
+        self,
+        song_input: CogPath = Input(
+            description="Upload your audio file here.",
+            default=None,
+        ),
+        rvc_model: str = Input(
+            description="RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
+            default="Squidward",
+            choices=[
+                "Squidward",
+                "MrKrabs",
+                "Plankton",
+                "Drake",
+                "Vader",
+                "Trump",
+                "Biden",
+                "Obama",
+                "Guitar",
+                "Voilin",
+                "CUSTOM",
+                "SamA",  # TODO REMOVE THIS
+            ],
+        ),
+        custom_rvc_model_download_url: str = Input(
+            description="URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.",
+            default=None,
+        ),
+        pitch_change: str = Input(
+            description="Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
+            default="no-change",
+            choices=["no-change", "male-to-female", "female-to-male"],
+        ),
+        index_rate: float = Input(
+            description="Control how much of the AI's accent to leave in the vocals.",
+            default=0.5,
+            ge=0,
+            le=1,
+        ),
+        filter_radius: int = Input(
+            description="If >=3: apply median filtering median filtering to the harvested pitch results.",
+            default=3,
+            ge=0,
+            le=7,
+        ),
+        rms_mix_rate: float = Input(
+            description="Control how much to use the original vocal's loudness (0) or a fixed loudness (1).",
+            default=0.25,
+            ge=0,
+            le=1,
+        ),
+        pitch_detection_algorithm: str = Input(
+            description="Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
+            default="rmvpe",
+            choices=["rmvpe", "mangio-crepe"],
+        ),
+        crepe_hop_length: int = Input(
+            description="When `pitch_detection_algo` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.",
+            default=128,
+        ),
+        protect: float = Input(
+            description="Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.",
+            default=0.33,
+            ge=0,
+            le=0.5,
+        ),
+        main_vocals_volume_change: float = Input(
+            description="Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.",
+            default=0,
+        ),
+        backup_vocals_volume_change: float = Input(
+            description="Control volume of backup AI vocals.",
+            default=0,
+        ),
+        instrumental_volume_change: float = Input(
+            description="Control volume of the background music/instrumentals.",
+            default=0,
+        ),
+        pitch_change_all: float = Input(
+            description="Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.",
+            default=0,
+        ),
+        reverb_size: float = Input(
+            description="The larger the room, the longer the reverb time.",
+            default=0.15,
+            ge=0,
+            le=1,
+        ),
+        reverb_wetness: float = Input(
+            description="Level of AI vocals with reverb.",
+            default=0.2,
+            ge=0,
+            le=1,
+        ),
+        reverb_dryness: float = Input(
+            description="Level of AI vocals without reverb.",
+            default=0.8,
+            ge=0,
+            le=1,
+        ),
+        reverb_damping: float = Input(
+            description="Absorption of high frequencies in the reverb.",
+            default=0.7,
+            ge=0,
+            le=1,
+        ),
+        output_format: str = Input(
+            description="wav for best quality and large file size, mp3 for decent quality and small file size.",
+            default="mp3",
+            choices=["mp3", "wav"],
+        ),
+    ) -> CogPath:
+        """
+        Runs a single prediction on the model.
+        Required Parameters:
+            song_input (CogPath): Upload your audio file here.
+            rvc_model (str): RVC model for a specific voice. Default is "Squidward". If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.
+            pitch_change (float): Change pitch of AI vocals in octaves. Set to 0 for no change. Generally, use 1 for male to female conversions and -1 for vice-versa.
+        Optional Parameters:
+            custom_rvc_model_download_url (str): URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value. Defaults to None.
+            index_rate (float): Control how much of the AI's accent to leave in the vocals. 0 <= INDEX_RATE <= 1. Defaults to 0.5.
+            filter_radius (int): If >=3: apply median filtering median filtering to the harvested pitch results. 0 <= FILTER_RADIUS <= 7. Defaults to 3.
+            rms_mix_rate (float): Control how much to use the original vocal's loudness (0) or a fixed loudness (1). 0 <= RMS_MIX_RATE <= 1. Defaults to 0.25.
+            pitch_detection_algorithm (str): Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals). Defaults to "rmvpe".
+            crepe_hop_length (int): Controls how often it checks for pitch changes in milliseconds when using mangio-crepe algo specifically. Lower values leads to longer conversions and higher risk of voice cracks, but better pitch accuracy. Defaults to 128.
+            protect (float): Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable. 0 <= PROTECT <= 0.5. Defaults to 0.33.
+            main_vocals_volume_change (float): Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels. Defaults to 0.
+            backup_vocals_volume_change (float): Control volume of backup AI vocals. Defaults to 0.
+            instrumental_volume_change (float): Control volume of the background music/instrumentals. Defaults to 0.
+            pitch_change_all (float): Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly. Defaults to 0.
+            reverb_size (float): The larger the room, the longer the reverb time. 0 <= REVERB_SIZE <= 1. Defaults to 0.15.
+            reverb_wetness (float): Level of AI vocals with reverb. 0 <= REVERB_WETNESS <= 1. Defaults to 0.2.
+            reverb_dryness (float): Level of AI vocals without reverb. 0 <= REVERB_DRYNESS <= 1. Defaults to 0.8.
+            reverb_damping (float): Absorption of high frequencies in the reverb. 0 <= REVERB_DAMPING <= 1. Defaults to 0.7.
+            output_format (str): wav for best quality and large file size, mp3 for decent quality and small file size. Defaults to "mp3".
+        Returns:
+            CogPath: The output path of the generated audio file.
+        """
+        if custom_rvc_model_download_url:
+            custom_rvc_model_download_name = urllib.parse.unquote(
+                custom_rvc_model_download_url.split("/")[-1]
+            )
+            custom_rvc_model_download_name = os.path.splitext(
+                custom_rvc_model_download_name
+            )[0]
+            print(
+                f"[!] The model will be downloaded as '{custom_rvc_model_download_name}'."
+            )
+            download_online_model(
+                url=custom_rvc_model_download_url,
+                dir_name=custom_rvc_model_download_name,
+            )
+            rvc_model = custom_rvc_model_download_name
+        else:
+            print(
+                "[!] Since URL was provided, we will try to download the model and use it (even if `rvc_model` is not set to 'CUSTOM')."
+            )
+        # Convert pitch_change from string to numerical value for processing
+        # 0 for no change, 1 for male to female, -1 for female to male
+        if pitch_change == "no-change":
+            pitch_change = 0
+        elif pitch_change == "male-to-female":
+            pitch_change = 1
+        else:  # pitch_change == "female-to-male"
+            pitch_change = -1
+        args = Namespace(
+            song_input=str(song_input),
+            rvc_dirname=(model_dir_name := rvc_model),
+            pitch_change=pitch_change,
+            keep_files=(keep_files := False),
+            index_rate=index_rate,
+            filter_radius=filter_radius,
+            rms_mix_rate=rms_mix_rate,
+            pitch_detection_algo=pitch_detection_algorithm,
+            crepe_hop_length=crepe_hop_length,
+            protect=protect,
+            main_vol=main_vocals_volume_change,
+            backup_vol=backup_vocals_volume_change,
+            inst_vol=instrumental_volume_change,
+            pitch_change_all=pitch_change_all,
+            reverb_size=reverb_size,
+            reverb_wetness=reverb_wetness,
+            reverb_dryness=reverb_dryness,
+            reverb_damping=reverb_damping,
+            output_format=output_format,
+        )
+        rvc_dirname = args.rvc_dirname
+        if not os.path.exists(os.path.join(m.rvc_models_dir, rvc_dirname)):
+            raise Exception(
+                f"The folder {os.path.join(m.rvc_models_dir, rvc_dirname)} does not exist."
+            )
+        cover_path = m.song_cover_pipeline(
+            args.song_input,
+            rvc_dirname,
+            args.pitch_change,
+            args.keep_files,
+            main_gain=args.main_vol,
+            backup_gain=args.backup_vol,
+            inst_gain=args.inst_vol,
+            index_rate=args.index_rate,
+            filter_radius=args.filter_radius,
+            rms_mix_rate=args.rms_mix_rate,
+            f0_method=args.pitch_detection_algo,
+            crepe_hop_length=args.crepe_hop_length,
+            protect=args.protect,
+            pitch_change_all=args.pitch_change_all,
+            reverb_rm_size=args.reverb_size,
+            reverb_wet=args.reverb_wetness,
+            reverb_dry=args.reverb_dryness,
+            reverb_damping=args.reverb_damping,
+            output_format=args.output_format,
+        )
+        print(f"[+] Cover generated at {cover_path}")
+        # Return the output path
+        return CogPath(cover_path)