Spaces:

YetNak
/

RVC-UI

File size: 2,985 Bytes

import argparse
import os
import sys
from pydub import AudioSegment
import matplotlib.pyplot as plt
import numpy as np


now_dir = os.getcwd()
sys.path.append(now_dir)
from dotenv import load_dotenv
from scipy.io import wavfile

from configs import Config
from infer.modules.vc import VC

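####
# USAGE
#
# Run this script from a terminal (the current working directory is added to
# sys.path, so start it from the project root), for example:
#
#   python <this_script>.py --model_name <model file> \
#       --input_path <input audio> --opt_path <output wav> \
#       [--index_path <index file>] [--f0up_key 0] [--f0method harvest]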


def _parse_bool(value: str) -> bool:
    """Convert a command-line string such as "true" or "false" into a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value (including "False") would be parsed as ``True``. This
    converter interprets the text instead.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    text = value.strip().lower()
    if text in {"1", "true", "t", "yes", "y", "on"}:
        return True
    # The empty string is kept as False, matching what bool("") used to give.
    if text in {"", "0", "false", "f", "no", "n", "off"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def arg_parse(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the inference script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            ``sys.argv[1:]`` is parsed, as argparse normally does.

    Returns:
        The populated ``argparse.Namespace``. Options without a default
        (paths, ``model_name``, ``device``, ``is_half``) are ``None`` when
        they are not given on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--f0up_key", type=int, default=0)
    parser.add_argument("--input_path", type=str, help="input path")
    parser.add_argument("--index_path", type=str, help="index path")
    parser.add_argument("--f0method", type=str, default="harvest", help="harvest or pm")
    parser.add_argument("--opt_path", type=str, help="opt path")
    parser.add_argument("--model_name", type=str, help="store in assets/weight_root")
    parser.add_argument("--index_rate", type=float, default=0.66, help="index rate")
    parser.add_argument("--device", type=str, help="device")
    parser.add_argument("--is_half", type=_parse_bool, help="use half -> True")
    parser.add_argument("--filter_radius", type=int, default=3, help="filter radius")
    parser.add_argument("--resample_sr", type=int, default=0, help="resample sr")
    parser.add_argument("--rms_mix_rate", type=float, default=1, help="rms mix rate")
    parser.add_argument("--protect", type=float, default=0.33, help="protect")

    args = parser.parse_args(argv)
    # Drop the consumed options from sys.argv so that code run afterwards sees
    # only the program name.
    # NOTE(review): presumably needed because later code re-parses sys.argv - confirm.
    sys.argv = sys.argv[:1]

    return args


def _show_waveform(audio_path, max_samples=1000):
    """Print basic properties of an audio file and plot the start of its waveform.

    Args:
        audio_path: Path of the audio file to load with pydub.
        max_samples: Number of leading samples (frames, for multi-channel
            audio) to plot; only the beginning is drawn for clarity.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # Display basic information about the audio file
    print(f"Channels: {audio.channels}")
    print(f"Sample Width: {audio.sample_width} bytes")
    print(f"Frame Rate (Sample Rate): {audio.frame_rate} Hz")
    print(f"Frame Width: {audio.frame_width} bytes")
    print(f"Length: {len(audio)} ms")

    # Convert the audio data to a numpy array for visualization
    samples = np.array(audio.get_array_of_samples())

    # Multi-channel samples are interleaved in one flat array; give each
    # channel its own column so every channel is drawn as a separate line.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels))

    # Plot the waveform
    plt.figure(figsize=(15, 5))
    plt.plot(samples[:max_samples])
    plt.title("Waveform of the Audio File")
    plt.xlabel("Sample")
    plt.ylabel("Amplitude")
    plt.show()


def main():
    """Run one voice conversion from the command line and inspect the result.

    Loads environment variables, parses the CLI options, converts
    ``--input_path`` with the model named by ``--model_name``, writes the
    result to ``--opt_path``, then prints the properties of the written file
    and plots the start of its waveform.
    """
    load_dotenv()
    args = arg_parse()
    config = Config()
    config.device = args.device if args.device else config.device
    # Compare against None so that an explicit False on the command line is
    # honoured instead of silently falling back to the configured value.
    config.is_half = config.is_half if args.is_half is None else args.is_half
    vc = VC(config)
    vc.get_vc(args.model_name)
    _, wav_opt = vc.vc_single(
        0,
        args.input_path,
        args.f0up_key,
        None,
        args.f0method,
        args.index_path,
        None,
        args.index_rate,
        args.filter_radius,
        args.resample_sr,
        args.rms_mix_rate,
        args.protect,
    )
    # wavfile.write takes (filename, rate, data), so wav_opt is used as a
    # (sample rate, samples) pair.
    wavfile.write(args.opt_path, wav_opt[0], wav_opt[1])

    # Inspect the file that was just written. This has to run here, after the
    # conversion: wav_opt only exists inside main(), and pydub needs a file
    # path (or file object) rather than the (rate, samples) pair.
    _show_waveform(args.opt_path)


if __name__ == "__main__":
    main()