Spaces:

YetNak
/

RVC-UI

Runtime error

App Files Files Community

RVC-UI / tools /cmd /infer_cli.py

Blane187

Update tools/cmd/infer_cli.py

3c2bcd5 verified 11 months ago

raw

history blame contribute delete

2.99 kB

	import argparse
	import os
	import sys
	from pydub import AudioSegment
	import matplotlib.pyplot as plt
	import numpy as np


	# Add the current working directory to the module search path before the
	# remaining imports run.
	# NOTE(review): presumably this lets `configs` and `infer` resolve when the
	# script is launched from the repository root -- confirm against the layout.
	now_dir = os.getcwd()
	sys.path.append(now_dir)
	from dotenv import load_dotenv
	from scipy.io import wavfile

	from configs import Config
	from infer.modules.vc import VC

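	####
	# USAGE
	#
	# In your terminal or command prompt, run this script from the repository
	# root (the current working directory is appended to sys.path), for example:
	#
	#   python tools/cmd/infer_cli.py --model_name MODEL_NAME \
	#       --input_path INPUT_WAV --opt_path OUTPUT_WAV \
	#       --index_path INDEX_FILE --f0method harvest --f0up_key 0
	#
	# See arg_parse() below for the full list of options and their defaults.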


	def _str2bool(value: str) -> bool:
	    """Convert a command-line token such as ``true``, ``0`` or ``no`` to a bool.

	    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
	    non-empty value -- including ``"False"`` -- would be parsed as ``True``.
	    This converter interprets the text instead.

	    Args:
	        value: The raw string taken from the command line.

	    Returns:
	        ``True`` or ``False`` according to the (case-insensitive) spelling.

	    Raises:
	        argparse.ArgumentTypeError: If *value* is not a recognised spelling;
	            argparse reports this as a normal usage error.
	    """
	    token = value.strip().lower()
	    if token in ("true", "t", "yes", "y", "1"):
	        return True
	    # The empty string stays falsy, as it was under ``type=bool``.
	    if token in ("false", "f", "no", "n", "0", ""):
	        return False
	    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


	def arg_parse() -> argparse.Namespace:
	    """Parse the command-line options of the inference script.

	    Returns:
	        The populated ``argparse.Namespace``. Options without a default
	        (``input_path``, ``index_path``, ``opt_path``, ``model_name``,
	        ``device``, ``is_half``) are ``None`` when not supplied.

	    Side effects:
	        ``sys.argv`` is truncated to the program name after parsing.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--f0up_key", type=int, default=0)
	    parser.add_argument("--input_path", type=str, help="input path")
	    parser.add_argument("--index_path", type=str, help="index path")
	    parser.add_argument("--f0method", type=str, default="harvest", help="harvest or pm")
	    parser.add_argument("--opt_path", type=str, help="opt path")
	    parser.add_argument("--model_name", type=str, help="store in assets/weight_root")
	    parser.add_argument("--index_rate", type=float, default=0.66, help="index rate")
	    parser.add_argument("--device", type=str, help="device")
	    # Parsed from text so that "--is_half False" really yields False.
	    parser.add_argument("--is_half", type=_str2bool, help="use half -> True")
	    parser.add_argument("--filter_radius", type=int, default=3, help="filter radius")
	    parser.add_argument("--resample_sr", type=int, default=0, help="resample sr")
	    parser.add_argument("--rms_mix_rate", type=float, default=1, help="rms mix rate")
	    parser.add_argument("--protect", type=float, default=0.33, help="protect")

	    args = parser.parse_args()
	    # Drop the consumed options so anything that reads sys.argv afterwards
	    # sees only the program name.
	    # NOTE(review): presumably needed because Config() parses sys.argv itself -- confirm.
	    sys.argv = sys.argv[:1]

	    return args


	def main():
	    """Convert one audio file from the command line, save it, and preview it.

	    Steps:
	        1. Load environment variables with ``load_dotenv`` and parse the CLI
	           options with ``arg_parse``.
	        2. Build a ``Config``, overriding ``device`` / ``is_half`` when they
	           were given on the command line.
	        3. Load the model named by ``--model_name`` and run ``vc_single`` on
	           ``--input_path``.
	        4. Write the result to ``--opt_path`` with ``scipy.io.wavfile.write``.
	        5. Re-open the written file with pydub, print its basic properties,
	           and plot the first 1000 samples (``plt.show()`` opens a window).
	    """
	    load_dotenv()
	    args = arg_parse()

	    config = Config()
	    config.device = args.device if args.device else config.device
	    # Compare against None so an explicit ``--is_half False`` overrides the
	    # configured value instead of being ignored as "falsy".
	    config.is_half = args.is_half if args.is_half is not None else config.is_half

	    vc = VC(config)
	    vc.get_vc(args.model_name)
	    # NOTE(review): the literal 0 and the two None values are positional
	    # arguments whose meaning is defined by VC.vc_single -- confirm against
	    # its signature before changing them.
	    _, wav_opt = vc.vc_single(
	        0,
	        args.input_path,
	        args.f0up_key,
	        None,
	        args.f0method,
	        args.index_path,
	        None,
	        args.index_rate,
	        args.filter_radius,
	        args.resample_sr,
	        args.rms_mix_rate,
	        args.protect,
	    )
	    # wav_opt is used as a (sample_rate, samples) pair here.
	    wavfile.write(args.opt_path, wav_opt[0], wav_opt[1])

	    # Load the file that was just written. ``wav_opt`` itself is the
	    # (rate, data) pair, not a path or file object, so it cannot be passed
	    # to AudioSegment.from_file.
	    audio = AudioSegment.from_file(args.opt_path)

	    # Display basic information about the audio file
	    print(f"Channels: {audio.channels}")
	    print(f"Sample Width: {audio.sample_width} bytes")
	    print(f"Frame Rate (Sample Rate): {audio.frame_rate} Hz")
	    print(f"Frame Width: {audio.frame_width} bytes")
	    print(f"Length: {len(audio)} ms")

	    # Convert the audio data to a numpy array for visualization
	    samples = np.array(audio.get_array_of_samples())

	    # Multi-channel audio comes back interleaved; give each channel its own
	    # column (works for stereo and for any higher channel count).
	    if audio.channels > 1:
	        samples = samples.reshape((-1, audio.channels))

	    # Plot the waveform
	    plt.figure(figsize=(15, 5))
	    plt.plot(samples[:1000])  # Plotting first 1000 samples for clarity
	    plt.title("Waveform of the Audio File")
	    plt.xlabel("Sample")
	    plt.ylabel("Amplitude")
	    plt.show()


	# Script entry point: run the conversion only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()