import argparse
import os
import sys

from pydub import AudioSegment
import matplotlib.pyplot as plt
import numpy as np

# Make the current working directory importable so the project-local
# packages below (configs, infer) resolve when run from the repo root.
now_dir = os.getcwd()
sys.path.append(now_dir)
from dotenv import load_dotenv
from scipy.io import wavfile

from configs import Config
from infer.modules.vc import VC

####
# USAGE
#
# In your Terminal or CMD or whatever, run this script with the flags
# defined in arg_parse() below, for example:
#
#   python <this_script>.py --model_name <model> --input_path <in.wav> \
#       --opt_path <out.wav> --index_path <file.index> --f0method harvest


def _str_to_bool(value: str) -> bool:
    """Convert a command-line string such as "true"/"false" to a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because any non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised boolean.
    """
    normalized = value.strip().lower()
    if normalized in {"1", "true", "t", "yes", "y", "on"}:
        return True
    if normalized in {"0", "false", "f", "no", "n", "off"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def arg_parse() -> argparse.Namespace:
    """Parse the command-line options for a single conversion run.

    Returns:
        The parsed arguments as an ``argparse.Namespace``.

    Side effects:
        Truncates ``sys.argv`` to the program name after parsing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--f0up_key", type=int, default=0)
    parser.add_argument("--input_path", type=str, help="input path")
    parser.add_argument("--index_path", type=str, help="index path")
    parser.add_argument("--f0method", type=str, default="harvest", help="harvest or pm")
    parser.add_argument("--opt_path", type=str, help="opt path")
    parser.add_argument("--model_name", type=str, help="store in assets/weight_root")
    parser.add_argument("--index_rate", type=float, default=0.66, help="index rate")
    parser.add_argument("--device", type=str, help="device")
    # Default stays None so main() can tell "not given" from an explicit False.
    parser.add_argument(
        "--is_half", type=_str_to_bool, default=None, help="use half -> True"
    )
    parser.add_argument("--filter_radius", type=int, default=3, help="filter radius")
    parser.add_argument("--resample_sr", type=int, default=0, help="resample sr")
    parser.add_argument("--rms_mix_rate", type=float, default=1, help="rms mix rate")
    parser.add_argument("--protect", type=float, default=0.33, help="protect")

    args = parser.parse_args()
    # NOTE(review): presumably done so that code constructed later (e.g.
    # Config) does not try to re-parse these flags -- confirm before removing.
    sys.argv = sys.argv[:1]

    return args


def _report_and_plot(audio_path: str) -> None:
    """Print basic properties of the audio file at *audio_path* and plot it."""
    audio = AudioSegment.from_file(audio_path)

    # Display basic information about the audio file
    print(f"Channels: {audio.channels}")
    print(f"Sample Width: {audio.sample_width} bytes")
    print(f"Frame Rate (Sample Rate): {audio.frame_rate} Hz")
    print(f"Frame Width: {audio.frame_width} bytes")
    print(f"Length: {len(audio)} ms")

    # Convert the audio data to a numpy array for visualization
    samples = np.array(audio.get_array_of_samples())

    # Multi-channel audio is reshaped to one column per channel so that each
    # channel is drawn as its own line.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels))

    # Plot the waveform
    plt.figure(figsize=(15, 5))
    plt.plot(samples[:1000])  # Plotting first 1000 samples for clarity
    plt.title("Waveform of the Audio File")
    plt.xlabel("Sample")
    plt.ylabel("Amplitude")
    plt.show()


def main() -> None:
    """Convert one audio file, write the result, then report and plot it.

    Raises:
        RuntimeError: if the conversion returns no audio data.
    """
    load_dotenv()
    args = arg_parse()

    config = Config()
    # Command-line values override the Config defaults only when supplied.
    config.device = args.device if args.device else config.device
    config.is_half = args.is_half if args.is_half is not None else config.is_half

    vc = VC(config)
    vc.get_vc(args.model_name)
    info, wav_opt = vc.vc_single(
        0,
        args.input_path,
        args.f0up_key,
        None,
        args.f0method,
        args.index_path,
        None,
        args.index_rate,
        args.filter_radius,
        args.resample_sr,
        args.rms_mix_rate,
        args.protect,
    )

    # NOTE(review): assumes a failed conversion is signalled by None in place
    # of the result pair or its members -- confirm against infer.modules.vc.
    if wav_opt is None or wav_opt[0] is None or wav_opt[1] is None:
        raise RuntimeError(f"Voice conversion produced no audio: {info}")

    # scipy's wavfile.write takes (filename, rate, data).
    wavfile.write(args.opt_path, wav_opt[0], wav_opt[1])

    # Inspect the file that was just written; wav_opt itself is an in-memory
    # result, not something AudioSegment.from_file can open.
    _report_and_plot(args.opt_path)


if __name__ == "__main__":
    main()