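# NOTE: the text "Spaces: Runtime error / Runtime error" was captured here together with
# this file from a hosting page's status banner; it is not part of the program.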
# Standard library
import asyncio
import json
import logging
import os
import pathlib
import shutil
import subprocess
import sys
import tempfile
import threading
import traceback
import warnings
import wave
import zipfile
from random import shuffle
from subprocess import Popen
from time import sleep

# Environment bootstrap: executed before the project / heavy third-party imports,
# exactly as in the original ordering (working directory on sys.path, .env loaded).
from dotenv import load_dotenv
import requests

now_dir = os.getcwd()
sys.path.append(now_dir)
load_dotenv()

# Project and third-party imports, kept in their original relative order so that
# import-time side effects happen in the same sequence as before.
from infer.modules.vc.modules import VC
from infer.modules.uvr5.modules import UVRHANDLER
from infer.lib.train.process_ckpt import (
    change_info,
    extract_small_model,
    merge,
    show_info,
)
from i18n.i18n import I18nAuto
from configs.config import Config
from sklearn.cluster import MiniBatchKMeans
import torch
import numpy as np
import gradio as gr
import faiss
import fairseq
import librosa
import librosa.display
from pydub import AudioSegment
import matplotlib.pyplot as plt
import soundfile as sf
from tools import pretrain_helper
import edge_tts
from infer.modules.vc.ilariatts import tts_order_voice
# --- Runtime bootstrap ------------------------------------------------------
# Voice table for the TTS tab: the keys populate the voice dropdown and the values
# are handed to edge-tts as the voice argument.
language_dict = tts_order_voice
ilariavoices = list(language_dict.keys())

now_dir = os.getcwd()
# Keep the working directory importable without stacking duplicate entries.
if now_dir not in sys.path:
    sys.path.append(now_dir)
load_dotenv()

logging.getLogger("numba").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)

# Start every run with an empty scratch directory and point TEMP at it.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % now_dir, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "models/pth"), exist_ok=True)
# The import/download helpers also write into models/index, so create it up front.
os.makedirs(os.path.join(now_dir, "models/index"), exist_ok=True)
os.environ["TEMP"] = tmp

warnings.filterwarnings("ignore")
torch.manual_seed(114514)

config = Config()
vc = VC(config)

# Model locations come from the environment (populated by load_dotenv above).
weight_root = os.getenv("weight_root")
weight_uvr5_root = os.getenv("weight_uvr5_root")
index_root = os.getenv("index_root")
# Voice checkpoints found directly inside weight_root (bare file names).
names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

# Every ".index" file below index_root, skipping names that contain "trained".
index_paths = [
    "%s/%s" % (root, name)
    for root, _dirs, files in os.walk(index_root, topdown=False)
    for name in files
    if name.endswith(".index") and "trained" not in name
]

# File names of the UVR5 models listed by this UI.
uvr5_names = [
    '5_HP-Karaoke-UVR.pth',
    'Kim_Vocal_2.onnx',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
    'UVR-DeEcho-DeReverb.pth',
    'UVR-Denoise',
]
if config.dml:

    def forward_dml(ctx, x, scale):
        """Replacement for ``GradMultiply.forward`` installed when ``config.dml`` is set.

        Stores ``scale`` on ``ctx`` and returns a detached clone of ``x``.
        """
        ctx.scale = scale
        res = x.clone().detach()
        return res

    # Monkey-patch fairseq so every GradMultiply call goes through forward_dml.
    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
i18n = I18nAuto()
logger.info(i18n)

# --- GPU discovery ----------------------------------------------------------
ngpu = torch.cuda.device_count()
gpu_infos = []  # "<index>\t<device name>" for every accepted GPU
mem = []  # rounded memory size (GiB) of every accepted GPU
if_gpu_ok = False

if torch.cuda.is_available() or ngpu != 0:
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        # A device is accepted when its upper-cased name contains one of these markers.
        if any(
            value in gpu_name.upper()
            for value in [
                "10",
                "16",
                "20",
                "30",
                "40",
                "A2",
                "A3",
                "A4",
                "P4",
                "A50",
                "500",
                "A60",
                "70",
                "80",
                "90",
                "M4",
                "T4",
                "TITAN",
            ]
        ):
            if_gpu_ok = True
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )

if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    # Half of the smallest card's memory, rounded to an even number.
    default_batch_size = ((min(mem) // 2 + 1) // 2) * 2
else:
    gpu_info = i18n("Your GPU doesn't work for training")
    default_batch_size = 1

# Dash-separated device indices. Take the whole index field before the tab: the
# previous `info[0]` kept only the first character, truncating indices >= 10.
gpus = "-".join(info.split("\t", 1)[0] for info in gpu_infos)
class ToolButton(gr.Button, gr.components.FormComponent):
    """Button that is always constructed with ``variant="tool"``."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the parent constructor, forcing ``variant="tool"``."""
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        """Report the block name ``"button"``."""
        return "button"
weight_root = os.getenv("weight_root")
index_root = os.getenv("index_root")
audio_root = "audios"

# Audio extensions (without the dot) accepted for the input-audio dropdown.
sup_audioext = {'wav', 'mp3', 'flac', 'ogg', 'opus',
                'm4a', 'mp4', 'aac', 'alac', 'wma',
                'aiff', 'webm', 'ac3'}
# Match on ".<ext>" so a name merely ending in e.g. "wav" is not mistaken for audio.
_audio_suffixes = tuple("." + ext for ext in sup_audioext)

# Model files (.pth / .onnx) anywhere below weight_root, as full paths.
names = [os.path.join(root, file)
         for root, _, files in os.walk(weight_root)
         for file in files
         if file.endswith((".pth", ".onnx"))]

# Index files below index_root, skipping names that contain "trained".
indexes_list = [os.path.join(root, name)
                for root, _, files in os.walk(index_root, topdown=False)
                for name in files
                if name.endswith(".index") and "trained" not in name]

# Audio files below audio_root; the extension check ignores letter case.
audio_paths = [os.path.join(root, name)
               for root, _, files in os.walk(audio_root, topdown=False)
               for name in files
               if name.lower().endswith(_audio_suffixes)]
def get_pretrained_files(directory, keyword, filter_str):
    """Map matching ``.pth`` file names in ``directory`` to their joined paths.

    A file matches when its name ends with ``.pth`` and contains both
    ``keyword`` and ``filter_str``.

    Args:
        directory: Directory to list (not searched recursively).
        keyword: Substring that must occur in the file name.
        filter_str: Second substring that must occur in the file name.

    Returns:
        dict mapping file name -> ``os.path.join(directory, file name)``.
    """
    return {
        filename: os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if filename.endswith(".pth") and keyword in filename and filter_str in filename
    }
# Directory holding the pretrained generator/discriminator checkpoints.
pretrained_directory = "assets/pretrained_v2"

# Every entry of that directory, keyed by file name.
pretrained_path = {
    filename: os.path.join(pretrained_directory, filename)
    for filename in os.listdir(pretrained_directory)
}

# ".pth" files whose names contain "f0" plus "G" / "D" respectively.
pretrained_G_files = get_pretrained_files(pretrained_directory, "G", "f0")
pretrained_D_files = get_pretrained_files(pretrained_directory, "D", "f0")
def get_pretrained_models(path_str, f0_str, sr2):
    """Resolve the pretrained generator/discriminator paths for a sample-rate key.

    Args:
        path_str: Unused; kept so existing callers keep working.
        f0_str: Passed to ``pretrain_helper.get_pretrained_models`` to obtain the
            mapping from sample-rate key to generator file name.
        sr2: Sample-rate key looked up in that mapping.

    Returns:
        ``(generator_path, discriminator_path)`` inside ``pretrained_directory``,
        or ``("", "")`` when the mapping has no entry for ``sr2``.
    """
    sr_mapping = pretrain_helper.get_pretrained_models(f0_str)
    pretrained_G_filename = sr_mapping.get(sr2, "")
    if not pretrained_G_filename:
        logging.warning(
            "Pretrained models not found for sample rate %s, will not use pretrained models",
            sr2,
        )
        # Return empty strings (not the bare directory path) so that callers'
        # `== ""` checks really skip the pretrained weights.
        return "", ""
    # The discriminator name is derived by swapping every "G" for "D".
    # NOTE(review): this also rewrites any other capital "G" in the name — confirm
    # against the actual pretrained file names.
    pretrained_D_filename = pretrained_G_filename.replace("G", "D")
    return (
        os.path.join(pretrained_directory, pretrained_G_filename),
        os.path.join(pretrained_directory, pretrained_D_filename),
    )
# Rebind `names` / `index_paths` (replacing any value bound earlier in the module):
# bare ".pth" file names directly inside weight_root ...
names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

# ... and every ".index" file below index_root whose name does not contain "trained".
index_paths = [
    "%s/%s" % (root, name)
    for root, _dirs, files in os.walk(index_root, topdown=False)
    for name in files
    if name.endswith(".index") and "trained" not in name
]
def generate_spectrogram_and_get_info(audio_file):
    """Render a mel spectrogram of ``audio_file`` and summarise its properties.

    The image is written to ``spectrogram.png`` in the working directory
    (overwritten on every call).

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        ``(info_table, "spectrogram.png")`` where ``info_table`` is a Markdown table.
    """
    y, sr = librosa.load(audio_file, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=256)
    # NOTE(review): melspectrogram is called with its default settings; confirm
    # whether power_to_db would be the matching conversion here.
    log_S = librosa.amplitude_to_db(S, ref=np.max, top_db=256)

    fig = plt.figure(figsize=(12, 5.5))
    try:
        librosa.display.specshow(log_S, sr=sr, x_axis='time')
        plt.colorbar(format='%+2.0f dB', pad=0.01)
        plt.tight_layout(pad=0.5)
        plt.savefig('spectrogram.png', dpi=500)
    finally:
        # Release the figure; otherwise every call leaves one more open in pyplot.
        plt.close(fig)

    audio_info = sf.info(audio_file)
    # Bits per sample for the subtypes we can size; unknown subtypes count as 0.
    bit_depth = {
        'PCM_S8': 8,
        'PCM_U8': 8,
        'PCM_16': 16,
        'PCM_24': 24,
        'PCM_32': 32,
        'FLOAT': 32,
        'DOUBLE': 64,
    }.get(audio_info.subtype, 0)

    minutes, seconds = divmod(audio_info.duration, 60)
    seconds, milliseconds = divmod(seconds, 1)
    milliseconds *= 1000

    # Bitrate covers all channels, consistent with the "Bit per second" row.
    speed_in_kbps = audio_info.samplerate * audio_info.channels * bit_depth / 1000
    filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))

    info_table = f"""
| Information | Value |
| :---: | :---: |
| File Name | {filename_without_extension} |
| Duration | {int(minutes)} minutes - {int(seconds)} seconds - {int(milliseconds)} milliseconds |
| Bitrate | {speed_in_kbps} kbp/s |
| Audio Channels | {audio_info.channels} |
| Samples per second | {audio_info.samplerate} Hz |
| Bit per second | {audio_info.samplerate * audio_info.channels * bit_depth} bit/s |
"""
    return info_table, "spectrogram.png"
def change_choices():
    """Rescan the model, index and audio folders for the refresh button.

    Returns:
        Three update dicts (``{"choices": [...], "__type__": "update"}``) holding,
        in order: sorted ``.pth`` names in ``weight_root``, sorted ``.index`` paths
        below ``index_root`` (names containing "trained" excluded), and sorted
        paths of the entries in the ``audios`` folder.
    """
    names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

    index_paths = [
        "%s/%s" % (root, name)
        for root, _dirs, files in os.walk(index_root, topdown=False)
        for name in files
        if name.endswith(".index") and "trained" not in name
    ]

    # A missing audio folder simply means there is nothing to offer yet.
    audio_dir = os.path.join(now_dir, "audios")
    audios = []
    if os.path.isdir(audio_dir):
        audios = [os.path.join(audio_root, file) for file in os.listdir(audio_dir)]

    return (
        {"choices": sorted(names), "__type__": "update"},
        {"choices": sorted(index_paths), "__type__": "update"},
        {"choices": sorted(audios), "__type__": "update"},
    )
def tts_and_convert(ttsvoice, text, spk_item, vc_transform, f0_file, f0method, file_index1, file_index2, index_rate, filter_radius, resample_sr, rms_mix_rate, protect):
    """Synthesise ``text`` with edge-tts, then run the result through voice conversion.

    Args:
        ttsvoice: Key into ``language_dict`` selecting the edge-tts voice.
        text: Text to speak.
        spk_item, vc_transform, f0_file, f0method, file_index1, file_index2,
        index_rate, filter_radius, resample_sr, rms_mix_rate, protect:
            Forwarded unchanged to ``vc.vc_single``.

    Returns:
        Whatever ``vc.vc_single`` returns for the synthesised audio file.
    """
    voice = language_dict[ttsvoice]
    aud_path = './TEMP/temp_ilariatts.mp3'
    # Make sure the scratch folder exists before edge-tts writes into it.
    os.makedirs(os.path.dirname(aud_path), exist_ok=True)
    asyncio.run(edge_tts.Communicate(text, voice).save(aud_path))

    # The former `vc_output1.update(...)` call was dropped: its return value was
    # discarded, so it never changed the console textbox.

    # Same call as regular inference; the second positional argument is None and the
    # synthesised file is passed as the third.
    return vc.vc_single(spk_item, None, aud_path, vc_transform, f0_file, f0method, file_index1, file_index2, index_rate, filter_radius, resample_sr, rms_mix_rate, protect)
def _store_model_file(source, destination):
    """Move ``source`` to ``destination`` unless the destination already exists.

    Returns True when the file was moved, False when it was skipped.
    """
    if os.path.exists(destination):
        print(f"File {destination} already exists. Skipping.")
        return False
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    # shutil.move (unlike os.rename) also works across filesystems, e.g. when the
    # upload lives in the system temp directory.
    shutil.move(source, destination)
    return True


def import_files(file):
    """Import an uploaded model file, index file, or zip archive of both.

    ``.pth`` files go to ``./models/pth/`` and ``.index`` files to
    ``./models/index/``; files that already exist there are left untouched.
    Zip archives are unpacked into a private temporary directory first.

    Args:
        file: Upload object exposing the file path as ``.name``, or None.

    Returns:
        A status message for the UI.
    """
    if file is None:
        return "No file has been uploaded."

    file_name = file.name
    if file_name.endswith('.zip'):
        # Use a throw-away directory instead of ./TEMP: removing ./TEMP afterwards
        # would wipe the application's own scratch folder.
        with tempfile.TemporaryDirectory() as temp_dir:
            with zipfile.ZipFile(file_name, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)
            for root, _dirs, files in os.walk(temp_dir):
                for member in files:
                    if member.endswith('.pth'):
                        destination = './models/pth/' + member
                    elif member.endswith('.index'):
                        destination = './models/index/' + member
                    else:
                        continue
                    _store_model_file(os.path.join(root, member), destination)
        return "Zip file has been successfully extracted."

    if file_name.endswith('.pth'):
        destination = './models/pth/' + os.path.basename(file_name)
        if _store_model_file(file_name, destination):
            return "PTH file has been successfully imported."
        return f"File {destination} already exists. Skipping."

    if file_name.endswith('.index'):
        destination = './models/index/' + os.path.basename(file_name)
        if _store_model_file(file_name, destination):
            return "Index file has been successfully imported."
        return f"File {destination} already exists. Skipping."

    return "Unsupported file type."
def import_button_click(file):
    """Button callback: delegate the uploaded ``file`` to ``import_files``."""
    return import_files(file)
def get_audio_duration(audio_file_path):
    """Return the duration of ``audio_file_path`` in minutes (via ``sf.info``)."""
    audio_info = sf.info(audio_file_path)
    duration_minutes = audio_info.duration / 60
    return duration_minutes
def clean():
    """Return an update dict that resets the target component's value to ``""``."""
    return {"value": "", "__type__": "update"}
# Sample-rate option label -> sample rate in Hz.
sr_dict = {
    "32k": 32000,
    "40k": 40000,
    "48k": 48000,
    "OV2-32k": 32000,
    "OV2-40k": 40000,
    "RIN-40k": 40000,
    "Snowie-40k": 40000,
    "Snowie-48k": 48000,
    "SnowieV3.1-40k": 40000,
    "SnowieV3.1-32k": 32000,
    "SnowieV3.1-48k": 48000,
    "SnowieV3.1-RinE3-40K": 40000,
}
def durations(sample_rate, model_options, qualities, duration):
    """Pick an option based on ``duration`` and ``sample_rate``.

    Args:
        sample_rate: Sample rate in Hz.
        model_options: Mapping with the keys ``'32k'``, ``'40k'`` and ``'48k'``.
        qualities: Mapping with the keys ``'short'`` and ``'other'``.
        duration: Duration value; anything up to and including 350 counts as short.

    Returns:
        ``qualities['short']`` for short durations; otherwise the ``model_options``
        entry for 32000/40000/48000 Hz, or ``qualities['other']`` for any other rate.
    """
    if duration <= 350:
        return qualities['short']

    option_key = {32000: '32k', 40000: '40k', 48000: '48k'}.get(sample_rate)
    if option_key is None:
        return qualities['other']
    return model_options[option_key]
def get_training_info(audio_file):
    """Suggest a pretrain and epoch count from the length of ``audio_file``.

    Args:
        audio_file: Path of the dataset audio file, or None.

    Returns:
        A Markdown-formatted recommendation, or an explanatory message when no
        file was given or the duration falls outside the 0-60 minute table.
    """
    if audio_file is None:
        return 'Please provide an audio file!'

    duration = get_audio_duration(audio_file)
    # Read the sample rate through soundfile: no file handle is left open and the
    # lookup is not restricted to what the `wave` module can parse.
    sample_rate = sf.info(audio_file).samplerate

    # (min_minutes, max_minutes) -> (epochs, pretrain); upper bound is exclusive.
    training_info = {
        (0, 2): (150, 'OV2'),
        (2, 3): (200, 'OV2'),
        (3, 5): (250, 'OV2'),
        (5, 10): (300, 'Normal'),
        (10, 25): (500, 'Normal'),
        (25, 45): (700, 'Normal'),
        (45, 60): (1000, 'Normal')
    }
    for (min_duration, max_duration), (epochs, pretrain) in training_info.items():
        if min_duration <= duration < max_duration:
            return f'You should use the **{pretrain}** pretrain with **{epochs}** epochs at **{sample_rate/1000}khz** sample rate.'
    return 'Duration is not within the specified range!'
def if_done(done, p):
    """Block until process ``p`` has exited, then set ``done[0]`` to True.

    Args:
        done: One-element list used as a shared completion flag.
        p: Object with a ``poll()`` method returning None while still running.
    """
    while p.poll() is None:
        sleep(0.5)
    done[0] = True
def on_button_click(audio_file_path):
    """Button callback: return the training recommendation for ``audio_file_path``."""
    return get_training_info(audio_file_path)
def download_from_url(url, model):
    """Download a zipped voice model and install its ``.pth`` / ``.index`` files.

    The archive is fetched into ``./zips``, unpacked into ``./unzips`` and the
    relevant files are copied to ``./models/pth/<model>.pth`` and
    ``./models/index/<model>.index``. Both scratch folders are removed afterwards,
    whether or not the download succeeded.

    Args:
        url: Source URL (Google Drive, mega.nz, or a direct link).
        model: Name to store the model under.

    Returns:
        A status message for the UI; failures are reported as ``"ERROR - ..."``.
    """
    # Strip first so a whitespace-only URL is rejected as empty.
    url = (url or "").strip()
    if url == '':
        return "URL cannot be left empty."
    if not model:
        return "You need to name your model. For example: Ilaria"

    for directory in ("zips", "unzips"):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory, exist_ok=True)

    archive_path = './zips/' + model + '.zip'
    try:
        if "drive.google.com" in url:
            # check=True turns a failing gdown run into CalledProcessError.
            subprocess.run(["gdown", url, "--fuzzy", "-O", archive_path], check=True)
        elif "mega.nz" in url:
            # NOTE(review): `Mega` is not imported in the part of the file visible
            # here; if it is undefined this raises NameError, which is reported by
            # the generic handler below.
            m = Mega()
            m.download_url(url, './zips')
        else:
            response = requests.get(url)
            response.raise_for_status()  # Raise an exception for HTTP errors
            with open(archive_path, 'wb') as archive:
                archive.write(response.content)

        shutil.unpack_archive(archive_path, "./unzips", 'zip')
        for root, _dirs, files in os.walk('./unzips'):
            for entry in files:
                file_path = os.path.join(root, entry)
                if entry.endswith(".index"):
                    os.makedirs('./models/index', exist_ok=True)
                    shutil.copy2(file_path, f'./models/index/{model}.index')
                elif "G_" not in entry and "D_" not in entry and entry.endswith(".pth"):
                    # Files with "G_"/"D_" in the name are skipped.
                    os.makedirs('./models/pth', exist_ok=True)
                    shutil.copy(file_path, f'./models/pth/{model}.pth')
        return "Model downloaded, you can go back to the inference page!"
    except subprocess.CalledProcessError as e:
        return f"ERROR - Download failed (gdown): {str(e)}"
    except requests.exceptions.RequestException as e:
        return f"ERROR - Download failed (requests): {str(e)}"
    except Exception as e:
        return f"ERROR - The test failed: {str(e)}"
    finally:
        # Always drop the scratch folders, including after a failure.
        shutil.rmtree("zips", ignore_errors=True)
        shutil.rmtree("unzips", ignore_errors=True)
def transfer_files(filething, dataset_dir='dataset/'):
    """Copy uploaded files into the dataset directory.

    Args:
        filething: Iterable of upload objects exposing their path as ``.name``
            (None is treated as "nothing uploaded").
        dataset_dir: Target directory; created when missing.

    Returns:
        A status message for the UI.
    """
    if not filething:
        return "No files were provided."

    os.makedirs(dataset_dir, exist_ok=True)
    for upload in filething:
        source = upload.name
        destination = os.path.join(dataset_dir, os.path.basename(source))
        shutil.copyfile(source, destination)
    return "Transferred files to dataset directory!"
def if_done_multi(done, ps):
    """Block until every process in ``ps`` has exited, then set ``done[0]`` to True.

    Args:
        done: One-element list used as a shared completion flag.
        ps: Iterable of objects with a ``poll()`` method returning None while running.
    """
    # Re-check from the first process every half second while any is still running.
    while any(p.poll() is None for p in ps):
        sleep(0.5)
    done[0] = True
def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    """Run the dataset preprocessing script and stream its log.

    Generator: starts ``infer/modules/train/preprocess.py`` as a subprocess and
    yields the full contents of ``logs/<exp_dir>/preprocess.log`` about once per
    second until the process exits, then yields the final log once more.

    Args:
        trainset_dir: Dataset folder passed to the script.
        exp_dir: Experiment name; the log lives in ``<now_dir>/logs/<exp_dir>``.
        sr: Sample-rate label; translated through ``sr_dict``.
        n_p: Process-count argument passed to the script.
    """
    sr = sr_dict[sr]
    log_dir = "%s/logs/%s" % (now_dir, exp_dir)
    os.makedirs(log_dir, exist_ok=True)
    log_path = "%s/preprocess.log" % log_dir
    # Start from an empty log so the first read below cannot fail.
    with open(log_path, "w"):
        pass

    per = 3.0 if config.is_half else 3.7
    # Argument list (no shell): values typed into the UI cannot be interpreted as
    # shell syntax.
    cmd = [
        str(config.python_cmd),
        "infer/modules/train/preprocess.py",
        str(trainset_dir),
        str(sr),
        str(n_p),
        log_dir,
        str(config.noparallel),
        "%.1f" % per,
    ]
    logger.info(" ".join(cmd))
    p = Popen(cmd)

    done = [False]
    threading.Thread(
        target=if_done,
        args=(
            done,
            p,
        ),
    ).start()

    while True:
        with open(log_path, "r") as f:
            yield f.read()
        sleep(1)
        if done[0]:
            break

    with open(log_path, "r") as f:
        log = f.read()
    logger.info(log)
    yield log
def _spawn_extract_step(args):
    """Log and start one extraction subprocess in ``now_dir`` (argument list, no shell)."""
    cmd = [str(arg) for arg in args]
    logger.info(" ".join(cmd))
    return Popen(cmd, cwd=now_dir)


def _stream_log_until_done(log_path, done):
    """Yield the log about once per second until ``done[0]`` is set, then the final log."""
    while True:
        with open(log_path, "r") as f:
            yield f.read()
        sleep(1)
        if done[0]:
            break
    with open(log_path, "r") as f:
        log = f.read()
    logger.info(log)
    yield log


def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe):
    """Run pitch (optional) and feature extraction, streaming the shared log.

    Generator: yields the contents of ``logs/<exp_dir>/extract_f0_feature.log``
    while the subprocesses run.

    Args:
        gpus: Dash-separated device ids; one feature-extraction process per id.
        n_p: Process-count argument for the non-GPU pitch extractor.
        f0method: Pitch method; ``"rmvpe_gpu"`` selects the RMVPE scripts.
        if_f0: When truthy, pitch extraction runs before feature extraction.
        exp_dir: Experiment name under ``<now_dir>/logs``.
        version19: Version argument forwarded to the feature extractor.
        gpus_rmvpe: Dash-separated device ids for RMVPE; ``"-"`` selects the
            ``extract_f0_rmvpe_dml.py`` script instead.
    """
    gpus = gpus.split("-")
    log_dir = "%s/logs/%s" % (now_dir, exp_dir)
    os.makedirs(log_dir, exist_ok=True)
    log_path = "%s/extract_f0_feature.log" % log_dir
    # Start from an empty log so the streaming reads cannot fail.
    with open(log_path, "w"):
        pass

    if if_f0:
        if f0method != "rmvpe_gpu":
            p = _spawn_extract_step(
                [
                    config.python_cmd,
                    "infer/modules/train/extract/extract_f0_print.py",
                    log_dir,
                    n_p,
                    f0method,
                ]
            )
            done = [False]
            threading.Thread(target=if_done, args=(done, p)).start()
        elif gpus_rmvpe != "-":
            gpus_rmvpe = gpus_rmvpe.split("-")
            leng = len(gpus_rmvpe)
            # One RMVPE worker per listed device: (total, index, device id).
            ps = [
                _spawn_extract_step(
                    [
                        config.python_cmd,
                        "infer/modules/train/extract/extract_f0_rmvpe.py",
                        leng,
                        idx,
                        n_g,
                        log_dir,
                        config.is_half,
                    ]
                )
                for idx, n_g in enumerate(gpus_rmvpe)
            ]
            done = [False]
            threading.Thread(target=if_done_multi, args=(done, ps)).start()
        else:
            p = _spawn_extract_step(
                [
                    config.python_cmd,
                    "infer/modules/train/extract/extract_f0_rmvpe_dml.py",
                    log_dir,
                ]
            )
            # This variant is awaited synchronously before any log is streamed.
            p.wait()
            done = [True]
        yield from _stream_log_until_done(log_path, done)

    # Feature extraction: one process per entry in `gpus`.
    leng = len(gpus)
    ps = [
        _spawn_extract_step(
            [
                config.python_cmd,
                "infer/modules/train/extract_feature_print.py",
                config.device,
                leng,
                idx,
                n_g,
                log_dir,
                version19,
            ]
        )
        for idx, n_g in enumerate(gpus)
    ]
    done = [False]
    threading.Thread(target=if_done_multi, args=(done, ps)).start()
    yield from _stream_log_until_done(log_path, done)
def change_sr2(sr2, if_f0_3, version19):
    """Return the pretrained (G, D) paths for the newly selected sample rate.

    Args:
        sr2: Selected sample-rate key.
        if_f0_3: Whether pitch guidance is enabled (selects the "f0" mapping).
        version19: Model version; ``"v1"`` gives an empty path suffix, else ``"_v2"``.
    """
    path_str = "" if version19 == "v1" else "_v2"
    f0_str = "f0" if if_f0_3 else ""
    return get_pretrained_models(path_str, f0_str, sr2)
def change_version19(sr2, if_f0_3, version19):
    """React to a model-version change.

    Args:
        sr2: Currently selected sample-rate key.
        if_f0_3: Whether pitch guidance is enabled.
        version19: Newly selected version (``"v1"`` or anything else for v2).

    Returns:
        ``(generator_path, discriminator_path, sample_rate_update)`` where the last
        item is an update dict with the choices valid for the version.
    """
    path_str = "" if version19 == "v1" else "_v2"
    # NOTE(review): "32k" is replaced by "40k" for v1 although the v1 choice list
    # below still offers "32k" — confirm which is intended.
    if sr2 == "32k" and version19 == "v1":
        sr2 = "40k"

    if version19 == "v1":
        choices = ["32k", "40k", "48k"]
    else:
        choices = ["32k", "40k", "48k", "OV2-32k", "OV2-40k", "RIN-40k", "Snowie-40k", "Snowie-48k"]
    to_return_sr2 = {"choices": choices, "__type__": "update", "value": sr2}

    f0_str = "f0" if if_f0_3 else ""
    return (
        *get_pretrained_models(path_str, f0_str, sr2),
        to_return_sr2,
    )
def change_f0(if_f0_3, sr2, version19):
    """React to toggling pitch guidance.

    Args:
        if_f0_3: Whether pitch guidance is enabled.
        sr2: Selected sample-rate key.
        version19: Selected model version.

    Returns:
        Two visibility update dicts (visible iff ``if_f0_3``) followed by the
        pretrained generator and discriminator paths.
    """
    path_str = "" if version19 == "v1" else "_v2"
    # Plain truthiness, matching how the sibling change_* callbacks test the flag.
    f0_str = "f0" if if_f0_3 else ""
    return (
        {"visible": if_f0_3, "__type__": "update"},
        {"visible": if_f0_3, "__type__": "update"},
        *get_pretrained_models(path_str, f0_str, sr2),
    )
def click_train(
    exp_dir1,
    sr2,
    if_f0_3,
    spk_id5,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
):
    """Write the training file list and config, then run the training script.

    Blocks until ``infer/modules/train/train.py`` exits.

    Args:
        exp_dir1: Experiment name under ``<now_dir>/logs``.
        sr2: Sample-rate key (used for the mute sample, config lookup and ``-sr``).
        if_f0_3: Whether pitch files are part of the file list (``-f0``).
        spk_id5: Speaker id appended to every file-list line.
        save_epoch10: Value for ``-se``.
        total_epoch11: Value for ``-te``.
        batch_size12: Value for ``-bs``.
        if_save_latest13: Compared with ``i18n("是")`` to produce the ``-l`` flag.
        pretrained_G14: Generator checkpoint for ``-pg``; ``""`` omits it.
        pretrained_D15: Discriminator checkpoint for ``-pd``; ``""`` omits it.
        gpus16: Value for ``-g``; omitted when falsy.
        if_cache_gpu17: Compared with ``i18n("是")`` to produce the ``-c`` flag.
        if_save_every_weights18: Compared with ``i18n("是")`` for the ``-sw`` flag.
        version19: Model version (``"v1"`` uses 256-dim features, otherwise 768).

    Returns:
        The message ``"You can view console or train.log"``.
    """
    # Kept global to preserve the module-level names the original code published.
    global f0_dir, f0nsf_dir

    def _stems(directory):
        """Names in ``directory`` truncated at the first dot."""
        return {entry.split(".")[0] for entry in os.listdir(directory)}

    def _flag(choice):
        """"1" when ``choice`` equals the localized "yes" label, else "0"."""
        return "1" if choice == i18n("是") else "0"

    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    gt_wavs_dir = "%s/0_gt_wavs" % exp_dir
    feature_dir = (
        "%s/3_feature256" % exp_dir
        if version19 == "v1"
        else "%s/3_feature768" % exp_dir
    )

    # Only samples present in every required folder take part in training.
    if if_f0_3:
        f0_dir = "%s/2a_f0" % exp_dir
        f0nsf_dir = "%s/2b-f0nsf" % exp_dir
        names = (
            _stems(gt_wavs_dir)
            & _stems(feature_dir)
            & _stems(f0_dir)
            & _stems(f0nsf_dir)
        )
    else:
        names = _stems(gt_wavs_dir) & _stems(feature_dir)

    opt = []
    for name in names:
        if if_f0_3:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    f0_dir.replace("\\", "\\\\"),
                    name,
                    f0nsf_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
        else:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )

    # Two entries pointing at the bundled "mute" sample are always appended.
    fea_dim = 256 if version19 == "v1" else 768
    if if_f0_3:
        mute_line = (
            "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy"
            "|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
            % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
        )
    else:
        mute_line = (
            "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
            % (now_dir, sr2, now_dir, fea_dim, spk_id5)
        )
    opt.extend([mute_line, mute_line])

    shuffle(opt)
    with open("%s/filelist.txt" % exp_dir, "w") as f:
        f.write("\n".join(opt))
    logger.debug("Write filelist done")
    logger.info("Use gpus: %s", str(gpus16))
    if pretrained_G14 == "":
        logger.info("No pretrained Generator")
    if pretrained_D15 == "":
        logger.info("No pretrained Discriminator")

    if version19 == "v1" or sr2 == "40k":
        config_path = "v1/%s.json" % sr2
    else:
        config_path = "v2/%s.json" % sr2
    config_save_path = os.path.join(exp_dir, "config.json")
    # An existing config.json is never overwritten.
    if not pathlib.Path(config_save_path).exists():
        with open(config_save_path, "w", encoding="utf-8") as f:
            json.dump(
                config.json_config[config_path],
                f,
                ensure_ascii=False,
                indent=4,
                sort_keys=True,
            )
            f.write("\n")

    # Argument list (no shell): UI-provided values such as the experiment name or
    # checkpoint paths are passed verbatim, spaces included.
    cmd = [
        str(config.python_cmd),
        "infer/modules/train/train.py",
        "-e", str(exp_dir1),
        "-sr", str(sr2),
        "-f0", "1" if if_f0_3 else "0",
        "-bs", str(batch_size12),
    ]
    if gpus16:
        cmd += ["-g", str(gpus16)]
    cmd += ["-te", str(total_epoch11), "-se", str(save_epoch10)]
    if pretrained_G14 != "":
        cmd += ["-pg", str(pretrained_G14)]
    if pretrained_D15 != "":
        cmd += ["-pd", str(pretrained_D15)]
    cmd += [
        "-l", _flag(if_save_latest13),
        "-c", _flag(if_cache_gpu17),
        "-sw", _flag(if_save_every_weights18),
        "-v", str(version19),
    ]
    logger.info(" ".join(cmd))
    p = Popen(cmd, cwd=now_dir)
    p.wait()
    return "You can view console or train.log"
def train_index(exp_dir1, version19):
    """Build the faiss index for an experiment, yielding progress text.

    Generator: each yielded string is the accumulated progress log so far. The
    trained and the populated index are written into ``logs/<exp_dir1>`` together
    with ``total_fea.npy``.

    Args:
        exp_dir1: Experiment name under ``logs``.
        version19: ``"v1"`` reads ``3_feature256`` (256-dim), anything else
            ``3_feature768`` (768-dim).
    """
    exp_dir = "logs/%s" % exp_dir1
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = (
        "%s/3_feature256" % exp_dir
        if version19 == "v1"
        else "%s/3_feature768" % exp_dir
    )
    # This function is a generator, so messages must be yielded: a plain
    # `return "..."` would never reach the caller iterating over it.
    if not os.path.exists(feature_dir):
        yield "Please perform Feature Extraction First!"
        return
    listdir_res = list(os.listdir(feature_dir))
    if len(listdir_res) == 0:
        yield "Please perform Feature Extraction First!"
        return

    infos = []
    npys = [np.load("%s/%s" % (feature_dir, name)) for name in sorted(listdir_res)]
    big_npy = np.concatenate(npys, 0)
    # Shuffle the rows.
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]

    if big_npy.shape[0] > 2e5:
        # Large feature sets are reduced to 10k k-means centres first.
        infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
        yield "\n".join(infos)
        try:
            big_npy = (
                MiniBatchKMeans(
                    n_clusters=10000,
                    verbose=True,
                    batch_size=256 * config.n_cpu,
                    compute_labels=False,
                    init="random",
                )
                .fit(big_npy)
                .cluster_centers_
            )
        except Exception:
            # Best effort: report the failure and continue with the full feature set.
            info = traceback.format_exc()
            logger.info(info)
            infos.append(info)
            yield "\n".join(infos)

    np.save("%s/total_fea.npy" % exp_dir, big_npy)
    # At least one list, so very small feature sets still give a valid factory string.
    n_ivf = max(1, min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39))
    infos.append("%s,%s" % (big_npy.shape, n_ivf))
    yield "\n".join(infos)

    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
    infos.append("training")
    yield "\n".join(infos)
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )

    infos.append("adding")
    yield "\n".join(infos)
    # Add the vectors in fixed-size batches.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i: i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    infos.append(
        "Success,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    yield "\n".join(infos)
# The RMVPE GPU settings are only offered when DirectML is not in use.
F0GPUVisible = config.dml is False


def change_f0_method(f0method8):
    """Return a visibility update for the selected pitch method.

    Args:
        f0method8: Selected pitch-extraction method.

    Returns:
        Update dict whose ``visible`` is ``F0GPUVisible`` for ``"rmvpe_gpu"`` and
        False for every other method.
    """
    visible = F0GPUVisible if f0method8 == "rmvpe_gpu" else False
    return {"visible": visible, "__type__": "update"}
# Output components are created ahead of the layout and placed later through
# their .render() method.
vc_output1 = gr.Textbox(label=i18n("Console"))
vc_output2 = gr.Audio(label=i18n("Audio output"))
with gr.Blocks(title="Ilaria RVC 💖") as app: | |
gr.Markdown("<h1> Ilaria RVC 💖 </h1>") | |
gr.Markdown(value=i18n("Made with 💖 by Ilaria | Support her on [Ko-Fi](https://ko-fi.com/ilariaowo)")) | |
gr.Markdown(i18n("For voice models and support join [AI Hub](https://discord.gg/AIHUB)")) | |
with gr.Tabs(): | |
with gr.TabItem(i18n("Inference")): | |
with gr.Row(): | |
sid0= gr.Dropdown(label=i18n("Voice"), choices=sorted(names)) | |
sid1= sid0 | |
with gr.Column(): | |
refresh_button = gr.Button(i18n("Refresh"), variant="primary") | |
clean_button = gr.Button(i18n("Unload Voice from VRAM"), variant="primary") | |
vc_transform0 = gr.inputs.Slider( | |
label=i18n( | |
"Pitch: -24 is lower (2 octaves) and 24 is higher (2 octaves)"), | |
minimum=-24, | |
maximum=24, | |
default=0, | |
step=1, | |
) | |
clean_button.click( | |
fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean" | |
) | |
with gr.TabItem(i18n("Inference")): | |
with gr.Group(): | |
with gr.Row(): | |
with gr.Column(): | |
input_audio1 = gr.Audio( | |
label=i18n("Upload Audio file"), | |
type="filepath", | |
) | |
record_button = gr.Audio(source="microphone", label="Use your microphone", | |
type="filepath") | |
input_audio0 = gr.Dropdown( | |
label=i18n("Select a file from the audio folder"), | |
choices=sorted(audio_paths), | |
value='', | |
interactive=True, | |
) | |
record_button.change( | |
fn=lambda x: x, | |
inputs=[record_button], | |
outputs=[input_audio0], | |
) | |
file_index1 = gr.Textbox( | |
label=i18n("Path of index"), | |
placeholder=".\models\index", | |
interactive=True, | |
visible=False, | |
) | |
file_index2 = gr.Textbox( | |
label=i18n("Auto-detect index path"), | |
choices=sorted(index_paths), | |
interactive=True, | |
visible=False, | |
) | |
with gr.Column(): | |
with gr.Accordion('Advanced Settings', open=False, visible=False): | |
with gr.Column(): | |
f0method0 = gr.Radio( | |
label=i18n("Pitch Extraction, rmvpe is best"), | |
choices=["harvest", "crepe", "rmvpe"] | |
if config.dml is False | |
else ["harvest", "rmvpe"], | |
value="rmvpe", | |
interactive=True, | |
) | |
resample_sr0 = gr.Slider( | |
minimum=0, | |
maximum=48000, | |
label=i18n("Resampling, 0=none"), | |
value=0, | |
step=1, | |
interactive=True, | |
) | |
rms_mix_rate0 = gr.Slider( | |
minimum=0, | |
maximum=1, | |
label=i18n("0=Input source volume, 1=Normalized Output"), | |
value=0.25, | |
interactive=True, | |
) | |
protect0 = gr.Slider( | |
minimum=0, | |
maximum=0.5, | |
label=i18n( | |
"Protect clear consonants and breathing sounds, preventing electro-acoustic tearing and other artifacts, 0.5 does not open"), | |
value=0.33, | |
step=0.01, | |
interactive=True, | |
) | |
filter_radius0 = gr.Slider( | |
minimum=0, | |
maximum=7, | |
label=i18n(">=3 apply median filter to the harvested pitch results"), | |
value=3, | |
step=1, | |
interactive=True, | |
) | |
index_rate1 = gr.Slider( | |
minimum=0, | |
maximum=1, | |
label=i18n("Index Ratio"), | |
value=0.40, | |
interactive=True, | |
) | |
f0_file = gr.File( | |
label=i18n("F0 curve file [optional]"), | |
visible=False, | |
) | |
refresh_button.click( | |
fn=change_choices, | |
inputs=[], | |
outputs=[sid0, file_index2, input_audio1], | |
api_name="infer_refresh", | |
) | |
file_index1 = gr.Textbox( | |
label=i18n("Path of index"), | |
placeholder="%userprofile%\\Desktop\\models\\model_example.index", | |
interactive=True, | |
) | |
file_index2 = gr.Dropdown( | |
label=i18n("Auto-detect index path"), | |
choices=sorted(index_paths), | |
interactive=True, | |
) | |
spk_item = gr.Slider( | |
minimum=0, | |
maximum=2333, | |
step=1, | |
label=i18n("Speaker ID (Auto-Detected)"), | |
value=0, | |
visible=True, | |
interactive=False, | |
) | |
with gr.Accordion('IlariaTTS', open=True): | |
with gr.Column(): | |
ilariaid=gr.Dropdown(label="Voice:", choices=ilariavoices, interactive=True, value="English-Jenny (Female)") | |
ilariatext = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.") | |
ilariatts_button = gr.Button(value="Speak and Convert") | |
ilariatts_button.click(tts_and_convert, | |
[ilariaid, | |
ilariatext, | |
spk_item, | |
vc_transform0, | |
f0_file, | |
f0method0, | |
file_index1, | |
file_index2, | |
index_rate1, | |
filter_radius0, | |
resample_sr0, | |
rms_mix_rate0, | |
protect0] | |
, [vc_output1, vc_output2]) | |
# Second copy of the inference controls. The original author notes that the UI
# breaks without this duplication (to be optimized). Every name below is
# rebound here, so listeners registered after this point (e.g. the Convert
# button) read these instances rather than the earlier ones.
with gr.Accordion('Advanced Settings', open=False, visible=True):
    with gr.Column():
        f0method0 = gr.Radio(
            label=i18n("Pitch Extraction, rmvpe is best"),
            # "crepe" is only offered when config.dml is False.
            choices=["harvest", "crepe", "rmvpe"]
            if config.dml is False
            else ["harvest", "rmvpe"],
            value="rmvpe",
            interactive=True,
        )
        resample_sr0 = gr.Slider(
            minimum=0,
            maximum=48000,
            label=i18n("Resampling, 0=none"),
            value=0,
            step=1,
            interactive=True,
        )
        rms_mix_rate0 = gr.Slider(
            minimum=0,
            maximum=1,
            label=i18n("0=Input source volume, 1=Normalized Output"),
            value=0.25,
            interactive=True,
        )
        protect0 = gr.Slider(
            minimum=0,
            maximum=0.5,
            label=i18n(
                "Protect clear consonants and breathing sounds, preventing electro-acoustic tearing and other artifacts, 0.5 does not open"
            ),
            value=0.33,
            step=0.01,
            interactive=True,
        )
        filter_radius0 = gr.Slider(
            minimum=0,
            maximum=7,
            label=i18n(">=3 apply median filter to the harvested pitch results"),
            value=3,
            step=1,
            interactive=True,
        )
        index_rate1 = gr.Slider(
            minimum=0,
            maximum=1,
            label=i18n("Index Ratio"),
            value=0.40,
            interactive=True,
        )
        f0_file = gr.File(
            label=i18n("F0 curve file [optional]"),
            visible=False,
        )
        file_index1 = gr.Textbox(
            label=i18n("Path of index"),
            placeholder="%userprofile%\\Desktop\\models\\model_example.index",
            interactive=True,
        )
        file_index2 = gr.Dropdown(
            label=i18n("Auto-detect index path"),
            choices=sorted(index_paths),
            interactive=True,
        )
        # Registered after the dropdown above is created. Before, this call ran
        # while `file_index2` still pointed at the previously created dropdown,
        # so pressing Refresh never updated the one in this accordion.
        refresh_button.click(
            fn=change_choices,
            inputs=[],
            outputs=[sid0, file_index2],
            api_name="infer_refresh",
        )
# Convert button for single-file inference plus the two result components.
with gr.Group():
    with gr.Column():
        but0 = gr.Button(i18n("Convert"), variant="primary")
        with gr.Row():
            # vc_output1 / vc_output2 were constructed elsewhere; render()
            # places them at this position in the layout.
            vc_output1.render()
            vc_output2.render()
        # The list order is the positional argument order of vc.vc_single.
        but0.click(
            fn=vc.vc_single,
            inputs=[
                spk_item,
                input_audio0,
                input_audio1,
                vc_transform0,
                f0_file,
                f0method0,
                file_index1,
                file_index2,
                index_rate1,
                filter_radius0,
                resample_sr0,
                rms_mix_rate0,
                protect0,
            ],
            outputs=[vc_output1, vc_output2],
            api_name="infer_convert",
        )
# "Download Voice Models" tab: a link box, a model-name box and a button whose
# click passes both values to download_from_url.
with gr.TabItem("Download Voice Models"): | |
gr.Markdown(i18n("For models found in [AI Hub](https://discord.gg/AIHUB)")) | |
with gr.Row(): | |
url = gr.Textbox(label="Huggingface Link:") | |
with gr.Row(): | |
model = gr.Textbox(label="Name of the model (without spaces):") | |
download_button = gr.Button("Download") | |
with gr.Row(): | |
status_bar = gr.Textbox(label="Download Status") | |
# download_from_url receives (url, model); its return value is shown in status_bar.
download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar]) | |
# "Import Models" tab: upload an archive and pass it to import_button_click,
# whose return value is displayed in the status box.
with gr.TabItem("Import Models"):
    gr.Markdown(i18n("For models found on [Weights](https://weights.gg)"))
    file_upload = gr.File(
        label="Upload a .zip file containing a .pth and .index file",
    )
    import_button = gr.Button("Import")
    import_status = gr.Textbox(label="Import Status")
    import_button.click(
        fn=import_button_click,
        inputs=file_upload,
        outputs=import_status,
    )
# "Batch Inference" tab: the same conversion settings as single inference, but
# with their own component instances (suffix 1/2/3/4), feeding vc.vc_multi.
with gr.TabItem(i18n("Batch Inference")): | |
gr.Markdown( | |
value=i18n("Batch Conversion") | |
) | |
with gr.Row(): | |
with gr.Column(): | |
vc_transform1 = gr.Number( | |
label=i18n("Pitch: 0 from man to man (or woman to woman); 12 from man to woman and -12 from woman to man."), | |
value=0 | |
) | |
# Passed to vc.vc_multi as its output location argument (default "InferOutput").
opt_input = gr.Textbox(label=i18n("Output"), value="InferOutput") | |
file_index3 = gr.Textbox( | |
label=i18n("Path to index"), | |
value="", | |
interactive=True, | |
) | |
file_index4 = gr.Dropdown( | |
label=i18n("Auto-detect index path"), | |
choices=sorted(index_paths), | |
interactive=True, | |
) | |
f0method1 = gr.Radio( | |
label=i18n("Pitch Extraction, rmvpe is best"), | |
# "crepe" is only offered when config.dml is False.
choices=["harvest", "crepe", "rmvpe"] | |
if config.dml is False | |
else ["harvest", "rmvpe"], | |
value="rmvpe", | |
interactive=True, | |
) | |
format1 = gr.Radio( | |
label=i18n("Export Format"), | |
choices=["flac", "wav", "mp3", "m4a"], | |
value="flac", | |
interactive=True, | |
) | |
# Extra listener on the shared refresh button: only the second value returned by
# change_choices is used, to update this tab's index dropdown.
refresh_button.click( | |
fn=lambda: change_choices()[1], | |
inputs=[], | |
outputs=file_index4, | |
api_name="infer_refresh_batch", | |
) | |
with gr.Column(): | |
resample_sr1 = gr.Slider( | |
minimum=0, | |
maximum=48000, | |
label=i18n("Resampling, 0=none"), | |
value=0, | |
step=1, | |
interactive=True, | |
) | |
rms_mix_rate1 = gr.Slider( | |
minimum=0, | |
maximum=1, | |
label=i18n("0=Input source volume, 1=Normalized Output"), | |
value=0.25, | |
interactive=True, | |
) | |
protect1 = gr.Slider( | |
minimum=0, | |
maximum=0.5, | |
label=i18n( | |
"Protect clear consonants and breathing sounds, preventing electro-acoustic tearing and other artifacts, 0.5 does not open"), | |
value=0.33, | |
step=0.01, | |
interactive=True, | |
) | |
filter_radius1 = gr.Slider( | |
minimum=0, | |
maximum=7, | |
label=i18n(">=3 apply median filter to the harvested pitch results"), | |
value=3, | |
step=1, | |
interactive=True, | |
) | |
index_rate2 = gr.Slider( | |
minimum=0, | |
maximum=1, | |
label=i18n("Index Ratio"), | |
value=0.40, | |
interactive=True, | |
) | |
# Two alternative sources of input audio: a folder path or uploaded files.
with gr.Row(): | |
dir_input = gr.Textbox( | |
label=i18n("Enter the path to the audio folder to be processed"), | |
placeholder="%userprofile%\\Desktop\\covers", | |
) | |
inputs = gr.File( | |
file_count="multiple", label=i18n("Audio files can also be imported in batch") | |
) | |
with gr.Row(): | |
but1 = gr.Button(i18n("Convert"), variant="primary") | |
vc_output3 = gr.Textbox(label=i18n("Console")) | |
# The list order is the positional argument order of vc.vc_multi. spk_item is
# shared with the single-inference tab.
but1.click( | |
vc.vc_multi, | |
[ | |
spk_item, | |
dir_input, | |
opt_input, | |
inputs, | |
vc_transform1, | |
f0method1, | |
file_index3, | |
file_index4, | |
# file_big_npy2, | |
index_rate2, | |
filter_radius1, | |
resample_sr1, | |
rms_mix_rate1, | |
protect1, | |
format1, | |
], | |
[vc_output3], | |
api_name="infer_convert_batch", | |
) | |
# "Train" tab, header row: model name, sample-rate/pretrain preset, model
# version and CPU thread count.
with gr.TabItem(i18n("Train")): | |
gr.Markdown(value=i18n("")) | |
with gr.Row(): | |
exp_dir1 = gr.Textbox(label=i18n("Model Name"), value="test-model") | |
# Plain sample rates and named pretrain presets share one dropdown; the choice
# is resolved by the sr2.change handler registered further down.
sr2 = gr.Dropdown( | |
label=i18n("Sample Rate & Pretrain"), | |
choices=["32k", "40k", "48k", "OV2-32k", "OV2-40k", "RIN-40k", "Snowie-40k", "Snowie-48k", "SnowieV3.1-40k","SnowieV3.1-32k","SnowieV3.1-48k","SnowieV3.1-RinE3-40K"], | |
value="32k", | |
interactive=True, | |
) | |
# Hidden, non-interactive and fixed to "v2"; kept as a component so it can still
# be passed as an input to the training handlers.
version19 = gr.Radio( | |
label=i18n("Version 2 only here"), | |
choices=["v2"], | |
value="v2", | |
interactive=False, | |
visible=False, | |
) | |
# Default is config.n_cpu / 2.5, rounded up.
np7 = gr.Slider( | |
minimum=0, | |
maximum=config.n_cpu, | |
step=1, | |
label=i18n("CPU Threads"), | |
value=int(np.ceil(config.n_cpu / 2.5)), | |
interactive=True, | |
) | |
# Dataset selection: either type a dataset path or upload files, which are
# handed to transfer_files.
with gr.Group(): | |
gr.Markdown(value=i18n("")) | |
with gr.Row(): | |
trainset_dir4 = gr.Textbox( | |
label=i18n("Path to Dataset"), value="dataset" | |
) | |
with gr.Accordion('Upload Dataset (alternative)', open=False, visible=True): | |
file_thin = gr.Files(label='Dataset') # transfers files to the dataset dir, lol # much coding -ila | |
show = gr.Textbox(label='Status') | |
transfer_button = gr.Button('Upload Dataset to the folder', variant="primary") | |
# transfer_files receives the uploaded file list; its return value is shown in `show`.
transfer_button.click( | |
fn=transfer_files, | |
inputs=[file_thin], | |
outputs=show, | |
) | |
# Core training hyper-parameters: checkpoint save frequency, total epochs,
# batch size, and whether a model is written at each save point.
with gr.Group(): | |
gr.Markdown(value=i18n("")) | |
with gr.Row(): | |
save_epoch10 = gr.Slider( | |
minimum=1, | |
maximum=250, | |
step=1, | |
label=i18n("Save frequency"), | |
value=50, | |
interactive=True, | |
) | |
total_epoch11 = gr.Slider( | |
minimum=2, | |
maximum=10000, | |
step=1, | |
label=i18n("Total Epochs"), | |
value=300, | |
interactive=True, | |
) | |
# default_batch_size is computed elsewhere in the file.
batch_size12 = gr.Slider( | |
minimum=1, | |
maximum=16, | |
step=1, | |
label=i18n("Batch Size"), | |
value=default_batch_size, | |
interactive=True, | |
) | |
# The option keys "是" / "否" are Chinese for "yes" / "no", passed through i18n.
if_save_every_weights18 = gr.Radio( | |
label=i18n("Create model with save frequency"), | |
choices=[i18n("是"), i18n("否")], | |
value=i18n("是"), | |
interactive=True, | |
) | |
# Advanced training settings: speaker id, pitch guidance, GPU selection,
# checkpoint/caching options and the feature-extraction method.
with gr.Accordion('Advanced Settings', open=False, visible=True): | |
with gr.Row(): | |
with gr.Group(): | |
spk_id5 = gr.Slider( | |
minimum=0, | |
maximum=4, | |
step=1, | |
label=i18n("Speaker ID"), | |
value=0, | |
interactive=True, | |
) | |
if_f0_3 = gr.Radio( | |
label=i18n("Pitch Guidance"), | |
choices=[True, False], | |
value=True, | |
interactive=True, | |
) | |
# gpus, gpu_info and F0GPUVisible are module-level values defined elsewhere.
gpus6 = gr.Textbox( | |
label=i18n("GPU ID (Leave 0 if you have only one GPU, use 0-1 for multiple GPus)"), | |
value=gpus, | |
interactive=True, | |
visible=F0GPUVisible, | |
) | |
gpu_info9 = gr.Textbox( | |
label=i18n("GPU Model"), | |
value=gpu_info, | |
visible=F0GPUVisible, | |
) | |
# Falls back to "0" when no GPU ids were detected (gpus is the empty string).
gpus16 = gr.Textbox( | |
label=i18n("Enter cards to be used (Leave 0 if you have only one GPU, use 0-1 for multiple GPus)"), | |
value=gpus if gpus != "" else "0", | |
interactive=True, | |
) | |
with gr.Group(): | |
# The option keys "是" / "否" are Chinese for "yes" / "no", passed through i18n.
if_save_latest13 = gr.Radio( | |
label=i18n("Save last ckpt as final Model"), | |
choices=[i18n("是"), i18n("否")], | |
value=i18n("是"), | |
interactive=True, | |
) | |
if_cache_gpu17 = gr.Radio( | |
label=i18n("Cache data to GPU (Only for datasets under 8 minutes)"), | |
choices=[i18n("是"), i18n("否")], | |
value=i18n("否"), | |
interactive=True, | |
) | |
f0method8 = gr.Radio( | |
label=i18n("Feature Extraction Method"), | |
choices=["rmvpe", "rmvpe_gpu"], | |
value="rmvpe_gpu", | |
interactive=True, | |
) | |
# Default text is the detected GPU string repeated twice, joined by "-".
gpus_rmvpe = gr.Textbox( | |
label=i18n( | |
"rmvpe_gpu will use your GPU instead of the CPU for the feature extraction" | |
), | |
value="%s-%s" % (gpus, gpus), | |
interactive=True, | |
visible=F0GPUVisible, | |
) | |
# Changing the extraction method lets change_f0_method update the gpus_rmvpe box.
f0method8.change( | |
fn=change_f0_method, | |
inputs=[f0method8], | |
outputs=[gpus_rmvpe], | |
) | |
# Hidden textboxes carrying the pretrained generator / discriminator values
# that the sr2 / version19 / if_f0_3 change handlers overwrite and that
# click_train receives as inputs.
# The former `choices=` keyword was dropped: gr.Textbox has no such parameter,
# so it was an unused kwarg (and a TypeError on Gradio versions that no longer
# accept arbitrary keyword arguments).
with gr.Row():
    pretrained_G14 = gr.Textbox(
        label="Pretrained G",
        # Empty string when the default 32k generator entry is missing.
        value=pretrained_G_files.get('f0G32.pth', ''),
        visible=False,
        interactive=True,
    )
    pretrained_D15 = gr.Textbox(
        label="Pretrained D",
        # Empty string when the default 32k discriminator entry is missing.
        value=pretrained_D_files.get('f0D32.pth', ''),
        visible=False,
        interactive=True,
    )
# Keep the hidden pretrained G/D fields in sync with the sample-rate preset,
# the model version and the pitch-guidance switch.
sr2.change(
    fn=change_sr2,
    inputs=[sr2, if_f0_3, version19],
    outputs=[pretrained_G14, pretrained_D15],
)
# A version change may also update the sample-rate dropdown itself.
version19.change(
    fn=change_version19,
    inputs=[sr2, if_f0_3, version19],
    outputs=[pretrained_G14, pretrained_D15, sr2],
)
# Toggling pitch guidance additionally updates the extraction-method widgets.
if_f0_3.change(
    fn=change_f0,
    inputs=[if_f0_3, sr2, version19],
    outputs=[f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
)
with gr.Group():
    def one_click_train(
        trainset_dir4, exp_dir1, sr2, gpus6, np7, f0method8, if_f0_3, version19, gpus_rmvpe,
        spk_id5, save_epoch10, total_epoch11, batch_size12, if_save_latest13,
        pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17, if_save_every_weights18,
    ):
        """Run the four training steps back to back and return the final status.

        Steps, in order: preprocess_dataset, extract_f0_feature, train_index,
        click_train. All arguments are the *values* of the identically named
        UI components, delivered by Gradio through the listener's input list.
        Previously only the first nine were parameters, so click_train was
        handed the component objects themselves for the remaining ten.

        Returns whatever click_train reports (shown in the output textbox).
        """
        import inspect

        def _finish(step_result):
            """Exhaust a generator-style step and return its last message."""
            # NOTE(review): the step functions are defined outside this block.
            # If they are generators (yielding progress text), merely calling
            # them does no work, so they are drained here; plain return values
            # pass through untouched.
            if inspect.isgenerator(step_result):
                last_message = ""
                for last_message in step_result:
                    pass
                return last_message
            return step_result

        _finish(preprocess_dataset(trainset_dir4, exp_dir1, sr2, np7))
        _finish(extract_f0_feature(gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe))
        _finish(train_index(exp_dir1, version19))
        return _finish(
            click_train(
                exp_dir1, sr2, if_f0_3, spk_id5, save_epoch10, total_epoch11, batch_size12,
                if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17,
                if_save_every_weights18, version19,
            )
        )

    with gr.Row():
        with gr.TabItem("One-Click Training"):
            but5 = gr.Button('Train', variant="primary")
            info = gr.Textbox(label=i18n("Output"), value="", max_lines=5, lines=5)
            # Input order must match one_click_train's parameter order. The
            # result is now routed to this tab's output box (it had no outputs).
            but5.click(
                one_click_train,
                [
                    trainset_dir4, exp_dir1, sr2, gpus6, np7, f0method8, if_f0_3, version19, gpus_rmvpe,
                    spk_id5, save_epoch10, total_epoch11, batch_size12, if_save_latest13,
                    pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17, if_save_every_weights18,
                ],
                [info],
            )
        with gr.TabItem("Manual Training"):
            # `but1` and `info` are rebound here; listeners registered earlier
            # keep pointing at the components they were given at the time.
            but1 = gr.Button(i18n("1. Process Data"), variant="primary")
            but2 = gr.Button(i18n("2. Feature Extraction"), variant="primary")
            but4 = gr.Button(i18n("3. Train Index"), variant="primary")
            but3 = gr.Button(i18n("4. Train Model"), variant="primary")
            info = gr.Textbox(label=i18n("Output"), value="", max_lines=5, lines=5)
            but1.click(
                preprocess_dataset,
                [trainset_dir4, exp_dir1, sr2, np7],
                [info],
                api_name="train_preprocess",
            )
            but2.click(
                extract_f0_feature,
                [
                    gpus6,
                    np7,
                    f0method8,
                    if_f0_3,
                    exp_dir1,
                    version19,
                    gpus_rmvpe,
                ],
                [info],
                api_name="train_extract_f0_feature",
            )
            # Registered exactly once; a second identical registration used to
            # follow the but3 listener and made every click train the index twice.
            but4.click(train_index, [exp_dir1, version19], info)
            but3.click(
                click_train,
                [
                    exp_dir1,
                    sr2,
                    if_f0_3,
                    spk_id5,
                    save_epoch10,
                    total_epoch11,
                    batch_size12,
                    if_save_latest13,
                    pretrained_G14,
                    pretrained_D15,
                    gpus16,
                    if_cache_gpu17,
                    if_save_every_weights18,
                    version19,
                ],
                info,
                api_name="train_start",
            )
# "UVR5" tab: load/unload a separation model through a UVRHANDLER instance and
# run it on an uploaded file, showing two audio results.
with gr.TabItem(i18n("UVR5")): | |
with gr.Group(): | |
gr.Markdown( | |
value=i18n( | |
""" | |
- **Kim Vocal 2**: Effortlessly separates vocals and instrumentals, a perfect tool for music enthusiasts. | |
- **Karaoke 5 HP**: Expertly isolates two overlapping voices, making it a valuable asset for duet performances. | |
- **DeEcho DeReverb**: Skillfully eliminates reverb from vocal tracks, enhancing the clarity of your sound. | |
- **MDX23C InstVoc**: Excellent at removing sound effects or other annoying noises, ensuring a smooth listening experience. | |
- **DeNoise**: Exceptional at detecting and removing nearly imperceptible noises that can compromise the quality of a cover or a model. | |
""" | |
) | |
) | |
# One handler object is shared by the three listeners below.
uvr_handler = UVRHANDLER() | |
with gr.Row(): | |
audios = gr.File() | |
output_dir = gr.Textbox('opt/', label='Output Directory') | |
model_name = gr.Dropdown(choices=uvr5_names, label='Models') | |
model_status = gr.Textbox(placeholder='Waiting...', interactive=False, label='Model Information') | |
with gr.Row(): | |
LOADMODELBUTTON = gr.Button('Load Model', variant="primary") | |
# loadmodel receives (model_name, output_dir); its result goes to model_status.
LOADMODELBUTTON.click( | |
fn=uvr_handler.loadmodel, | |
inputs=[model_name, output_dir], | |
outputs=[model_status] | |
) | |
CLEARMODELBUTTON = gr.Button('Unload Model', variant="primary") | |
# deloadmodel takes no inputs; its result goes to model_status.
CLEARMODELBUTTON.click( | |
fn=uvr_handler.deloadmodel, | |
outputs=[model_status] | |
) | |
with gr.Column(): | |
with gr.Row(): | |
inst = gr.Audio(show_download_button=True, interactive=False, label='Instrumental') | |
vocal = gr.Audio(show_download_button=True, interactive=False, label='Vocals') | |
UVRBUTTON = gr.Button('Extract', variant="primary") | |
# uvr receives the uploaded file and must return two values: (inst, vocal).
UVRBUTTON.click( | |
fn=uvr_handler.uvr, | |
inputs=[audios], | |
outputs=[inst, vocal] | |
) | |
# "Extra" tab, "Model Info" panel: pick a voice model and show its metadata.
with gr.TabItem(i18n("Extra")): | |
with gr.Accordion('Model Info', open=False): | |
with gr.Column(): | |
sid1 = gr.Dropdown(label=i18n("Voice Model"), choices=sorted(names)) | |
# NOTE(review): this rebinds the name `refresh_button`; listeners registered on
# the earlier button keep working, but later code sees only this one.
refresh_button = gr.Button(i18n("Refresh"), variant="primary") | |
# NOTE(review): api_name "infer_refresh" is also used by earlier refresh
# listeners in this file -- confirm the duplicate endpoint name is intended.
refresh_button.click( | |
fn=change_choices, | |
inputs=[], | |
outputs=[sid1, file_index2], | |
api_name="infer_refresh", | |
) | |
modelload_out = gr.Textbox(label="Model Metadata", interactive=False, lines=4) | |
get_model_info_button = gr.Button(i18n("Get Model Info")) | |
# Uses the same vc.get_vc call and output list as the sid0.change listener
# further down, with sid1 as the selected model.
get_model_info_button.click( | |
fn=vc.get_vc, | |
inputs=[sid1, protect0, protect1], | |
outputs=[spk_item, protect0, protect1, file_index2, file_index4, modelload_out] | |
) | |
# "Audio Analyser" panel: upload an audio file and display the text and image
# returned by generate_spectrogram_and_get_info.
with gr.Accordion('Audio Analyser', open=False): | |
with gr.Column(): | |
# type="filepath": the handler receives a path string, not raw samples.
audio_input = gr.Audio(type="filepath") | |
get_info_button = gr.Button( | |
value=i18n("Get information about the audio"), variant="primary" | |
) | |
with gr.Column(): | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown( | |
value=i18n("Information about the audio file"), | |
visible=True, | |
) | |
output_markdown = gr.Markdown( | |
value=i18n("Waiting for information..."), visible=True | |
) | |
image_output = gr.Image(type="filepath", interactive=False) | |
# The handler must return two values: markdown text and an image file path.
get_info_button.click( | |
fn=generate_spectrogram_and_get_info, | |
inputs=[audio_input], | |
outputs=[output_markdown, image_output], | |
) | |
# "Training Helper" panel: upload an audio file and show the text produced by
# on_button_click. `audio_input` and `get_info_button` are rebound here; the
# Audio Analyser listener registered earlier keeps its own components.
with gr.Accordion('Training Helper', open=False):
    with gr.Column():
        audio_input = gr.Audio(
            type="filepath",
            label="Upload your audio file",
        )
        gr.Text(
            "Please note that these results are approximate and intended to provide a general idea for beginners.",
            label='Notice:',
        )
        training_info_output = gr.Markdown(label="Training Information:")
        get_info_button = gr.Button("Get Training Info")
        get_info_button.click(
            fn=on_button_click,
            inputs=[audio_input],
            outputs=[training_info_output],
        )
# "Model Fusion" panel: collect two checkpoint paths plus blend settings and
# pass them to merge (imported from infer.lib.train.process_ckpt).
with gr.Accordion(i18n("Model Fusion"), open=False): | |
with gr.Group(): | |
gr.Markdown(value=i18n("Strongly suggested to use only very clean models.")) | |
with gr.Row(): | |
ckpt_a = gr.Textbox( | |
label=i18n("Path of the first .pth"), value="", interactive=True | |
) | |
ckpt_b = gr.Textbox( | |
label=i18n("Path of the second .pth"), value="", interactive=True | |
) | |
alpha_a = gr.Slider( | |
minimum=0, | |
maximum=1, | |
label=i18n("Weight of the first model over the second"), | |
value=0.5, | |
interactive=True, | |
) | |
with gr.Row(): | |
sr_ = gr.Radio( | |
label=i18n("Sample rate of both models"), | |
choices=["32k","40k", "48k"], | |
value="32k", | |
interactive=True, | |
) | |
# The option keys "是" / "否" are Chinese for "yes" / "no", passed through i18n.
if_f0_ = gr.Radio( | |
label=i18n("Pitch Guidance"), | |
choices=[i18n("是"), i18n("否")], | |
value=i18n("是"), | |
interactive=True, | |
) | |
# Hidden (visible=False) but still passed to merge, so it always contributes
# its default empty string.
info__ = gr.Textbox( | |
label=i18n("Add informations to the model"), | |
value="", | |
max_lines=8, | |
interactive=True, | |
visible=False | |
) | |
name_to_save0 = gr.Textbox( | |
label=i18n("Final Model name"), | |
value="", | |
max_lines=1, | |
interactive=True, | |
) | |
version_2 = gr.Radio( | |
label=i18n("Versions of the models"), | |
choices=["v1", "v2"], | |
value="v1", | |
interactive=True, | |
) | |
with gr.Row(): | |
but6 = gr.Button(i18n("Fuse the two models"), variant="primary") | |
info4 = gr.Textbox(label=i18n("Output"), value="", max_lines=8) | |
# The list order is the positional argument order of merge; its result is
# written to info4.
but6.click( | |
merge, | |
[ | |
ckpt_a, | |
ckpt_b, | |
alpha_a, | |
sr_, | |
if_f0_, | |
info__, | |
name_to_save0, | |
version_2, | |
], | |
info4, | |
api_name="ckpt_merge", | |
) | |
# "Credits" panel: static Markdown only, no inputs or listeners.
with gr.Accordion('Credits', open=False): | |
gr.Markdown(''' | |
## All the amazing people who worked on this! | |
### Developers | |
- **Ilaria**: Founder, Lead Developer | |
- **Yui**: Training feature | |
- **GDR-**: Inference feature | |
- **Poopmaster**: Model downloader, Model importer | |
- **kitlemonfoot**: Ilaria TTS implementation | |
- **eddycrack864**: UVR5 implementation | |
- **Diablo**: Bug Fixes, UI help. | |
- **Mikus**: Ilaria Updater & Downloader | |
- **Mitsubachi1**: Bug Fixes, UI help | |
### Beta Tester | |
- **Charlotte**: Beta Tester | |
- **RME**: Beta Tester | |
- **Delik**: Beta Tester | |
### Pretrains Makers | |
- **simplcup**: Ov2Super | |
- **mustar22**: RIN_E3 | |
- **mustar22**: Snowie | |
### Other | |
- **RVC Project**: Original Developers | |
- **yumereborn**: Ilaria RVC image | |
### **In loving memory of JLabDX** 🕊️ | |
''') | |
# Selecting a voice model in the main dropdown calls vc.get_vc, which returns
# six values: speaker id, both protect sliders, both index dropdowns and the
# metadata textbox. Registered this late because modelload_out, protect1 and
# file_index4 are only created further up in the Extra / Batch tabs.
sid0.change( | |
fn=vc.get_vc, | |
inputs=[sid0, protect0, protect1], | |
outputs=[spk_item, protect0, protect1, file_index2, file_index4, modelload_out], | |
api_name="infer_change_voice", | |
) | |
# Final tab with an empty title; it only holds a Markdown block.
with gr.TabItem(i18n("")): | |
gr.Markdown(''' | |
 | |
''') | |
# Start the web UI. On Colab a public share link is requested; otherwise the
# server binds to all interfaces on the configured port.
if config.iscolab:
    launch_options = {"share": True}
else:
    launch_options = {
        "server_name": "0.0.0.0",
        "inbrowser": not config.noautoopen,
        "server_port": config.listen_port,
        "quiet": True,
    }
# Queue limits are identical in both modes, so the queue is configured once.
app.queue(concurrency_count=511, max_size=1022).launch(**launch_options)