from ipywidgets import widgets
import copy
import json
import glob
import os
import time
import threading
from IPython.display import Audio, display, HTML, FileLink
from pathlib import Path
import subprocess
import shutil
from google.colab import files
from ipywidgets import TwoByTwoLayout
import re
SEGMENT_LENGTH = 60
CUSTOM_MODELS_FILENAME = "customModels"
CUSTOM_MODELS_DIR = f"/drive/MyDrive/{CUSTOM_MODELS_FILENAME}"
MUSIC_EXTENSIONS = ['.mp3', '.wav', '.flac', '.aac', '.ogg']
SEGMENTS_DIRNAME = "/content/segments"
INFERENCE_OUTPUT_DIRNAME = "/content/inference_output"
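# Expected layout of each model folder under CUSTOM_MODELS_DIR, as consumed by
# get_speakers() below; folder and file names here are illustrative only:
#   customModels/
#     MyVoice/
#       G_30400.pth     # generator checkpoint (required)
#       config.json     # model config containing a "spk" name -> id map (required)
#       kmeans.pt       # clustering model (optional)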
def progress(value, max=100):
    # Render a simple HTML <progress> bar that can be updated in place.
    return HTML("""
        <progress
            value='{value}'
            max='{max}'
            style='width: 100%'
        >
            {value}
        </progress>
    """.format(value=value, max=max))
def is_valid_filename(filename):
    # Only the basename is validated; the directory part is allowed to contain '/'.
    basename = os.path.basename(filename)
    if re.search(r'[<>:"/\\|?*\x00-\x1f]', basename):
        return False
    if re.search(r'[-\s()]', basename):
        return False
    return True
def clean_filename(filename):
    # Sanitize the basename so it is safe to pass to ffmpeg/shell commands,
    # then re-attach the original directory.
    basename = os.path.basename(filename)
    cleaned_basename = re.sub(r'[%<>:"/\\|?*\x00-\x1f]', '', basename)
    cleaned_basename = re.sub(r'[-\s]+', '_', cleaned_basename)
    cleaned_basename = re.sub(r'[\(\)]+', '', cleaned_basename)
    cleaned_basename = cleaned_basename.replace("'", "").replace('"', '').replace("$", "")
    # Strip dots from every whitespace-separated part except the last one,
    # so the file extension is preserved.
    cleaned_basename_arr = cleaned_basename.split()
    for i in range(len(cleaned_basename_arr)):
        if i != len(cleaned_basename_arr) - 1:
            cleaned_basename_arr[i] = cleaned_basename_arr[i].replace(".", "")
    cleaned_basename = " ".join(cleaned_basename_arr)
    return os.path.join(os.path.dirname(filename), cleaned_basename)
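# Example: clean_filename("/content/My Song (remix).mp3") -> "/content/My_Song_remix.mp3"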
def get_audio_files():
    # Collect audio filenames found anywhere under /content, skipping previous
    # conversion results (their names contain "output").
    audio_files = []
    for root, dirs, files in os.walk("/content"):
        for filename in files:
            file_extension = os.path.splitext(filename)[1]
            if file_extension.lower() in MUSIC_EXTENSIONS and "output" not in filename:
                audio_files.append(filename)
    return audio_files
def get_speakers():
    speakers = []
    for _, dirs, _ in os.walk(CUSTOM_MODELS_DIR):
        for folder in dirs:
            cur_speaker = {}
            # Look for G_****.pth
            g = glob.glob(os.path.join(CUSTOM_MODELS_DIR, folder, 'G_*.pth'))
            if not len(g):
                continue
            cur_speaker["model_path"] = g[0]
            cur_speaker["model_folder"] = folder
            # Look for *.pt (clustering model)
            clst = glob.glob(os.path.join(CUSTOM_MODELS_DIR, folder, '*.pt'))
            if not len(clst):
                cur_speaker["cluster_path"] = ""
            else:
                cur_speaker["cluster_path"] = clst[0]
            # Look for config.json
            cfg = glob.glob(os.path.join(CUSTOM_MODELS_DIR, folder, '*.json'))
            if not len(cfg):
                continue
            cur_speaker["cfg_path"] = cfg[0]
            with open(cur_speaker["cfg_path"]) as f:
                try:
                    cfg_json = json.loads(f.read())
                except Exception:
                    print("Malformed config json in " + folder)
                    continue
            for name, i in cfg_json["spk"].items():
                cur_speaker["name"] = name
                cur_speaker["id"] = i
                if not name.startswith('.'):
                    speakers.append(copy.copy(cur_speaker))
    return sorted(speakers, key=lambda x: x["name"].lower())
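# Each dict returned by get_speakers() has this shape (paths are illustrative):
#   {"model_path": "<folder>/G_30400.pth", "model_folder": "<folder>",
#    "cluster_path": "<folder>/kmeans.pt" or "", "cfg_path": "<folder>/config.json",
#    "name": "<speaker name from the config's 'spk' map>", "id": <speaker id>}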
def slice_audio(filepath):
    assert os.path.exists(filepath), f"Could not find {filepath}. Make sure you entered the correct file name."
    # Get the filename and extension of the input file
    filename, extension = os.path.splitext(filepath)
    filename = filename.split("/")[-1]
    os.makedirs(SEGMENTS_DIRNAME, exist_ok=True)
    # Set the output filename pattern
    output_pattern = f"{SEGMENTS_DIRNAME}/{filename}_%d{extension}"
    # Use ffmpeg to split the audio into segments of SEGMENT_LENGTH seconds
    os.system(f"ffmpeg -i '{filepath}' -f segment -segment_time {SEGMENT_LENGTH} -c copy '{output_pattern}'")
def get_container_format(filename):
    # Ask ffprobe for the container format name of the given file.
    command = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=format_name", "-of", "default=noprint_wrappers=1:nokey=1", filename]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate()
    if error:
        raise ValueError(f"Error while getting the container format: {error.decode()}")
    return output.decode().strip()
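# ffprobe reports e.g. "mp3" for MP3 files, "flac" for FLAC, and the combined
# name "mov,mp4,m4a,3gp,3g2,mj2" for MP4/M4A containers.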
def run_inference(speaker, f0_method, transpose, noise_scale, cluster_ratio, is_pitch_prediction_enabled):
    loading_bar = display(progress(0, 100), display_id=True)
    model_path = speaker["model_path"]
    config_path = speaker["cfg_path"]
    cluster_path = speaker["cluster_path"]
    all_segs_paths = sorted(Path(SEGMENTS_DIRNAME).glob("*"))
    for index, seg_path in enumerate(all_segs_paths):
        max_load_value = float((index + 1) / len(all_segs_paths)) * 100
        loading_bar.update(progress(max_load_value / 2, 100))
        inference_cmd = f"svc infer {seg_path.absolute()} -m {model_path} -c {config_path} {f'-k {cluster_path} -r {cluster_ratio}' if cluster_path != '' and cluster_ratio > 0 else ''} -t {transpose} --f0-method {f0_method} -n {noise_scale} -o {INFERENCE_OUTPUT_DIRNAME}/{seg_path.name} {'' if is_pitch_prediction_enabled else '--no-auto-predict-f0'}"
        # print(f"\nPerforming inference on... {seg_path.absolute()}\ninference cmd: {inference_cmd}")
        result = subprocess.run(
            inference_cmd.split(),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True
        )
        loading_bar.update(progress(max_load_value, 100))
        # stderr is merged into stdout above, so check stdout for errors.
        if result.stdout and "AttributeError" in result.stdout:
            raise Exception(result.stdout + "\nMake sure your model is not a 4.0-v2 model. This notebook only works with 4.0-v1 models.")
    files_length = len(sorted(Path(SEGMENTS_DIRNAME).glob("*")))
    if files_length == 0:
        raise Exception("An unknown error occurred!")
def concatenate_segments(final_filename):
    foldername = Path(INFERENCE_OUTPUT_DIRNAME)
    assert foldername.exists(), "The folder does not exist. Enter a valid folder name."
    all_segs = [f for f in sorted(foldername.glob("**/*")) if f.is_file()]
    print(all_segs)
    try:
        ext = all_segs[0].suffix  # raises IndexError if no segments were produced
        # Build the list file consumed by ffmpeg's concat demuxer.
        with open(foldername / "concat_list.txt", "w") as f:
            for seg in all_segs:
                f.write(f"file '{seg.absolute()}'\n")
        os.system(f"ffmpeg -f concat -safe 0 -i {foldername}/concat_list.txt -codec copy {foldername}/{final_filename}")
    except Exception:
        raise Exception(f"No files were found in the {foldername} directory")
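# Note: "-codec copy" with the concat demuxer only works here because every
# segment comes from the same inference run and shares the same codec/format.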
def cleanup_dirs():
    !rm -R {INFERENCE_OUTPUT_DIRNAME} &> /dev/null
    !rm -R {SEGMENTS_DIRNAME} &> /dev/null
    !rm -R ./so_vits_svc_fork.log &> /dev/null
class InferenceGui():
    def __init__(self):
        speakers = get_speakers()
        # State used to block concurrent conversions and to remember the name
        # of the last output file.
        self.is_inferencing = False
        self.final_filename = ""
        self.speakers = speakers if speakers is not None else []
        self.speaker_list = [x["name"] for x in self.speakers]
        self.speaker_dropdown = widgets.Dropdown(
            options=self.speaker_list,
            description="AI model"
        )
        self.audio_files = get_audio_files()
        self.audio_files_dropdown = widgets.Dropdown(
            options=self.audio_files,
            description="Audio file"
        )
        self.cluster_ratio_tx = widgets.FloatSlider(
            value=1,
            min=0,
            max=1.0,
            step=0.05,
            description='Cluster ratio',
            disabled=False,
            continuous_update=False,
            orientation='horizontal',
            readout=True,
        )
        self.noise_scale_tx = widgets.FloatSlider(
            value=2,
            min=-2,
            max=2,
            step=.4,
            description='Noise scale',
            disabled=False,
            continuous_update=False,
            orientation='horizontal',
            readout=True,
        )
        def convert_cb(btn):
            if self.is_inferencing:
                return
            self.convert()
        self.convert_btn = widgets.Button(description="Convert")
        self.convert_btn.on_click(convert_cb)
        def refresh_files(btn):
            self.update_file_list_dropdown()
        self.refresh_files_btn = widgets.Button(description="Refresh audio files")
        self.refresh_files_btn.on_click(refresh_files)
        cluster_container = widgets.HBox([self.cluster_ratio_tx, widgets.Label(value="Adjust the balance between sounding like the target's timbre and keeping clarity and articulation to find a suitable trade-off.")])
        noise_scale_container = widgets.HBox([self.noise_scale_tx, widgets.Label(value="If the output sounds hollow/metallic, try increasing the noise scale. If artifacts appear that sound like poor noise suppression or like the speaker is underwater, decrease the noise scale.")])
        audio_files_container = widgets.HBox([
            self.audio_files_dropdown,
            self.refresh_files_btn
        ])
        voice_cloning_tab = widgets.VBox([self.speaker_dropdown, audio_files_container, cluster_container, noise_scale_container])
        buttons_container = widgets.HBox([self.convert_btn])
        if len(self.audio_files) == 0:
            audio_file_error_widget = widgets.HBox([
                widgets.Label(value='Please upload an audio file and press the play button to re-run this cell.')
            ])
            display(audio_file_error_widget)
            return
        display(voice_cloning_tab)
        display(buttons_container)
    def update_file_list_dropdown(self):
        self.audio_files = get_audio_files()
        self.audio_files_dropdown.options = self.audio_files
    def clean(self):
        # Delete every audio file under /content and refresh the dropdown.
        input_filepaths = [f for f in glob.glob('/content/**/*.*', recursive=True)
                           if any(f.endswith(ex) for ex in ['.wav', '.flac', '.mp3', '.ogg', '.opus'])]
        for f in input_filepaths:
            os.remove(f)
        subprocess.run(['sudo', 'updatedb'])
        self.update_file_list_dropdown()
    def convert(self):
        ts0 = time.time()
        speaker = next((x for x in self.speakers if x["name"] ==
                        self.speaker_dropdown.value), None)
        if not speaker:
            print("Please select an AI model.")
            return
        if not self.audio_files_dropdown.value or self.audio_files_dropdown.value == "":
            print("Please select an audio file to clone.")
            return
        # Prevent a second conversion from starting while one is already running.
        self.is_inferencing = True
        model_path = os.path.join(os.getcwd(), speaker["model_path"])
        config_path = os.path.join(os.getcwd(), speaker["cfg_path"])
        cluster_path = os.path.join(os.getcwd(), speaker["cluster_path"])
        file_path = os.path.join(os.getcwd(), str(self.audio_files_dropdown.value))
        f0_method = "dio"
        transpose = 0
        noise_scale = float(self.noise_scale_tx.value)
        cluster_ratio = float(self.cluster_ratio_tx.value)
        is_pitch_prediction_enabled = True
        if not is_valid_filename(file_path):
            try:
                new_filename = clean_filename(file_path)
                os.rename(file_path, new_filename)
                file_path = new_filename
            except Exception:
                print("Please re-run this cell by pressing the play button. An unknown error occurred.")
                self.is_inferencing = False
                return
        if os.path.exists(SEGMENTS_DIRNAME) or os.path.exists(INFERENCE_OUTPUT_DIRNAME):
            print(f"Detected previous {SEGMENTS_DIRNAME} and {INFERENCE_OUTPUT_DIRNAME} folders.")
            cleanup_dirs()
        # SLICE AUDIO
        slice_audio(file_path)
        # PERFORM INFERENCE
        os.makedirs(INFERENCE_OUTPUT_DIRNAME, exist_ok=True)
        run_inference(speaker, f0_method, transpose, noise_scale, cluster_ratio, is_pitch_prediction_enabled)
        cleaned_speaker_name = speaker['name'].replace(" ", "_")
        final_filename = f"{Path(file_path).stem}_{cleaned_speaker_name}_output{Path(file_path).suffix}"
        self.final_filename = final_filename
        # CONCATENATE FILES IN INFERENCE OUTPUT DIR
        concatenate_segments(final_filename)
        # MOVE FINAL CONCATENATED FILE TO TOP-LEVEL IN CURRENT DIR
        shutil.move(Path(INFERENCE_OUTPUT_DIRNAME, final_filename), Path(final_filename))
        # CLEAN UP
        cleanup_dirs()
        ts1 = time.time()
        print(f"Total Time Elapsed: {ts1 - ts0:.1f} seconds")
        print(f"\nDone! You can download the output file '{final_filename}' from the file browser, or listen to it with the audio player below.")
        audio = Audio(final_filename, autoplay=False)
        display(audio)
        self.is_inferencing = False
        self.update_file_list_dropdown()
gui = InferenceGui() | |
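# Minimal sketch of driving the same pipeline without the widget GUI, e.g. for
# scripted/batch use. It reuses the helpers above unchanged; the speaker index,
# input file name, and parameter defaults below are placeholders rather than
# values from this notebook, so adjust them before calling the function.
def convert_without_gui(speaker_index=0, input_file="/content/My_Song.mp3",
                        transpose=0, noise_scale=2.0, cluster_ratio=1.0):
    speaker = get_speakers()[speaker_index]
    cleanup_dirs()
    slice_audio(input_file)
    os.makedirs(INFERENCE_OUTPUT_DIRNAME, exist_ok=True)
    run_inference(speaker, "dio", transpose, noise_scale, cluster_ratio, True)
    final_filename = f"{Path(input_file).stem}_{speaker['name'].replace(' ', '_')}_output{Path(input_file).suffix}"
    concatenate_segments(final_filename)
    shutil.move(Path(INFERENCE_OUTPUT_DIRNAME, final_filename), Path(final_filename))
    cleanup_dirs()
    return final_filename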