|
import torch.multiprocessing |
|
|
|
from Modules.Aligner.CodecAlignerDataset import CodecAlignerDataset |
|
from Modules.Aligner.autoaligner_train_loop import train_loop as train_aligner |
|
from Modules.ToucanTTS.TTSDataset import TTSDataset |
|
from Utility.path_to_transcript_dicts import * |
|
from Utility.storage_config import MODELS_DIR |
|
|
|
|
|
def prepare_aligner_corpus(transcript_dict, corpus_dir, lang, device, phone_input=False,
                           gpu_count=1,
                           rank=0,
                           loading_processes=5):
    """
    Build the aligner dataset for a corpus and return it.

    This is a thin convenience wrapper: every argument is forwarded
    unchanged to CodecAlignerDataset.

    Args:
        transcript_dict: passed through as the first positional argument
            of CodecAlignerDataset.
        corpus_dir: directory handed to the dataset as its ``cache_dir``.
        lang: language identifier forwarded as ``lang``.
        device: device forwarded as ``device``.
        phone_input: forwarded as ``phone_input``.
        gpu_count: forwarded as ``gpu_count``.
        rank: forwarded as ``rank``.
        loading_processes: forwarded as ``loading_processes``. Defaults
            to 5, the value that used to be hard-coded here, so existing
            callers see no change.

    Returns:
        The constructed CodecAlignerDataset.
    """
    return CodecAlignerDataset(transcript_dict,
                               cache_dir=corpus_dir,
                               lang=lang,
                               loading_processes=loading_processes,
                               device=device,
                               phone_input=phone_input,
                               gpu_count=gpu_count,
                               rank=rank)
|
|
|
|
|
def prepare_tts_corpus(transcript_dict,
                       corpus_dir,
                       lang,
                       fine_tune_aligner=True,
                       use_reconstruction=True,
                       phone_input=False,
                       save_imgs=False,
                       gpu_count=1,
                       rank=0):
    """
    create an aligner dataset,
    fine-tune an aligner,
    create a TTS dataset,
    return it.

    Automatically skips parts that have been done before. The skip checks
    are purely file based:

    - ``<corpus_dir>/tts_train_cache.pt`` exists: no aligner work is done
      and the TTS dataset is created without an aligner checkpoint.
    - ``<corpus_dir>/aligner_train_cache.pt`` exists: the aligner dataset
      is not built in the first step.
    - ``<corpus_dir>/Aligner/aligner.pt`` exists: no aligner training.

    When training is needed and ``<MODELS_DIR>/Aligner/aligner.pt`` exists,
    that checkpoint is fine-tuned for at most 10000 steps. Otherwise an
    aligner is trained from scratch.

    Args:
        transcript_dict: forwarded unchanged to the aligner dataset and to
            TTSDataset.
        corpus_dir: directory that holds the cache files and the
            ``Aligner`` sub-directory for this corpus.
        lang: language identifier forwarded to both datasets.
        fine_tune_aligner: if False, no corpus specific aligner is trained
            and ``<MODELS_DIR>/Aligner/aligner.pt`` is used directly.
        use_reconstruction: forwarded to the aligner training loop.
        phone_input: forwarded to the aligner dataset.
        save_imgs: forwarded to TTSDataset.
        gpu_count: forwarded to TTSDataset.
        rank: forwarded to TTSDataset.

    Returns:
        The TTSDataset for this corpus.
    """
    # NOTE(review): gpu_count and rank are only forwarded to TTSDataset, the
    # aligner dataset is always built with its defaults - confirm this is intended.
    device = torch.device("cuda")
    pretrained_aligner = os.path.join(MODELS_DIR, "Aligner", "aligner.pt")

    if os.path.exists(os.path.join(corpus_dir, "tts_train_cache.pt")):
        # the TTS cache is already there, so no aligner checkpoint is handed over
        aligner_loc = None
    elif not fine_tune_aligner:
        aligner_loc = pretrained_aligner
    else:
        aligner_dir = os.path.join(corpus_dir, "Aligner")
        aligner_loc = os.path.join(aligner_dir, "aligner.pt")

        if not os.path.exists(os.path.join(corpus_dir, "aligner_train_cache.pt")):
            # called only for its effect of creating the cache, the result is not used
            prepare_aligner_corpus(transcript_dict, corpus_dir=corpus_dir, lang=lang, phone_input=phone_input, device=device)

        if not os.path.exists(aligner_loc):
            aligner_datapoints = prepare_aligner_corpus(transcript_dict, corpus_dir=corpus_dir, lang=lang, phone_input=phone_input, device=device)
            datapoint_count = len(aligner_datapoints)
            half_count = datapoint_count // 2
            start_from_pretrained = os.path.exists(pretrained_aligner)

            train_aligner(train_dataset=aligner_datapoints,
                          device=device,
                          save_directory=aligner_dir,
                          # fine-tuning is capped at 10000 steps, training from scratch is not
                          steps=min(half_count, 10000) if start_from_pretrained else half_count,
                          # half_count is 0 for a dataset with a single item,
                          # so clamp to keep the batch size positive
                          batch_size=32 if datapoint_count > 32 else max(1, half_count),
                          path_to_checkpoint=pretrained_aligner if start_from_pretrained else None,
                          fine_tune=start_from_pretrained,
                          debug_img_path=aligner_dir,
                          resume=False,
                          use_reconstruction=use_reconstruction)

    return TTSDataset(transcript_dict,
                      acoustic_checkpoint_path=aligner_loc,
                      cache_dir=corpus_dir,
                      device=device,
                      lang=lang,
                      save_imgs=save_imgs,
                      gpu_count=gpu_count,
                      rank=rank)
|
|