PyTorch
Catalan
TTS
audio
synthesis
VITS
speech
coqui.ai
Gerard Muniesa
[NEW] Add model Card, model files and data preprocessing files
a5fbdd4
raw
history blame
6.43 kB
import os
import re
import argparse
from glob import glob
from pathlib import Path
from subprocess import call
def main():
    """Command-line entry point: convert TTS corpora to a VCTK-style layout.

    Options (all optional on the command line):
        --google-path   root of the Google TTS data; its ``male/`` and
                        ``female/`` sub-directories are handed to
                        ``convert_google``.
        --festcat-path  root of the Festcat data, handed to
                        ``convert_festcat``.
        --final-path    output root passed to the converters as the target
                        base path.

    A corpus whose path is omitted or does not exist is reported on stdout
    and skipped. If at least one corpus is available but ``--final-path`` is
    missing, the parser exits with a usage error instead of failing later
    inside ``os.path.join``.
    """
    my_parser = argparse.ArgumentParser(
        description='Convert Google TTS and Festcat data to VCTK format')
    my_parser.add_argument('--google-path',
                           metavar='path',
                           type=str,
                           help='root directory of the Google TTS data '
                                '(contains male/ and female/)')
    my_parser.add_argument('--festcat-path',
                           metavar='path',
                           type=str,
                           help='root directory of the Festcat data')
    my_parser.add_argument('--final-path',
                           metavar='path',
                           type=str,
                           help='output root directory for the converted data')
    # NOTE: Common Voice conversion (convert_cv) is not wired into the CLI.

    args = my_parser.parse_args()
    google_path = args.google_path
    festcat_path = args.festcat_path
    target_base_path = args.final_path

    # An omitted option is None; treat it like a path that does not exist
    # rather than letting string concatenation / os.path.exists blow up.
    have_google = google_path is not None and os.path.exists(google_path)
    have_festcat = festcat_path is not None and os.path.exists(festcat_path)

    if (have_google or have_festcat) and target_base_path is None:
        my_parser.error('--final-path is required when converting data')

    if have_google:
        google_tts_paths = [google_path + "/male/", google_path + "/female/"]
        print("Converting google_tts data to vctk format")
        convert_google(google_tts_paths, target_base_path)
    else:
        print("Google_tts processed data not found")

    if have_festcat:
        print("Converting festcat data to vctk format")
        convert_festcat(festcat_path, target_base_path)
    else:
        print("Festcat processed data not found")
def convert_google(google_tts_paths, target_base_path):
    """Convert Google TTS metadata and audio into the VCTK-style layout.

    Every ``*_*.txt`` metadata file directly inside a corpus directory is read
    line by line. Each line must have the form ``text_id|text``. The speaker
    id is the second ``_``-separated field of ``text_id``. For each line,
    ``convert_meta`` is called with:

    * text target:  ``<target_base_path>/txt/<speaker_id>/<text_id>.txt``
    * wav target:   ``<target_base_path>/wav/<speaker_id>/<text_id>.wav``
    * wav source:   ``<corpus dir>/wavs/<text_id>.wav``

    Inverted punctuation marks (``¿`` and ``¡``) are removed from the text
    before it is passed on. Blank lines are ignored.

    Args:
        google_tts_paths: list of corpus directories.
        target_base_path: root directory of the converted data set.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``|``.
        IndexError: if a ``text_id`` contains no ``_``.
    """
    # NOTE(review): the [:1] slice means only the FIRST directory in the list
    # is converted. It is kept as in the original; confirm whether the
    # remaining directories are meant to be processed as well.
    for g_path in google_tts_paths[:1]:
        meta_files = glob(f"{g_path}/*_*.txt")
        for meta_file in meta_files:
            print(meta_file)
            # Context manager so the handle is closed after each file.
            with open(meta_file) as meta:
                for raw_line in meta:
                    line = raw_line.strip()
                    if not line:
                        continue
                    text_id, text = line.split('|')
                    # str.replace returns a new string; the result must be
                    # assigned or the marks are never actually removed.
                    text = text.replace('¿', '').replace('¡', '')
                    speaker_id = text_id.split('_')[1]
                    target_text_file = os.path.join(target_base_path, 'txt',
                                                    speaker_id, text_id + '.txt')
                    target_wav_file = os.path.join(target_base_path, 'wav',
                                                   speaker_id, text_id + '.wav')
                    source_wav_file = os.path.join(g_path, 'wavs',
                                                   text_id + '.wav')
                    speaker_paths = [os.path.dirname(target_text_file),
                                     os.path.dirname(target_wav_file)]
                    convert_meta(target_text_file, target_wav_file,
                                 source_wav_file, speaker_paths, text)
def convert_meta(target_text_file,
                 target_wav_file,
                 source_wav_file,
                 speaker_paths, text):
    """Write one transcript file and copy its audio file into the target tree.

    Args:
        target_text_file: path of the transcript file to create; an existing
            file is overwritten.
        target_wav_file: destination path for the audio; if a file already
            exists there it is left untouched.
        source_wav_file: path of the audio file to copy.
        speaker_paths: directories to create (including missing parents)
            before anything is written.
        text: transcript written verbatim to ``target_text_file``.

    Raises:
        IOError: if ``source_wav_file`` is not an existing file. The check is
            done first, so no transcript is left behind without its audio.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    import shutil

    # os.path.isfile returns False rather than raising, so it has to be
    # tested explicitly for the error to ever be reported.
    if not os.path.isfile(source_wav_file):
        raise IOError('{} does not exist'.format(source_wav_file))

    # create directories (makedirs also creates the txt/ and wav/ parents)
    for speaker_path in speaker_paths:
        os.makedirs(speaker_path, exist_ok=True)

    # write text file
    with open(target_text_file, 'w') as out:
        out.write(text)

    # copy wav file; shutil.copy raises on failure instead of returning an
    # exit status that would be silently ignored.
    if not os.path.isfile(target_wav_file):
        shutil.copy(source_wav_file, target_wav_file)
def convert_festcat(festcat_path, target_base_path):
    """Convert Festcat metadata and audio into the VCTK-style layout.

    Every ``<festcat_path>/<speaker>/*_train.txt`` file is read line by line.
    The name of the directory containing the metadata file is used as the
    speaker id. Each processed line must have the form ``text_id|text``. For
    each such line, ``convert_meta`` is called with:

    * text target:  ``<target_base_path>/txt/<speaker>/<text_id>.txt``
    * wav target:   ``<target_base_path>/wav/<speaker>/<text_id>.wav``
    * wav source:   ``<festcat_path>/<speaker>/wavs/<text_id>.wav``

    Lines containing ``[`` are reported on stdout and skipped. Inverted
    punctuation marks (``¿`` and ``¡``) are removed from the text before it
    is passed on. Blank lines are ignored.

    Args:
        festcat_path: root directory of the Festcat data.
        target_base_path: root directory of the converted data set.

    Raises:
        ValueError: if a processed line does not contain exactly one ``|``.
    """
    meta_files = glob(f"{festcat_path}/*/*_train.txt")
    for meta_file in meta_files:
        speaker_name = meta_file.split(os.sep)[-2]
        print(meta_file)
        # Context manager so the handle is closed after each file.
        with open(meta_file) as meta:
            for line in meta:
                # NOTE(review): '[' presumably marks annotation markup that
                # should not reach the transcripts - confirm with the data.
                if '[' in line:
                    print('line: {} skipped'.format(line))
                    continue
                stripped = line.strip()
                if not stripped:
                    continue
                text_id, text = stripped.split('|')
                # str.replace returns a new string; the result must be
                # assigned or the marks are never actually removed.
                text = text.replace('¿', '').replace('¡', '')
                speaker_id = speaker_name
                target_text_file = os.path.join(target_base_path, 'txt',
                                                speaker_id, text_id + '.txt')
                target_wav_file = os.path.join(target_base_path, 'wav',
                                               speaker_id, text_id + '.wav')
                source_wav_file = os.path.join(festcat_path, speaker_name,
                                               'wavs', text_id + '.wav')
                speaker_paths = [os.path.dirname(target_text_file),
                                 os.path.dirname(target_wav_file)]
                convert_meta(target_text_file, target_wav_file,
                             source_wav_file, speaker_paths, text)
def convert_cv(common_voice_path, target_base_path):
    """Convert Common Voice metadata and audio into the VCTK-style layout.

    Every ``*.txt`` file directly inside ``common_voice_path`` is read line by
    line. The speaker id is the metadata file name with every ``ca_`` and
    ``.txt`` substring removed. Each line must have the form
    ``text_id|text``. For each line, ``convert_meta`` is called with:

    * text target:  ``<target_base_path>/txt/<speaker_id>/<text_id>.txt``
    * wav target:   ``<target_base_path>/wav/<speaker_id>/<text_id>.wav``
    * wav source:   ``<common_voice_path>/wavs/<text_id>.wav``

    The text is passed on unchanged. Blank lines are ignored.

    Args:
        common_voice_path: directory holding the metadata files and ``wavs/``.
        target_base_path: root directory of the converted data set.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``|``.
    """
    meta_files = glob(f"{common_voice_path}/*.txt")
    for meta_file in meta_files:
        print(meta_file)
        file_name = meta_file.split(os.sep)[-1]
        speaker_id = file_name.replace("ca_", "").replace(".txt", "")
        # Context manager so the handle is closed after each file.
        with open(meta_file) as meta:
            for raw_line in meta:
                line = raw_line.strip()
                if not line:
                    continue
                text_id, text = line.split('|')
                target_text_file = os.path.join(target_base_path, 'txt',
                                                speaker_id, text_id + '.txt')
                target_wav_file = os.path.join(target_base_path, 'wav',
                                               speaker_id, text_id + '.wav')
                source_wav_file = os.path.join(common_voice_path,
                                               'wavs', text_id + '.wav')
                speaker_paths = [os.path.dirname(target_text_file),
                                 os.path.dirname(target_wav_file)]
                convert_meta(target_text_file, target_wav_file,
                             source_wav_file, speaker_paths, text)
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()