# Spaces: Running on Zero
import csv
import json
import os
import re
import shutil
import sys
# Root directory under which the 'dataset/persian_data/...' tree is written.
# Captured once at import time, so it is the working directory the process
# had when this module was loaded.
main_path = os.getcwd()
def prepare_data_for_model(path):
    """Read an utterance-info CSV and return its cleaned rows.

    The CSV must have a header row containing the columns ``phenome``
    (spelled exactly like that), ``seg_id`` and ``speaker_id``. For every
    row the phoneme string is cleaned in two steps: bracketed numeric
    markers such as ``[12]`` are removed, then each ``|`` that has
    whitespace on both sides is replaced, together with that whitespace,
    by a single space.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list with one ``[phoneme, utterance_name, speaker_id]`` list per
        CSV row, in file order. A file with only a header yields ``[]``.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the required columns is missing.
    """
    data_lines = []
    # The context manager closes the file even if a row raises;
    # newline='' is what the csv module asks for when given a file object.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # Raw strings keep the regex escapes from being treated as
            # (invalid) string escapes.
            phoneme = re.sub(r"\[([0-9]+)\]", '', row['phenome'])
            phoneme = re.sub(r"\s+\|\s+", ' ', phoneme)
            data_lines.append([phoneme, row['seg_id'], row['speaker_id']])
    return data_lines
def _copy_split(rows, data_path, wav_dir, split_dir):
    """Copy one split's wav files and write a transcript next to each copy.

    Returns False as soon as a directory creation or wav copy fails,
    True once every row has been processed.
    """
    for line in rows:
        phoneme, utterance_name, speaker_id = line[0], line[1], line[2]
        source = os.path.join(
            data_path, '{0}/{1}.wav'.format(wav_dir, utterance_name))
        # The wav copy and its transcript share this path up to the suffix.
        stem = os.path.join(
            main_path,
            'dataset/persian_data/{0}/book-1/speaker-{1}/utterance-{2}'.format(
                split_dir, speaker_id, utterance_name))
        target = stem + '.wav'
        try:
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copyfile(source, target)
        except OSError as e:
            # A missing or unreadable source aborts the whole run; the
            # caller only learns that through the False return value.
            print(e)
            return False
        with open(stem + '.txt', 'w', encoding='utf-8') as fp:
            fp.write(phoneme)
    return True


def save_files(train_data, test_data, data_path):
    """Copy the train and test audio into the dataset tree with transcripts.

    For every row, ``<data_path>/train_wav/<utterance>.wav`` (or
    ``test_wav`` for test rows) is copied to
    ``<main_path>/dataset/persian_data/<train_data|test_data>/book-1/``
    ``speaker-<speaker_id>/utterance-<utterance>.wav`` and the row's
    phoneme string is written to the matching ``.txt`` file. Train rows
    are processed first; test rows are only processed if every train row
    succeeded.

    Args:
        train_data: Iterable of ``[phoneme, utterance_name, speaker_id]``
            rows for the training split.
        test_data: Same row layout, for the test split.
        data_path: Directory containing the ``train_wav`` and ``test_wav``
            source folders.

    Returns:
        True if every file was copied and written, False as soon as
        creating a target directory or copying a wav file fails (the error
        is printed).

    Raises:
        OSError: If a transcript file cannot be written.
    """
    if not _copy_split(train_data, data_path, 'train_wav', 'train_data'):
        return False
    return _copy_split(test_data, data_path, 'test_wav', 'test_data')
def main(data_path):
    """Build the dataset tree from the CSV info files found in *data_path*.

    ``train_info.csv`` and ``test_info.csv`` must both exist in
    *data_path*. Their rows are loaded with ``prepare_data_for_model``,
    the row counts are printed, and ``save_files`` copies the audio and
    writes the transcripts.

    Args:
        data_path: Directory holding the two info CSV files (and the wav
            folders that ``save_files`` reads from).

    Returns:
        -1 if either info file is missing or if ``save_files`` reports a
        failure; None on success.
    """
    info_paths = {}
    for split in ('train', 'test'):
        candidate = os.path.join(data_path, '{}_info.csv'.format(split))
        if not os.path.isfile(candidate):
            print('data_path is not correct!')
            return -1
        info_paths[split] = candidate

    train_data = prepare_data_for_model(info_paths['train'])
    test_data = prepare_data_for_model(info_paths['test'])
    print('number of train data: ' + str(len(train_data)))
    print('number of test data: ' + str(len(test_data)))

    if not save_files(train_data, test_data, data_path):
        # Without this branch a failed copy looked exactly like success to
        # the caller; use the same -1 convention as the checks above.
        print('Data could not be created.')
        return -1
    print('Data is created.')
if __name__ == "__main__":
    # Script entry point: expects the data directory as the only argument.
    if len(sys.argv) < 2:
        # Avoid a bare IndexError traceback when the argument is missing.
        print('usage: {} <data_path>'.format(sys.argv[0]))
        sys.exit(1)
    # main() signals failure with -1; turn that into a non-zero exit status.
    sys.exit(1 if main(sys.argv[1]) == -1 else 0)