Spaces:
Sleeping
Sleeping
File size: 1,775 Bytes
4451360 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# Run this script from the repository's main folder: it imports the local
# `text` and `utils` modules and writes to paths relative to `./data/`.
# %%
import text
from utils import read_lines_from_file
def write_lines_to_file(path, lines, mode='w', encoding='utf-8'):
    """Write `lines` to `path`, newline-separated, with no trailing newline.

    Args:
        path: Destination file path.
        lines: Iterable of strings (list, tuple, generator, ...).
        mode: File mode passed to `open` (default 'w').
        encoding: Text encoding passed to `open` (default 'utf-8').
    """
    with open(path, mode, encoding=encoding) as f:
        # join() places '\n' only *between* items, so the last line is
        # written without a newline, and it works for iterables that do
        # not support len() (e.g. generators).
        f.write('\n'.join(lines))
# %%
# Load the transcript and build three parallel lists of output lines.
# Each input line is expected to look like: "<wav_name>" "<utterance>"
# NOTE(review): absolute, machine-specific input path, although the output
# paths are relative to the main folder — confirm this is intended.
lines = read_lines_from_file(r'C:\Users\ni-user\Downloads\New folder\tts-arabic-pytorch\datatext.txt')
#lines = read_lines_from_file('./data/test-orthographic-transcript.txt')

new_lines_arabic = []
new_lines_phonetic = []
new_lines_buckw = []

for line in lines:
    # A blank line has no '" "' separator and would break the unpacking
    # below; it carries no data, so skip it.
    if not line.strip():
        continue

    wav_name, utterance = line.split('" "')
    # Drop the opening quote of the first field and the closing quote of
    # the second one.
    wav_name, utterance = wav_name[1:], utterance[:-1]

    # Move '~' in front of a following a/i/u and collapse ' - ' to a space.
    utterance = utterance.replace("a~", "~a") \
                         .replace("i~", "~i") \
                         .replace("u~", "~u") \
                         .replace(" - ", " ")

    # NOTE(review): `utterance_arab` holds the result of
    # text.arabic_to_buckwalter, while the unconverted `utterance` goes to
    # the "buckw" list — the arab/buckw labels look swapped; confirm against
    # the actual script of the input file.
    utterance_arab = text.arabic_to_buckwalter(utterance)
    utterance_phon = text.arabic_to_phonemes(utterance)

    line_new_ara = f'"{wav_name}" "{utterance_arab}"'
    new_lines_arabic.append(line_new_ara)

    line_new_pho = f'"{wav_name}" "{utterance_phon}"'
    new_lines_phonetic.append(line_new_pho)

    line_new_buckw = f'"{wav_name}" "{utterance}"'
    # Fix: this line used to be appended to new_lines_arabic, which left
    # new_lines_buckw empty and put two entries per utterance in the
    # arabic list.
    new_lines_buckw.append(line_new_buckw)
# %% train
# Write each collected list of lines to its own training file.
for out_path, out_lines in (
    ('./data/SA/train_arab.txt', new_lines_arabic),
    ('./data/SA/train_phon.txt', new_lines_phonetic),
    ('./data/SA/train_buckw.txt', new_lines_buckw),
):
    write_lines_to_file(out_path, out_lines)
# %% test
# write_lines_to_file('./data/test_arab.txt', new_lines_arabic)
# write_lines_to_file('./data/test_phon.txt', new_lines_phonetic)
# write_lines_to_file('./data/test_buckw.txt', new_lines_buckw)
|