Spaces:
Sleeping
Sleeping
File size: 986 Bytes
11f2c2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import os
import glob
from tqdm import tqdm
# Build a character vocabulary: collect every distinct character found on the
# first line of each *.trn file under `path` (spaces removed), in first-seen
# order, then write one "<char>\t<index>" row per character to dict_han.txt.

# Built with os.path.join instead of a backslash literal: "\s" and "\d" are
# invalid escape sequences (a warning on current Pythons) and a
# backslash-separated path only resolves on Windows. On Windows the joined
# value is identical to the original string.
path = os.path.join("data", "speech_data", "data_thchs30", "data")

# A dict preserves insertion order and gives O(1) membership, replacing the
# substring scan over an ever-growing string.
seen = {}
for file in tqdm(glob.glob(os.path.join(path, "*.trn"))):
    # Kept from the original: skip any match whose name does not end in
    # lower-case ".trn" (glob matching may be case-insensitive on some
    # platforms).
    if not file.endswith(".trn"):
        continue
    # Only the first line is used. readline() returns "" for an empty file,
    # so such a file contributes nothing instead of raising IndexError.
    # NOTE(review): assumes the transcripts are UTF-8, matching the encoding
    # used for the output file below - confirm against the dataset.
    with open(file, "r", encoding="utf-8") as trn:
        first_line = trn.readline()
    for l in first_line.strip().replace(" ", ""):
        seen.setdefault(l, None)

# Keep `res` as a string of unique characters, as before.
res = "".join(seen)
print(len(res))

# The index written next to each character is its first-seen position.
with open("dict_han.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{l}\t{i}\n" for i, l in enumerate(res))
# path ="datalist/thchs30/cv.wav.lst"
# with open("datalist/thchs30/cv.hzlable.txt","w",encoding="utf-8") as fw:
# with open(path,"r",encoding="utf-8") as f:
# for line in f.readlines():
# name, p = line.strip().split(" ")
# p = os.path.join("data\speech_data",p+".trn")
# print(name, p)
# label = " ".join(open(p.replace("dev","data"),"r").readlines()[0].strip().replace(" ",""))
# print(label)
# fw.write(name+" "+label+"\n")
|