Spaces:
Sleeping
Sleeping
import os | |
import glob | |
from tqdm import tqdm | |
# Build a character-to-index dictionary from transcript files.
#
# The first line of every ``*.trn`` file under ``path`` is read, its spaces
# are removed, and every distinct character is recorded in first-seen order.
# The characters are then written to ``dict_han.txt`` in the working
# directory, one ``<char>\t<index>`` line per character.

# Built from components rather than a backslash literal: "\s" and "\d" are
# invalid escape sequences in a normal string, and backslash separators only
# resolve on Windows.
path = os.path.join("data", "speech_data", "data_thchs30", "data")

# A dict preserves insertion order and gives O(1) membership, replacing the
# linear "char not in string" scan; only the keys are used.
seen = {}
for file in tqdm(glob.glob(os.path.join(path, "*.trn"))):
    # Kept from the original: an exact, case-sensitive suffix check on top of
    # the glob pattern.
    if not file.endswith(".trn"):
        continue
    # The context manager closes the handle.  The encoding is pinned instead
    # of relying on the platform default.
    # NOTE(review): assumes the transcripts are UTF-8, matching the encoding
    # used for the output file -- confirm against the dataset.
    with open(file, "r", encoding="utf-8") as trn:
        # readline() yields "" for an empty file, so an empty transcript
        # contributes nothing instead of raising IndexError.
        first_line = trn.readline()
    for ch in first_line.strip().replace(" ", ""):
        seen.setdefault(ch, None)

# ``res`` keeps its original meaning: every distinct character, concatenated
# in first-seen order.
res = "".join(seen)
print(len(res))

with open("dict_han.txt", "w", encoding="utf-8") as f:
    # One batched write of "<char>\t<index>\n" lines.
    f.writelines(f"{ch}\t{i}\n" for i, ch in enumerate(res))
# NOTE: disabled snippet, kept for reference only (nothing below is executed).
# path ="datalist/thchs30/cv.wav.lst"
# with open("datalist/thchs30/cv.hzlable.txt","w",encoding="utf-8") as fw:
#     with open(path,"r",encoding="utf-8") as f:
#         for line in f.readlines():
#             name, p = line.strip().split(" ")
#             p = os.path.join("data\speech_data",p+".trn")
#             print(name, p)
#             label = " ".join(open(p.replace("dev","data"),"r").readlines()[0].strip().replace(" ",""))
#             print(label)
#             fw.write(name+" "+label+"\n")