# Spaces: Running on L40S
import kaldiio
import torch
from tqdm import tqdm
# Width of the count table; token ids are used directly as column indices,
# so every id must lie in [0, NUM_TOKEN_IDS).
NUM_TOKEN_IDS = 16384
# The running statistic is printed whenever the line index is a multiple of this.
REPORT_EVERY = 1000


def accumulate_token_counts(counts, codes):
    """Add the occurrences of each token id in ``codes[0, 0]`` to ``counts[0]``.

    Args:
        counts: tensor of shape ``(1, num_ids)``; updated in place.
        codes: array-like indexed as ``codes[0, 0, i]``, holding integer
            token ids (assumed 3-D with integer values -- TODO confirm
            against the writer of the matrices).

    Returns:
        ``counts``, for convenience.

    Raises:
        IndexError: if an id is >= ``counts.shape[-1]``.
        RuntimeError: if an id is negative (raised by ``torch.bincount``).
    """
    num_ids = counts.shape[-1]
    # Going through a Python list normalises whatever integer dtype the
    # loader produced before handing the ids to torch.
    token_ids = torch.tensor(codes[0, 0].tolist(), dtype=torch.long).reshape(-1)
    if token_ids.numel() and int(token_ids.max()) >= num_ids:
        raise IndexError(
            f"token id {int(token_ids.max())} is out of range for "
            f"{num_ids} entries"
        )
    # One vectorised histogram per matrix instead of one tensor update per token.
    counts[0] += torch.bincount(token_ids, minlength=num_ids)
    return counts


def used_fraction(counts):
    """Return the fraction of columns of ``counts[0]`` that are nonzero.

    The result is a 0-d tensor, exactly as the original inline expression
    produced.
    """
    return 1 - (counts[0] == 0).sum() / counts.shape[-1]


def print_used_fraction(counts):
    """Print the used fraction of ``counts`` between two separator lines."""
    print("=========")
    print(used_fraction(counts))
    print("=========")


def main(scp_path='token.scp', num_ids=NUM_TOKEN_IDS, report_every=REPORT_EVERY):
    """Count token ids over every matrix listed in ``scp_path`` and report usage.

    Each non-blank line of the list file must hold exactly two
    whitespace-separated fields: a key and a location understood by
    ``kaldiio.load_mat``. The used fraction is printed periodically and
    once more after the whole file has been read.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields.
    """
    counts = torch.zeros(1, num_ids)
    with open(scp_path, 'r') as f:
        for item_idx, line in tqdm(enumerate(f)):
            fields = line.split()
            if fields:  # tolerate blank lines instead of crashing on unpack
                _key, mat_path = fields
                accumulate_token_counts(counts, kaldiio.load_mat(mat_path))
            if item_idx % report_every == 0:
                print_used_fraction(counts)
    print_used_fraction(counts)
    return counts


if __name__ == "__main__":
    main()