File size: 650 Bytes
258fd02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import kaldiio
from tqdm import tqdm
import torch

def accumulate_code_counts(counts, codes):
    """Add the occurrences of each code id in ``codes[0, 0]`` to ``counts[0]``.

    Args:
        counts: Tensor of shape ``(1, num_codes)`` holding running
            per-code occurrence counts; updated in place.
        codes: Array-like of integer code ids indexable as
            ``codes[0, 0]`` (for example the array returned by
            ``kaldiio.load_mat``). Only the ``[0, 0, :]`` slice is counted.
            NOTE(review): presumably this is the first codebook of a single
            utterance -- confirm against the code that wrote the scp.

    Raises:
        IndexError: If any id lies outside ``[0, num_codes)``. Negative ids
            would otherwise wrap around and silently bump the wrong bin.
    """
    num_codes = counts.shape[-1]
    # ``tolist()`` sidesteps numpy dtypes that torch cannot wrap directly
    # (e.g. uint16 on older torch releases).
    ids = torch.as_tensor(codes[0, 0].tolist(), dtype=torch.long)
    if ids.numel() == 0:
        return
    lowest, highest = int(ids.min()), int(ids.max())
    if lowest < 0 or highest >= num_codes:
        raise IndexError(
            f"code id out of range [0, {num_codes}): "
            f"min={lowest}, max={highest}"
        )
    # One vectorised histogram per utterance instead of one tensor
    # index-and-write per token.
    counts[0] += torch.bincount(ids, minlength=num_codes)


def codebook_usage(counts):
    """Return the fraction of codes in ``counts[0]`` seen at least once.

    The result is a 0-dim tensor, so printing it keeps the ``tensor(...)``
    format.
    """
    return 1 - (counts[0] == 0).sum() / counts.shape[-1]


def report_usage(counts):
    """Print the current codebook usage between separator lines."""
    print("=========")
    print(codebook_usage(counts))
    print("=========")


def main(scp_path='token.scp', num_codes=16384, report_every=1000):
    """Measure codebook utilisation over every entry listed in an scp file.

    Each non-blank line of ``scp_path`` is ``<key> <location>``; the location
    is loaded with ``kaldiio.load_mat`` and its code ids are tallied. Usage is
    printed every ``report_every`` entries (including the first) and once
    more after the last entry.

    Args:
        scp_path: Path to the scp file.
        num_codes: Codebook size, i.e. the number of distinct code ids.
        report_every: Number of entries between intermediate reports.
    """
    # int64 counts stay exact; float32 stops incrementing at 2**24.
    counts = torch.zeros(1, num_codes, dtype=torch.long)
    with open(scp_path, 'r') as f:
        stripped = (line.strip() for line in f)
        # Blank lines carry no entry and are skipped.
        entries = (entry for entry in stripped if entry)
        for item_idx, entry in tqdm(enumerate(entries)):
            # Split on the first whitespace run only, so a location that
            # itself contains spaces stays intact.
            _key, pos = entry.split(maxsplit=1)
            accumulate_code_counts(counts, kaldiio.load_mat(pos))
            if item_idx % report_every == 0:
                report_usage(counts)
    report_usage(counts)


if __name__ == "__main__":
    main()