File size: 2,496 Bytes
bc752b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
import logging
import math
import sys

import numpy as np
import torch


class GlobalCMVN(torch.nn.Module):
    """Apply global mean (and optionally variance) normalization to features."""

    def __init__(self, mean: torch.Tensor, istd: torch.Tensor, norm_var: bool = True):
        """
        Args:
            mean (torch.Tensor): mean statistics subtracted from the input
            istd (torch.Tensor): inverse standard deviation (1.0 / std),
                must have the same shape as ``mean``
            norm_var (bool): when True the mean-subtracted input is also
                multiplied by ``istd``; when False only the mean is removed
        """
        super().__init__()
        assert mean.shape == istd.shape
        # Registered as buffers (not parameters): they are exposed as
        # self.mean / self.istd and are included in the module's state_dict.
        for buffer_name, stat in (("mean", mean), ("istd", istd)):
            self.register_buffer(buffer_name, stat)
        self.norm_var = norm_var

    def forward(self, x: torch.Tensor):
        """
        Args:
            x (torch.Tensor): (batch, max_len, feat_dim)

        Returns:
            (torch.Tensor): normalized feature
        """
        shifted = x - self.mean
        if not self.norm_var:
            # Mean-only normalization was requested.
            return shifted
        return shifted * self.istd


def load_cmvn_json(json_cmvn_file):
    """Load global CMVN statistics from a JSON file.

    The JSON object must provide the keys ``"mean_stat"``, ``"var_stat"``
    and ``"frame_num"``. Each ``mean_stat`` entry is divided by
    ``frame_num`` to obtain the mean; each ``var_stat`` entry is divided by
    ``frame_num`` and reduced by the squared mean to obtain the variance,
    which is then turned into an inverse standard deviation.

    Args:
        json_cmvn_file: path of the JSON statistics file.

    Returns:
        np.ndarray: two stacked rows, row 0 the means and row 1 the inverse
        standard deviations.
    """
    with open(json_cmvn_file) as stats_file:
        stats = json.load(stats_file)

    means = stats["mean_stat"]
    inv_stds = stats["var_stat"]
    frames = stats["frame_num"]
    for dim, feat_sum in enumerate(means):
        mu = feat_sum / frames
        means[dim] = mu
        variance = inv_stds[dim] / frames - mu * mu
        # Floor the variance so the square root / reciprocal below stays finite.
        floored = 1.0e-20 if variance < 1.0e-20 else variance
        inv_stds[dim] = 1.0 / math.sqrt(floored)
    return np.array([means, inv_stds])


def load_cmvn_kaldi(kaldi_cmvn_file):
    """Load global CMVN statistics from a Kaldi text-format stats file.

    The whitespace-separated tokens of the file must be laid out as::

        [ sum_1 ... sum_D count  sqsum_1 ... sqsum_D 0 ]

    Each sum is divided by ``count`` to obtain the mean; each squared sum is
    divided by ``count`` and reduced by the squared mean to obtain the
    variance, which is then turned into an inverse standard deviation.

    Args:
        kaldi_cmvn_file: path of the Kaldi CMVN statistics file (text format).

    Returns:
        np.ndarray: array of shape (2, D); row 0 holds the means and row 1
        the inverse standard deviations.

    Raises:
        SystemExit: with exit code 1 when the file is a Kaldi binary file.
        AssertionError: when the token layout does not match the expected
            bracketed format.
    """
    # Read raw bytes: decoding a binary matrix as text can fail before the
    # format header is ever inspected.
    with open(kaldi_cmvn_file, "rb") as file:
        raw = file.read()

    # kaldi binary file start with '\0B'
    if raw.startswith(b"\0B"):
        logging.error(
            "kaldi cmvn binary file is not supported, please "
            "recompute it by: compute-cmvn-stats --binary=false "
            "scp:feats.scp global_cmvn"
        )
        sys.exit(1)

    arr = raw.decode("utf-8").split()
    assert len(arr) >= 4, "kaldi cmvn file is too short"
    assert arr[0] == "["
    assert arr[-2] == "0"
    assert arr[-1] == "]"
    # Tokens: "[", D sums, count, D squared sums, "0", "]".
    feat_dim = (len(arr) - 4) // 2
    sums = [float(tok) for tok in arr[1:feat_dim + 1]]
    count = float(arr[feat_dim + 1])
    sq_sums = [float(tok) for tok in arr[feat_dim + 2:2 * feat_dim + 2]]

    avg = []
    istd = []
    for total, sq_total in zip(sums, sq_sums):
        mu = total / count
        variance = sq_total / count - mu * mu
        # Floor the variance so the square root / reciprocal below stays finite.
        if variance < 1.0e-20:
            variance = 1.0e-20
        avg.append(mu)
        istd.append(1.0 / math.sqrt(variance))
    cmvn = np.array([avg, istd])
    return cmvn


def load_cmvn(filename, is_json):
    """Load CMVN statistics and return them as two separate rows.

    Args:
        filename: path of the statistics file.
        is_json: when truthy the file is read with ``load_cmvn_json``,
            otherwise with ``load_cmvn_kaldi``.

    Returns:
        tuple: element 0 and element 1 of the loaded statistics array.
    """
    stats = load_cmvn_json(filename) if is_json else load_cmvn_kaldi(filename)
    return stats[0], stats[1]