File size: 970 Bytes
d5175d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import numpy as np
from fairseq.data.audio.feature_transforms import (
    AudioFeatureTransform,
    register_audio_feature_transform,
)


@register_audio_feature_transform("global_cmvn")
class GlobalCMVN(AudioFeatureTransform):
    """Global CMVN (cepstral mean and variance normalization).

    The global statistics need to be pre-computed and stored in NumPy
    format (.npz). The archive is read once at construction time and must
    contain two arrays, stored under the keys ``"mean"`` and ``"std"``.
    Calling the transform returns ``(x - mean) / std``.
    """

    @classmethod
    def from_config_dict(cls, config=None):
        """Build the transform from a config mapping.

        Args:
            config: mapping holding the key ``"stats_npz_path"``. ``None`` is
                treated as an empty mapping, in which case the path passed
                to the constructor is ``None``.

        Returns:
            A new instance of ``cls``.
        """
        _config = {} if config is None else config
        # Use ``cls`` (not the hard-coded class name) so that subclasses get
        # an instance of their own type from this alternate constructor.
        return cls(_config.get("stats_npz_path"))

    def __init__(self, stats_npz_path):
        """Load the normalization statistics.

        Args:
            stats_npz_path: path of the .npz archive holding the ``"mean"``
                and ``"std"`` arrays.
        """
        self.stats_npz_path = stats_npz_path
        # ``np.load`` on a .npz returns a lazy archive object that keeps the
        # underlying file open; read both arrays and close it right away so
        # the file descriptor is not leaked for the lifetime of the transform.
        with np.load(stats_npz_path) as stats:
            self.mean, self.std = stats["mean"], stats["std"]

    def __repr__(self):
        return self.__class__.__name__ + f'(stats_npz_path="{self.stats_npz_path}")'

    def __call__(self, x):
        """Return ``x`` normalized with the stored statistics.

        Args:
            x: features to normalize; must be broadcastable against the
                stored ``mean`` and ``std`` arrays.

        Returns:
            A new array equal to ``(x - mean) / std``; ``x`` itself is not
            modified.
        """
        x = np.subtract(x, self.mean)
        x = np.divide(x, self.std)
        return x