from dataclasses import dataclass


@dataclass
class HParams:
    """Default hyperparameters, grouped by the stage that reads them.

    Every attribute carries a type annotation so that ``@dataclass`` registers
    it as a real field. Without annotations the decorator sees no fields at
    all: the generated ``__init__`` accepts no overrides and ``__repr__`` /
    ``__eq__`` ignore every value. With them, callers can write e.g.
    ``HParams(mel_n_channels=80)`` while ``HParams()`` still yields the
    defaults below. The defaults also remain readable as class attributes.
    """

    ## Mel-filterbank
    mel_window_length: int = 25  # In milliseconds
    mel_window_step: int = 10  # In milliseconds
    mel_n_channels: int = 40

    ## Audio
    sampling_rate: int = 16000
    # Number of spectrogram frames in a partial utterance
    partials_n_frames: int = 160  # 1600 ms
    # Number of spectrogram frames at inference
    inference_n_frames: int = 80  # 800 ms

    ## Voice Activity Detection
    # Window size of the VAD. Must be either 10, 20 or 30 milliseconds.
    # This sets the granularity of the VAD. Should not need to be changed.
    vad_window_length: int = 30  # In milliseconds
    # Number of frames to average together when performing the moving average
    # smoothing. The larger this value, the larger the VAD variations must be
    # to not get smoothed out.
    vad_moving_average_width: int = 8
    # Maximum number of consecutive silent frames a segment can have.
    vad_max_silence_length: int = 6

    ## Audio volume normalization
    audio_norm_target_dBFS: int = -30

    ## Model parameters
    model_hidden_size: int = 256
    model_embedding_size: int = 256
    model_num_layers: int = 3

    ## Training parameters
    learning_rate_init: float = 1e-4
    speakers_per_batch: int = 64
    utterances_per_speaker: int = 10


# Shared module-level instance holding the default values.
hparams = HParams()