# Audio / mel-spectrogram feature settings.
# The code that consumes these keys is not visible in this file, so the units
# noted below are assumptions — TODO confirm against the feature extractor.
sample_rate: 16000  # presumably Hz
hop_length: 256  # presumably samples between successive frames
win_length: 1024  # presumably samples per analysis window
n_mel_channels: 80  # reused further down via `!ref <n_mel_channels>`
n_fft: 1024
mel_fmin: 0.0  # presumably Hz
mel_fmax: 8000.0  # presumably Hz
mel_normalized: False
power: 1
norm: "slaney"
mel_scale: "slaney"
dynamic_range_compression: True

# Embedding network. The `!new:` tag asks the HyperPyYAML loader to construct
# the named class, passing the nested mapping as keyword arguments; the keys
# must therefore be indented under `embedding_model`.
embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  # Tied to the mel feature size so the two cannot drift apart.
  input_size: !ref <n_mel_channels>
  # The four lists below each have five entries; presumably one per block of
  # the network — verify against the ECAPA_TDNN constructor.
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 192

# Input normalizer, constructed by the loader through the `!new:` tag with the
# nested keys as keyword arguments.
normalizer: !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: False

# Named collection of the objects defined under the top-level `normalizer`
# and `embedding_model` keys. `!ref` points at those existing definitions
# rather than declaring new ones, so both entries must be nested here.
modules:
  normalizer: !ref <normalizer>
  embedding_model: !ref <embedding_model>

# Pretrainer object built via `!new:`. `loadables` is its keyword argument: a
# mapping from a name to the object (referenced with `!ref`) registered under
# that name. Presumably these are the objects whose saved parameters get
# loaded — confirm against the Pretrainer documentation.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    normalizer: !ref <normalizer>
    embedding_model: !ref <embedding_model>