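# ############################################################################
# Model: Speaker recognition with discrete HuBERT tokens and ECAPA-TDNN
# ############################################################################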
# Feature parameters
n_mels: 80

# HuggingFace repository holding the pretrained checkpoints
pretrained_path: flexthink/discrete_hubert_spk_rec_ecapatdn

# Local folder for downloaded checkpoints
save_folder: tmp

# SSL model (HuBERT large) and k-means quantization settings
ssl_model_type: hubert
ssl_hub: facebook/hubert-large-ll60k
ssl_folder: !ref <save_folder>/ssl_checkpoint
kmeans_repo_id: speechbrain/SSL_Quantization
kmeans_cache_dir: !ref <save_folder>/kmeans_checkpoint
kmeans_dataset: LibriSpeech-100-360-500
freeze_ssl: True
freeze_feature_extractor: True
num_clusters: 1000
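# Presumably, the k-means quantizers (one codebook of <num_clusters> centroids
# per selected SSL layer) are downloaded from <kmeans_repo_id> and cached in
# <kmeans_cache_dir>.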

# SSL layers whose discrete tokens feed the model. Each of the six token
# streams (matching num_codebooks) is used as-is: no deduplication of
# repeated tokens and no BPE tokenizer.
ssl_layer_num: [1, 3, 7, 12, 18, 23]
ssl_layer_num_selected: [1, 3, 7, 12, 18, 23]
num_codebooks: 6
deduplicate: [False, False, False, False, False, False]
bpe_tokenizer_path: [null, null, null, null, null, null]
sample_rate: 16000

# Embedding dimension (hidden size of HuBERT large)
encoder_dim: 1024

tokenizer_config:
    SSL_layers: !ref <ssl_layer_num>
    deduplicates: !ref <deduplicate>
    bpe_tokenizers: !ref <bpe_tokenizer_path>

# Embeds each discrete token stream into <encoder_dim>-dimensional vectors
discrete_embedding_layer: !new:custom_interface.Discrete_EmbeddingLayer
    num_codebooks: !ref <num_codebooks>
    vocab_size: !ref <num_clusters>
    emb_dim: !ref <encoder_dim>
    available_layers: !ref <ssl_layer_num>
    layers: !ref <ssl_layer_num_selected>

# Learns attention weights to combine the per-layer token embeddings
attention_mlp: !new:custom_interface.AttentionMLP
    input_dim: !ref <encoder_dim>
    hidden_dim: !ref <encoder_dim>

# Speaker embedding extractor
embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
    input_size: !ref <encoder_dim>
    channels: [1024, 1024, 1024, 1024, 3072]
    kernel_sizes: [5, 3, 3, 3, 1]
    dilations: [1, 2, 3, 4, 1]
    groups: [1, 1, 1, 1, 1]
    attention_channels: 128
    lin_neurons: 192

modules:
    embedding_model: !ref <embedding_model>
    attention_mlp: !ref <attention_mlp>
    discrete_embedding_layer: !ref <discrete_embedding_layer>

# Downloads and loads the pretrained weights for the three modules above
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        embedding_model: !ref <embedding_model>
        attention_mlp: !ref <attention_mlp>
        discrete_embedding_layer: !ref <discrete_embedding_layer>
    paths:
        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
        attention_mlp: !ref <pretrained_path>/attention_mlp.ckpt
        discrete_embedding_layer: !ref <pretrained_path>/discrete_embedding_layer.ckpt
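
# A minimal loading sketch (an assumption based on the standard HyperPyYAML /
# SpeechBrain Pretrainer flow; the filename is hypothetical and this is not
# part of the config):
#
#     from hyperpyyaml import load_hyperpyyaml
#
#     with open("hyperparams.yaml") as f:
#         hparams = load_hyperpyyaml(f)
#
#     # Fetch the checkpoints listed under `paths` and load them into the
#     # corresponding `loadables`.
#     hparams["pretrainer"].collect_files()
#     hparams["pretrainer"].load_collected()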