# NOTE(review): the lines above `common:` were empty extraction residue and
# have been dropped. If this file originally began with a Hydra package
# directive (e.g. `# @package _group_`), restore it -- TODO confirm.

# Run-wide settings: fp16 flag, log format and logging interval, random seed,
# and the TensorBoard log directory name.
common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog

# Checkpoint policy: update-based save interval, number of interval
# checkpoints to keep, and the flag disabling per-epoch checkpoints.
checkpoint:
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true

# Distributed-training settings: DDP and communication backends, world size,
# rendezvous port, and processes per node.
# NOTE(review): world size 32 with 8 processes per node presumably means a
# 4-node job -- confirm against the launch scripts.
distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 32
  distributed_port: 29671
  nprocs_per_node: 8
  find_unused_parameters: true

# Task definition. Values written as `???` are OmegaConf "mandatory missing"
# placeholders: they have no default and must be supplied by the caller
# (for example as command-line overrides).
task:
  _name: hubert_pretraining
  data: ???
  label_dir: ???
  labels: ???
  # Interpolated from `model.label_rate` so the two values cannot diverge.
  label_rate: ${model.label_rate}
  sample_rate: 16000
  max_sample_size: 250000
  min_sample_size: 32000
  pad_audio: false
  random_crop: true
  normalize: false

# Data-loading and validation cadence: worker count, per-batch token budget,
# handling of invalid-size inputs in valid/test, and validation intervals.
dataset:
  num_workers: 6
  max_tokens: 1400000
  skip_invalid_size_inputs_valid_test: true
  validate_interval: 5
  validate_interval_updates: 10000

# Loss configuration: weights for the masked / non-masked prediction terms
# and a list of additional loss weights.
criterion:
  _name: hubert
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  # Single-element list; the redundant trailing comma was removed
  # (`[10,]` and `[10]` load to the same value).
  loss_weights: [10]

# Optimization schedule: total number of updates, learning rate (given as a
# one-element list), and the gradient clipping norm.
optimization:
  max_update: 400000
  lr: [0.0005]
  clip_norm: 10.0

# Optimizer selection and hyper-parameters.
optimizer:
  _name: adam
  # Kept as a string scalar (quoted to make that explicit); YAML has no tuple
  # syntax, so `(0.9,0.98)` is never loaded as a sequence.
  adam_betas: '(0.9,0.98)'
  # Written with an explicit decimal point so that YAML 1.1 loaders such as
  # PyYAML also resolve it as a float; the bare form `1e-06` is loaded as a
  # string by those parsers.
  adam_eps: 1.0e-06
  weight_decay: 0.01

# Learning-rate scheduler selection and its warm-up length in updates.
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000

# Model hyper-parameters.
model:
  _name: hubert
  # Mandatory value with no default (`???`); `task.label_rate` interpolates
  # from this key.
  label_rate: ???
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: default
  # Stored as a quoted string holding a Python-style list expression.
  # NOTE(review): presumably evaluated by the consumer into a list of
  # (dim, kernel, stride) tuples -- confirm before editing the format.
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  final_dim: 256
  encoder_layerdrop: 0.05
  dropout_input: 0.1
  dropout_features: 0.1
  dropout: 0.1
  attention_dropout: 0.1
  feature_grad_mult: 0.1
  untie_final_proj: true
  activation_dropout: 0.0

# Hydra runtime settings.
# NOTE(review): the nesting below was reconstructed after the original
# indentation was lost; it follows Hydra's `hydra.job.config.override_dirname`,
# `hydra.run` and `hydra.sweep` layout -- confirm against the upstream file.
hydra:
  job:
    config:
      # Controls how command-line overrides are rendered into
      # `${hydra.job.override_dirname}` (used by `sweep.subdir` below).
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        # Overrides of these keys are left out of the generated directory name.
        exclude_keys:
          - run
          - task.data
          - task.label_dir
  run:
    # Mandatory value with no default (`???`); must be supplied at launch.
    dir: ???
  sweep:
    # Mandatory value with no default (`???`); must be supplied at launch.
    dir: ???
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}