runner:
  total_steps: 200000
  gradient_clipping: 1
  gradient_accumulate_steps: 1
  log_step: 100
  eval_step: 2000
  save_step: 500
  max_keep: 1
  eval_dataloaders:
    - dev

optimizer:
  name: TorchOptim
  torch_optim_name: Adam
  lr: 1.0e-4

# Comment out the whole scheduler config block
# to disable learning rate scheduling
# scheduler:
#   name: linear_schedule_with_warmup
#   num_warmup_steps: 1400

specaug:
  adaptive: false
  adaptive_number_ratio: 0.04
  adaptive_size_ratio: 0.04
  max_n_time_masks: 20
  apply_time_warp: true
  apply_time_mask: true
  apply_freq_mask: true
  time_warp_window: 5
  time_mask_width_range: [0, 40]
  freq_mask_width_range: [0, 50]
  num_freq_mask: 4
  num_time_mask: 2

downstream_expert:
  corpus:
    name: 'snips'                 # Specify corpus
    path: '/path/to/SNIPS'        # Path to the raw SNIPS dataset
    train: ['train']              # Name of data splits to be used as training set
    dev: ['valid']                # Name of data splits to be used as validation set
    test: ['test']                # Name of data splits to be used as test set
    bucketing: True               # Enable/Disable bucketing
    batch_size: 32
    train_speakers:
      # - Aditi
      # - Amy
      # - Brian
      # - Emma
      # - Geraint
      - Ivy
      - Joanna
      - Joey
      - Justin
      - Kendra
      - Kimberly
      - Matthew
      # - Nicole
      # - Raveena
      # - Russell
      - Salli
    valid_speakers:
      - Aditi
      - Amy
      # - Brian
      # - Emma
      - Geraint
      # - Ivy
      # - Joanna
      # - Joey
      # - Justin
      # - Kendra
      # - Kimberly
      # - Matthew
      - Nicole
      # - Raveena
      # - Russell
      # - Salli
    test_speakers:
      # - Aditi
      # - Amy
      - Brian
      - Emma
      # - Geraint
      # - Ivy
      # - Joanna
      # - Joey
      # - Justin
      # - Kendra
      # - Kimberly
      # - Matthew
      # - Nicole
      - Raveena
      - Russell
      # - Salli
    num_workers: 12

  text:
    mode: 'character-slot'        # 'character'/'character-slot'/'word'/'subword'
    vocab_file: 'downstream/ctc/vocab/character.txt'
    slots_file: '/path/to/SNIPS/slots.txt'

  model:
    project_dim: 1024
    zero_infinity: True

    select: RNNs
    Wav2Letter:
      total_rate: 320
    RNNs:
      total_rate: -1
      module: 'LSTM'              # 'LSTM'/'GRU'
      bidirection: True
      dim: [1024, 1024]
      dropout: [0.2, 0.2]
      layer_norm: [False, False]
      proj: [False, False]        # Linear projection + Tanh after each rnn layer
      sample_rate: [1, 1]
      sample_style: 'concat'      # 'drop'/'concat'

  save_best_on:
    - dev

  metric_higher_better: True
  metric:                         # The first metric is used to select the best checkpoint
    - slot_type_f1
    - slot_value_cer
    - slot_value_wer
    - slot_edit_f1_full
    - slot_edit_f1_part
    - wer
    - cer
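
# A sketch of how this config is typically consumed, assuming the standard
# s3prl downstream interface; the upstream name 'fbank' and the experiment
# name 'snips_ctc' below are placeholders, not values taken from this file:
#
#   python3 run_downstream.py -m train -u fbank -d ctc -c <path/to/this/file> -n snips_ctc
#
# Any key here can instead be tuned by editing the file directly before
# launching training; the commented-out speakers above show the intended
# way to change the speaker-disjoint train/valid/test split.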