runner:
  total_steps: 100000
  gradient_clipping: 1
  gradient_accumulate_steps: 2
  log_step: 100
  eval_step: 1000
  save_step: 100
  max_keep: 1
  eval_dataloaders:
    - dev
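
# A reading of the checkpoint settings above (presumed semantics): a
# checkpoint is written every save_step (100) steps, and max_keep: 1
# keeps only the most recent checkpoint on disk.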

optimizer:
  name: TorchOptim
  torch_optim_name: Adam
  lr: 1.0e-2
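
# TorchOptim presumably resolves torch_optim_name against torch.optim,
# so any optimizer class from torch.optim should be usable here. A
# hedged example, swapping in AdamW at the same learning rate:
#
# optimizer:
#   name: TorchOptim
#   torch_optim_name: AdamW
#   lr: 1.0e-2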

# Uncomment the scheduler block below to enable learning rate
# scheduling; keep it commented out to disable it.
# scheduler:
#   name: linear_schedule_with_warmup
#   num_warmup_steps: 1400

# Uncomment the specaug block below to apply SpecAugment to the
# upstream representation; keep it commented out to disable it.
# specaug:
#   apply_time_warp: true
#   apply_time_mask: true
#   apply_freq_mask: true
#   time_warp_window: 3
#   time_mask_width_range: [0, 10]
#   freq_mask_width_range: [0, 10]
#   num_freq_mask: 10
#   num_time_mask: 10

downstream_expert:
  corpus:
    name: 'libriphone'                        # Corpus to use
    path: '/home/leo/d/datasets/LibriSpeech'  # Path to the raw LibriSpeech dataset
    # Pronunciation lexicons (word -> phoneme sequence); the -g2p files
    # appear to be grapheme-to-phoneme generated entries.
    lexicon:
      # - downstream/ctc/lexicon/librispeech-lexicon.txt
      - downstream/ctc/lexicon/librispeech-lexicon-200k-g2p.txt
      - downstream/ctc/lexicon/librispeech-lexicon-allothers-g2p.txt
    train: ['train-clean-100']                # Data splits used as the training set
    dev: ['dev-clean']                        # Data splits used as the validation set
    test: ['test-clean']                      # Data splits used as the test set
    bucketing: True                           # Enable/disable length-based batch bucketing
    batch_size: 16
    num_workers: 24

  text:
    mode: 'word'                              # 'character'/'word'/'subword'
    vocab_file: 'downstream/ctc/vocab/phoneme.txt'
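
  # With mode: 'word' and a phoneme vocab file, each whitespace-separated
  # token in the target text is presumably treated as one phoneme label
  # for CTC, i.e. the "words" here are the phonemes listed in phoneme.txt.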

  model:
    project_dim: 256
    zero_infinity: True                       # Zero out infinite CTC losses (see torch.nn.CTCLoss)
    select: FrameLevel                        # Downstream model architecture to use
    Wav2Letter:
      total_rate: 320
    RNNs:
      total_rate: 320
      module: 'LSTM'                          # 'LSTM'/'GRU'
      bidirection: True
      dim: [1024, 1024, 1024]
      dropout: [0.2, 0.2, 0.2]
      layer_norm: [True, True, True]
      proj: [True, True, True]                # Linear projection + Tanh after each RNN layer
      sample_rate: [1, 1, 1]
      sample_style: 'concat'                  # 'drop'/'concat'
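
  # 'select' presumably names the model class to instantiate, with the
  # same-named sub-block supplying its arguments; while select is
  # FrameLevel, the Wav2Letter and RNNs blocks above are unused.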

  save_best_on:
    - dev

  metric_higher_better: False                 # PER: lower is better
  metric:                                     # The first metric is used to select the best checkpoint
    - per                                     # Phone error rate
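
# A usage sketch, assuming this is an s3prl downstream config for the
# ctc expert (the downstream/ctc/* paths above suggest so); the -u
# upstream and -n experiment name below are placeholders:
#
#   python3 run_downstream.py -m train -n libriphone_ctc \
#     -u wav2vec2 -d ctc -c <path/to/this/config.yaml>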