---
# Training-loop settings: step budget, gradient handling, and
# logging / evaluation / checkpoint cadence.
runner:
  total_steps: 100000
  gradient_clipping: 1
  # Effective batch size = batch_size * gradient_accumulate_steps.
  gradient_accumulate_steps: 2

  log_step: 100
  eval_step: 1000
  save_step: 100
  # Keep only the most recent checkpoint.
  max_keep: 1
  # Dataloader splits to run during evaluation.
  eval_dataloaders:
    - dev

# Optimizer: wraps a torch.optim optimizer (Adam) at a fixed learning rate.
optimizer:
  name: TorchOptim
  torch_optim_name: Adam
  lr: 1.0e-2

# Task-specific configuration: corpus paths, tokenization, model
# architecture, and model-selection metric.
downstream_expert:
  corpus:
    name: 'libriphone'
    # NOTE(review): absolute path is machine-specific — adjust per host.
    path: '/home/leo/d/datasets/LibriSpeech'
    lexicon:
      - downstream/ctc/lexicon/librispeech-lexicon-200k-g2p.txt
      - downstream/ctc/lexicon/librispeech-lexicon-allothers-g2p.txt

    train: ['train-clean-100']
    dev: ['dev-clean']
    test: ['test-clean']

    bucketing: true
    batch_size: 16
    num_workers: 24

  text:
    mode: 'word'
    vocab_file: 'downstream/ctc/vocab/phoneme.txt'

  model:
    project_dim: 256
    # Pass zero_infinity to the CTC loss (zeroes infinite losses).
    zero_infinity: true

    # Which model variant below to instantiate.
    select: FrameLevel
    Wav2Letter:
      total_rate: 320
    RNNs:
      total_rate: 320
      module: 'LSTM'
      bidirection: true
      dim: [1024, 1024, 1024]
      dropout: [0.2, 0.2, 0.2]
      layer_norm: [true, true, true]
      proj: [true, true, true]
      sample_rate: [1, 1, 1]
      sample_style: 'concat'

  # Checkpoint selection: track `per` on the dev split; lower is better.
  save_best_on:
    - dev

  metric_higher_better: false
  metric:
    - per