{
  "name": "test_config",
  "n_gpu": 1,
  "preprocessing": {
    "sr": 16000,
    "spectrogram": {
      "type": "MelSpectrogram",
      "args": {}
    },
    "log_spec": true
  },
  "augmentations": {
    "wave": [],
    "spectrogram": []
  },
  "arch": {
    "type": "BaselineModel",
    "args": {
      "n_feats": 128,
      "fc_hidden": 512
    }
  },
  "data": {
    "train": {
      "batch_size": 20,
      "num_workers": 0,
      "datasets": [
        {
          "type": "LibrispeechDataset",
          "args": {
            "part": "dev-clean",
            "max_audio_length": 20.0,
            "max_text_length": 200
          }
        }
      ]
    },
    "val": {
      "batch_size": 20,
      "num_workers": 0,
      "datasets": [
        {
          "type": "LibrispeechDataset",
          "args": {
            "part": "dev-clean",
            "max_audio_length": 20.0,
            "max_text_length": 200
          }
        }
      ]
    }
  },
  "optimizer": {
    "type": "SGD",
    "args": {
      "lr": 3e-4
    }
  },
  "loss": {
    "type": "CTCLoss",
    "args": {}
  },
  "metrics": [
    {
      "type": "ArgmaxWERMetric",
      "args": {
        "name": "WER (argmax)"
      }
    },
    {
      "type": "ArgmaxCERMetric",
      "args": {
        "name": "CER (argmax)"
      }
    }
  ],
  "lr_scheduler": {
    "type": "OneCycleLR",
    "args": {
      "steps_per_epoch": 100,
      "epochs": 50,
      "anneal_strategy": "cos",
      "max_lr": 4e-3,
      "pct_start": 0.2
    }
  },
  "trainer": {
    "epochs": 50,
    "save_dir": "saved/",
    "save_period": 5,
    "verbosity": 2,
    "monitor": "min val_loss",
    "early_stop": 100,
    "visualize": "wandb",
    "wandb_project": "asr_project",
    "len_epoch": 100,
    "grad_norm_clip": 10
  }
}
|