{
"name": "default_test_config",
"n_gpu": 1,
"text_encoder": {
"type": "CTCCharTextEncoder",
"args": {
"kenlm_model_path": "hw_asr/text_encoder/lower_3-gram.arpa",
"unigrams_path": "hw_asr/text_encoder/librispeech-fixed-vocab.txt"
}
},
"preprocessing": {
"sr": 16000,
"spectrogram": {
"type": "MelSpectrogram",
"args": {
"n_mels": 256
}
},
"log_spec": true
},
"augmentations": {
"random_apply_p": 0.6,
"wave": [
{
"type": "AddColoredNoise",
"args": {
"p": 1,
"sample_rate": 16000
}
},
{
"type": "Gain",
"args": {
"p": 0.8,
"sample_rate": 16000
}
},
{
"type": "HighPassFilter",
"args": {
"p": 0,
"sample_rate": 16000
}
},
{
"type": "LowPassFilter",
"args": {
"p": 0,
"sample_rate": 16000
}
},
{
"type": "PitchShift",
"args": {
"p": 0.8,
"min_transpose_semitones": -2,
"max_transpose_semitones": 2,
"sample_rate": 16000
}
},
{
"type": "PolarityInversion",
"args": {
"p": 0.8,
"sample_rate": 16000
}
},
{
"type": "Shift",
"args": {
"p": 0.8,
"sample_rate": 16000
}
}
],
"spectrogram": [
{
"type": "TimeMasking",
"args": {
"time_mask_param": 80,
"p": 0.05
}
},
{
"type": "FrequencyMasking",
"args": {
"freq_mask_param": 80
}
}
]
},
"arch": {
"type": "DeepSpeech2Model",
"args": {
"n_feats": 256,
"n_rnn_layers": 6,
"rnn_hidden_size": 512,
"rnn_dropout": 0.2
}
},
"data": {
"test": {
"batch_size": 64,
"num_workers": 4,
"datasets": [
{
"type": "LibrispeechDataset",
"args": {
"part": "test-other"
}
}
]
}
},
"optimizer": {
"type": "AdamW",
"args": {
"lr": 0.0003,
"weight_decay": 1e-05
}
},
"loss": {
"type": "CTCLoss",
"args": {}
},
"metrics": [
{
"type": "ArgmaxWERMetric",
"args": {
"name": "WER (argmax)"
}
},
{
"type": "ArgmaxCERMetric",
"args": {
"name": "CER (argmax)"
}
},
{
"type": "BeamSearchWERMetric",
"args": {
"beam_size": 4,
"name": "WER (beam search)"
}
},
{
"type": "BeamSearchCERMetric",
"args": {
"beam_size": 4,
"name": "CER (beam search)"
}
},
{
"type": "LanguageModelWERMetric",
"args": {
"name": "WER (LM)"
}
},
{
"type": "LanguageModelCERMetric",
"args": {
"name": "CER (LM)"
}
}
],
"lr_scheduler": {
"type": "OneCycleLR",
"args": {
"steps_per_epoch": 1000,
"epochs": 50,
"anneal_strategy": "cos",
"max_lr": 0.0003,
"pct_start": 0.1
}
},
"trainer": {
"epochs": 50,
"save_dir": "saved/",
"save_period": 5,
"verbosity": 2,
"monitor": "min val_loss",
"early_stop": 100,
"visualize": "wandb",
"wandb_project": "asr_project",
"len_epoch": 1000,
"grad_norm_clip": 10
}
}