[
  896,
  1024,
  {
    "accum_grad": 3,
    "char_list": [],
    "debugmode": 0,
    "encoder_criterion": "ce",
    "encoder_drop_rate": 0.1,
    "encoder_input_dim": 896,
    "encoder_layer_config": "transformer",
    "encoder_output_dim": 896,
    "encoder_pre_norm_type": "ln",
    "encoder_upsample_rate": 9,
    "kv_cache_prefix_finetune": 1,
    "epochs": 100,
    "eps": 1e-08,
    "eps_decay": 0.8,
    "gpu_id": null,
    "gpu_num": 1,
    "grad_clip": 5,
    "grad_noise": false,
    "idim": 896,
    "init_lr": 0.0005,
    "lsm_weight": 0.0,
    "max_batch_size": 25,
    "max_duration": 256,
    "max_mem": 20000,
    "mtlalpha": 0.5,
    "n_iter_processes": 8,
    "noam_warmup_steps": 4000,
    "odim": 1024,
    "opt": "noamw",
    "rank": 0,
    "report_interval_iters": 100,
    "resume_trainer": false,
    "save_interval_iters": 2000,
    "seed": 19832,
    "sort_duration": true,
    "start_decay_epoch": 5,
    "stop_learning_rate": 1e-05,
    "sycn_batchnorm": false,
    "tensorboard_dir": null,
    "train_dtype": "bfloat16",
    "transformer_attention_dim": 896,
    "transformer_attention_dropout_rate": 0.1,
    "transformer_attention_heads": 14,
    "transformer_chunk_size": [
      1
    ],
    "transformer_concat_after": false,
    "transformer_dropout_rate": 0.1,
    "transformer_dynamic_chunks": false,
    "transformer_input_dim": 896,
    "transformer_input_layer": "linear",
    "transformer_left_chunks": [
      -1
    ],
    "transformer_linear_units": 4864,
    "transformer_normalize_before": true,
    "transformer_num_blocks": 4,
    "transformer_output_dim": 896,
    "transformer_pos_enc_class": "rel-enc",
    "transformer_positional_dropout_rate": 0.1,
    "transformer_positionwise_conv_kernel_size": 1,
    "transformer_positionwise_layer_type": "linear",
    "use_zero_redun_opt": false,
    "verbose": 0,
    "weight_decay": 0.05,
    "world_size": 1
  }
]