---
# Configuration file with five top-level sections: speech_config,
# model_config, dataset_config, optimizer_config and running_config.
# NOTE(review): the exact meaning and units of each key are defined by the
# code that loads this file, which is not visible here — confirm before
# changing values.

speech_config:
  sample_rate: 16000
  frame_ms: 25
  stride_ms: 10
  num_feature_bins: 80
  feature_type: log_mel_spectrogram
  preemphasis: 0.97
  normalize_signal: true
  normalize_feature: true
  normalize_per_feature: false

model_config:
  name: acrnn
  d_model: 64
  filters: [32, 64, 64]
  # Three [a, b] pairs, the same count as `filters` — presumably one pair
  # per filter entry; verify against the model code.
  kernel_size: [[11, 5], [11, 5], [11, 5]]
  rnn_cell: 256
  seq_mask: true

dataset_config:
  vocabulary: vocab/vocab.txt
  data_path: ./data/wavs/
  corpus_name: ./data/demo_txt/demo
  file_nums: 1
  max_audio_length: 2000
  shuffle_size: 1200
  # NOTE(review): a plain `None` is loaded as the string "None", not as a
  # YAML null. Left unchanged because the consumer may compare against that
  # string; switch to `null` only after confirming how the loader reads it.
  data_length: None
  suffix: .txt
  load_type: txt
  train: train
  dev: dev
  test: test

optimizer_config:
  init_steps: 0
  warmup_steps: 10000
  # Exponent floats are written with a decimal point so that YAML 1.1
  # loaders (e.g. PyYAML) resolve them as floats rather than strings.
  max_lr: 1.0e-4
  beta1: 0.9
  beta2: 0.999
  epsilon: 1.0e-9

running_config:
  prefetch: false
  load_weights: ./saved_weights/20230228-084356/last/model
  num_epochs: 100
  batch_size: 1
  train_steps: 50
  dev_steps: 10
  test_steps: 10