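# Training configuration snapshot saved with an "llmasr" checkpoint: a Transformer
# speech encoder with a Whisper-style log-mel front end, bridged to the Qwen/Qwen2-7B
# LLM through an adapter module and fine-tuned with LoRA, using a WeNet-style recipe
# running on DeepSpeed. The comments in this file describe the surrounding values.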
accum_grad: 4
adapter_type: gxl
cmvn: null
cmvn_conf:
  cmvn_file: null
  is_json_cmvn: null
ctc_conf:
  ctc_blank_id: 50362
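# Data pipeline: dynamic batching capped by total frames per batch, length/token
# filtering, on-the-fly 80-bin log-mel features (400-sample FFT, 160-sample hop at
# 16 kHz, i.e. 25 ms window / 10 ms shift), SpecAugment-style time/frequency masking
# (spec_aug) and time-segment substitution (spec_sub).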
dataset: asr
dataset_conf:
  batch_conf:
    batch_size: 26
    batch_type: dynamic
    max_frames_in_batch: 2700
    max_seq_in_batch: 2000
  cycle: 100
  emotion_en2zh_dict: conf/en2zh4emotion.json
  eod_id: 151643
  feats_type: log_mel_spectrogram
  filter_conf:
    filter_no_extra_info: true
    max_length: 1000
    max_seq_len: 950
    min_length: 0
    token_max_length: 360
    token_min_length: 1
  language_conf:
    limited_langs:
    - zh
  log_mel_spectrogram_conf:
    hop_length: 160
    n_fft: 400
    num_mel_bins: 80
    padding: 0
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  spec_sub: true
  spec_sub_conf:
    max_t: 30
    num_t_sub: 3
  spec_trim: false
  speed_perturb: false
  split_num: 10
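# Decoder: 24-block Transformer with GELU activations, learnable positional
# embeddings, tied input/output embeddings, and gradient checkpointing enabled.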
decoder: transformer
decoder_conf:
  activation_type: gelu
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: embed_learnable_pe
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention: true
  src_attention_dropout_rate: 0.0
  tie_word_embedding: true
  use_output_layer: true
downsample_rate: 4
dtype: bf16
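# Encoder: 24-block, 1024-dim, 16-head Transformer with Whisper-style absolute
# positional encoding and a conv1d subsampling front end; full-context attention
# (no static or dynamic chunking).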
encoder: transformer
encoder_conf:
  activation_type: gelu
  attention_dropout_rate: 0.0
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: conv1d2
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  output_size: 1024
  pos_enc_layer_type: abs_pos_whisper
  positional_dropout_rate: 0.1
  static_chunk_size: -1
  use_dynamic_chunk: false
  use_dynamic_left_chunk: false
epoch: 11
fire_module: link_and_encoder_and_lora
grad_clip: 5
init_step: false
input_dim: 80
llm_path: Qwen/Qwen2-7B #/home/node54_tmpdata/xlgeng/ckpt/qwen-7B-instruct/qwen2_7b
log_interval: 10
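# LoRA hyperparameters used for parameter-efficient fine-tuning of the LLM side
# (see use_lora and fire_module above/below).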
lora_alpha: 32
lora_dropout: 0.1
lora_rank: 8
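# Most recent training loss/accuracy and learning rate, recorded when this
# checkpoint was saved.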
loss_dict:
  acc: 0.0
  loss: 1.4107781417203814e-05
lrs:
- 1.3727899783270902e-05
max_epoch: 100
model: llmasr
model_conf:
  ctc_weight: 0
  length_normalized_loss: false
  lsm_weight: 0.1
model_dir: /home/node54_tmpdata/xlgeng/ckpt/wenet_whisper_finetune_xlgeng/examples/wenetspeech/whisper/exp/qwen2_multi_task_4_6gpus_gxl_adapter/update_data/epoch_1_with_token
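# Optimization: AdamW; the "warmuplr" scheduler ramps the learning rate up to lr
# over warmup_steps and then decays it with the inverse square root of the step.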
optim: adamw
optim_conf:
  betas:
  - 0.9
  - 0.99
  eps: 1.0e-06
  lr: 5.0e-05
  weight_decay: 0.01
output_dim: 151646
save_interval: 5000
save_states: model+optimizer
save_time: 14/01/2025 20:35:10
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 8000
speech_token_num: 4097
step: 106124
tag: epoch_11
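# Text tokenizer: the Qwen2-7B HuggingFace tokenizer; output_dim / vocab_size
# (151646) correspond to its vocabulary size including special tokens.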
tokenizer: huggingface
tokenizer_conf:
  llm_path: Qwen/Qwen2-7B #/home/node54_tmpdata/xlgeng/ckpt/qwen-7B-instruct/qwen2_7b
train_engine: deepspeed
use_amp: true
use_lora: true
vocab_size: 151646