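# Training-state snapshot (WeNet-style train.yaml) for an LLM-based ASR model:
# a Transformer speech encoder with a Whisper-style front end is bridged to
# Qwen/Qwen2-7B through a "gxl" adapter plus LoRA, and trained with DeepSpeed
# in bf16. Explanatory comments below assume standard WeNet field semantics
# and are hedged where they go beyond what this file states.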
accum_grad: 4
adapter_type: gxl
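# CMVN is disabled (cmvn: null); log-mel features from the front end below are
# fed to the encoder without cepstral mean/variance normalization.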
cmvn: null
cmvn_conf:
  cmvn_file: null
  is_json_cmvn: null
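# Blank-token id for the auxiliary CTC head; 50362 falls in the Whisper
# tokenizer's special-token id range, suggesting the CTC branch shares
# Whisper's vocabulary (an assumption, not stated in this file).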
ctc_conf:
  ctc_blank_id: 50362
dataset: asr
dataset_conf:
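  # Dynamic batching: utterances are packed until roughly max_frames_in_batch
  # feature frames or max_seq_in_batch sequence tokens; with batch_type:
  # dynamic these caps, not batch_size, are the binding limit (assumed WeNet
  # behavior).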
  batch_conf:
    batch_size: 26
    batch_type: dynamic
    max_frames_in_batch: 2700
    max_seq_in_batch: 2000
  cycle: 100
  emotion_en2zh_dict: conf/en2zh4emotion.json
  eod_id: 151643
  feats_type: log_mel_spectrogram
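  # Utterance filtering; in WeNet, max_length/min_length are likely measured
  # in 10 ms frames and token_{min,max}_length in label tokens (assumed to
  # apply to this fork as well).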
  filter_conf:
    filter_no_extra_info: true
    max_length: 1000
    max_seq_len: 950
    min_length: 0
    token_max_length: 360
    token_min_length: 1
  language_conf:
    limited_langs:
    - zh
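  # Whisper-style front end: at resample_rate 16000, n_fft 400 is a 25 ms
  # window and hop_length 160 a 10 ms hop, yielding 100 frames/s of 80-dim
  # log-mel features (consistent with input_dim: 80 below).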
  log_mel_spectrogram_conf:
    hop_length: 160
    n_fft: 400
    num_mel_bins: 80
    padding: 0
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
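  # SpecAugment: up to 2 frequency masks (max_f 10 mel bins) and 2 time masks
  # (max_t 50 frames) per utterance, plus spec_sub, which in WeNet substitutes
  # up to 3 time segments of at most 30 frames with earlier frames (assumed
  # behavior); spectrum trimming and speed perturbation are off.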
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  spec_sub: true
  spec_sub_conf:
    max_t: 30
    num_t_sub: 3
  spec_trim: false
  speed_perturb: false
  split_num: 10
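# Transformer decoder: 24 blocks, 16 attention heads, 4096-unit feed-forward
# layers with GELU, learnable positional embeddings at the input, and tied
# input/output embeddings (tie_word_embedding: true).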
decoder: transformer
decoder_conf:
  activation_type: gelu
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: embed_learnable_pe
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention: true
  src_attention_dropout_rate: 0.0
  tie_word_embedding: true
  use_output_layer: true
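# downsample_rate plausibly sets the temporal reduction the gxl adapter applies
# to encoder output before the LLM consumes it (interpretation; this file does
# not document the field).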
downsample_rate: 4
dtype: bf16
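# Speech encoder: Whisper-like Transformer (1024-dim, 24 blocks, 16 heads,
# GELU) with a conv1d2 input layer (1-D conv, presumably 2x subsampling) and
# Whisper-style absolute positional encoding; full-context attention, since
# chunking is disabled (static_chunk_size: -1, use_dynamic_chunk: false).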
encoder: transformer
encoder_conf:
  activation_type: gelu
  attention_dropout_rate: 0.0
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: conv1d2
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  output_size: 1024
  pos_enc_layer_type: abs_pos_whisper
  positional_dropout_rate: 0.1
  static_chunk_size: -1
  use_dynamic_chunk: false
  use_dynamic_left_chunk: false
epoch: 11
fire_module: link_and_encoder_and_lora
grad_clip: 5
init_step: false
input_dim: 80
llm_path: Qwen/Qwen2-7B #/home/node54_tmpdata/xlgeng/ckpt/qwen-7B-instruct/qwen2_7b
log_interval: 10
lora_alpha: 32
lora_dropout: 0.1
lora_rank: 8
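# Effective LoRA scaling is alpha / rank = 32 / 8 = 4 on the low-rank update
# (standard LoRA convention, assumed to hold for this implementation).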
loss_dict:
  acc: 0.0
  loss: 1.4107781417203814e-05
lrs:
- 1.3727899783270902e-05
max_epoch: 100
model: llmasr
model_conf:
  ctc_weight: 0
  length_normalized_loss: false
  lsm_weight: 0.1
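# With ctc_weight: 0 the CTC branch does not contribute to the training loss;
# the objective is label-smoothed cross-entropy (lsm_weight: 0.1).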
model_dir: /home/node54_tmpdata/xlgeng/ckpt/wenet_whisper_finetune_xlgeng/examples/wenetspeech/whisper/exp/qwen2_multi_task_4_6gpus_gxl_adapter/update_data/epoch_1_with_token
optim: adamw
optim_conf:
  betas:
  - 0.9
  - 0.99
  eps: 1.0e-06
  lr: 5.0e-05
  weight_decay: 0.01
output_dim: 151646
save_interval: 5000
save_states: model+optimizer
save_time: 14/01/2025 20:35:10
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 8000
speech_token_num: 4097
step: 106124
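# Consistency check, assuming WeNet's warmuplr schedule
# lr(t) = lr * warmup_steps^0.5 * min(t^-0.5, t * warmup_steps^-1.5),
# which past warmup reduces to lr * sqrt(warmup_steps / t):
# at t = 106124, 5.0e-05 * sqrt(8000 / 106124) ≈ 1.373e-05, matching lrs above.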
tag: epoch_11
tokenizer: huggingface
tokenizer_conf:
  llm_path: Qwen/Qwen2-7B #/home/node54_tmpdata/xlgeng/ckpt/qwen-7B-instruct/qwen2_7b
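# Text tokenization reuses the Qwen2-7B Hugging Face tokenizer; vocab_size and
# output_dim of 151646 appear to match len(tokenizer) for Qwen2 (151643 base
# entries plus 3 special tokens), and eod_id 151643 is Qwen2's <|endoftext|>.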
train_engine: deepspeed
use_amp: true
use_lora: true
vocab_size: 151646