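# Training configuration snapshot saved with an "llmasr" checkpoint: a Transformer
# speech encoder with a Whisper-style log-mel front end, bridged to the Qwen/Qwen2-7B
# LLM through an adapter module and fine-tuned with LoRA, using a WeNet-style recipe
# running on DeepSpeed. The comments in this file describe the surrounding values.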
accum_grad: 4
adapter_type: gxl
cmvn: null
cmvn_conf:
  cmvn_file: null
  is_json_cmvn: null
ctc_conf:
  ctc_blank_id: 50362
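# Data pipeline: dynamic batching capped by total frames per batch, length/token
# filtering, on-the-fly 80-bin log-mel features (400-sample FFT, 160-sample hop at
# 16 kHz, i.e. 25 ms window / 10 ms shift), SpecAugment-style time/frequency masking
# (spec_aug) and time-segment substitution (spec_sub).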
dataset: asr
dataset_conf:
  batch_conf:
    batch_size: 26
    batch_type: dynamic
    max_frames_in_batch: 2700
    max_seq_in_batch: 2000
  cycle: 100
  emotion_en2zh_dict: conf/en2zh4emotion.json
  eod_id: 151643
  feats_type: log_mel_spectrogram
  filter_conf:
    filter_no_extra_info: true
    max_length: 1000
    max_seq_len: 950
    min_length: 0
    token_max_length: 360
    token_min_length: 1
  language_conf:
    limited_langs:
    - zh
  log_mel_spectrogram_conf:
    hop_length: 160
    n_fft: 400
    num_mel_bins: 80
    padding: 0
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  spec_sub: true
  spec_sub_conf:
    max_t: 30
    num_t_sub: 3
  spec_trim: false
  speed_perturb: false
  split_num: 10
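# Decoder: 24-block Transformer with GELU activations, learnable positional
# embeddings, tied input/output embeddings, and gradient checkpointing enabled.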
decoder: transformer
decoder_conf:
  activation_type: gelu
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: embed_learnable_pe
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention: true
  src_attention_dropout_rate: 0.0
  tie_word_embedding: true
  use_output_layer: true
downsample_rate: 4
dtype: bf16
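# Encoder: 24-block, 1024-dim, 16-head Transformer with Whisper-style absolute
# positional encoding and a conv1d subsampling front end; full-context attention
# (no static or dynamic chunking).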
encoder: transformer
encoder_conf:
  activation_type: gelu
  attention_dropout_rate: 0.0
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: conv1d2
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  output_size: 1024
  pos_enc_layer_type: abs_pos_whisper
  positional_dropout_rate: 0.1
  static_chunk_size: -1
  use_dynamic_chunk: false
  use_dynamic_left_chunk: false
epoch: 11
fire_module: link_and_encoder_and_lora
grad_clip: 5
init_step: false
input_dim: 80
llm_path: Qwen/Qwen2-7B #/home/node54_tmpdata/xlgeng/ckpt/qwen-7B-instruct/qwen2_7b
log_interval: 10
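# LoRA hyperparameters used for parameter-efficient fine-tuning of the LLM side
# (see use_lora and fire_module above/below).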
lora_alpha: 32
lora_dropout: 0.1
lora_rank: 8
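# Most recent training loss/accuracy and learning rate, recorded when this
# checkpoint was saved.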
loss_dict:
  acc: 0.0
  loss: 1.4107781417203814e-05
lrs:
- 1.3727899783270902e-05
max_epoch: 100
model: llmasr
model_conf:
  ctc_weight: 0
  length_normalized_loss: false
  lsm_weight: 0.1
model_dir: /home/node54_tmpdata/xlgeng/ckpt/wenet_whisper_finetune_xlgeng/examples/wenetspeech/whisper/exp/qwen2_multi_task_4_6gpus_gxl_adapter/update_data/epoch_1_with_token
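# Optimization: AdamW; the "warmuplr" scheduler ramps the learning rate up to lr
# over warmup_steps and then decays it with the inverse square root of the step.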
optim: adamw
optim_conf:
  betas:
  - 0.9
  - 0.99
  eps: 1.0e-06
  lr: 5.0e-05
  weight_decay: 0.01
output_dim: 151646
save_interval: 5000
save_states: model+optimizer
save_time: 14/01/2025 20:35:10
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 8000
speech_token_num: 4097
step: 106124
tag: epoch_11
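# Text tokenizer: the Qwen2-7B HuggingFace tokenizer; output_dim / vocab_size
# (151646) correspond to its vocabulary size including special tokens.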
tokenizer: huggingface
tokenizer_conf:
  llm_path: Qwen/Qwen2-7B #/home/node54_tmpdata/xlgeng/ckpt/qwen-7B-instruct/qwen2_7b
train_engine: deepspeed
use_amp: true
use_lora: true
vocab_size: 151646