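# Pretraining configuration for a barcode-mamba character-level DNA model.
# Hydra/OmegaConf-style config: ${...} interpolations plus custom `eval` and
# `div_up` resolvers, which are assumed to be registered by the training code.

# Model: a 2-layer Mamba2 backbone. d_inner resolves to 4 * d_model = 1536 and
# vocab_size is interpolated from the tokenizer. With expand=2 and headdim=48,
# each layer has 384 * 2 / 48 = 16 state-space heads.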
model:
  d_model: 384
  n_layer: 2
  d_inner: ${eval:4 * ${.d_model}}
  vocab_size: ${tokenizer.vocab_size}
  resid_dropout: 0.0
  embed_dropout: 0.1
  residual_in_fp32: true
  pad_vocab_size_multiple: 8
  mamba_ver: mamba2
  layer:
    d_model: ${model.d_model}
    d_state: 64
    d_conv: 4
    expand: 2
    headdim: 48
  n_classes: null
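# Dataset: ~1e9 characters chunked into samples of max_len=660, giving
# __train_len = div_up(1_000_000_000, 660) = 1,515,152 samples per epoch
# (assuming div_up is ceiling division). rc_aug enables reverse-complement
# augmentation; pretrain_method "ntp" is presumably next-token prediction,
# in which case mask_ratio is likely unused.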
dataset:
  __train_len: ${div_up:1_000_000_000, ${.max_len}}
  __l_max: ${.max_len}
  randomize_offset: true
  input_path: ./data/
  max_len: 660
  use_padding: true
  add_eos: false
  rc_aug: true
  phase: pretrain
  classify_level: null
  num_workers: 0
  batch_size: 16
  pretrain_method: ntp
  mask_ratio: 0.5
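# Tokenizer: character-level over the DNA alphabet A/C/G/T/N ('N' is quoted so
# YAML does not read it as a boolean). vocab_size 8 presumably covers the five
# characters plus special tokens such as pad/unk.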
tokenizer:
  use_unk_token: true
  name: char
  characters:
  - A
  - C
  - G
  - T
  - 'N'
  # +2 presumably leaves headroom for special tokens; wrapped in the eval
  # resolver so it resolves to the integer 662 rather than the string "660 + 2"
  model_max_length: ${eval:${dataset.max_len} + 2}
  add_special_tokens: false
  padding_side: left
  vocab_size: 8
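# Trainer (PyTorch Lightning conventions): devices: -1 selects all visible GPUs
# under DDP; gradient norms are clipped at 1.0.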
trainer:
  accelerator: gpu
  devices: -1
  num_nodes: 1
  max_epochs: 50
  gradient_clip_val: 1.0
  fast_dev_run: false
  strategy: ddp
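# Train loop: with per-device batch_size 16 and global_batch_size 256, gradient
# accumulation is presumably derived as 256 / (16 * world_size). Validation loss
# is monitored per epoch (lower is better); ema: 0.0 disables weight averaging.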
train:
  logger: wandb
  run_name: null
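  # Shells out to nvidia-smi for GPU 0's total memory (reported in MiB) and
  # rounds it to whole gigabytes.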
  gpu_mem: ${eval:"round(float(__import__('subprocess').check_output('nvidia-smi -i
    0 --query-gpu=memory.total --format=csv,noheader,nounits', shell=True).strip().decode())
    / 1000)"}
  seed: 2222
  global_batch_size: 256
  ckpt: null
  ema: 0.0
  test: true
  interval: step
  monitor: val/loss_epoch
  mode: min
  validate_at_start: false
  pretrained_model_strict_load: false
  pretrained_model_path: null
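# Scheduler (timm-style names; t_in_epochs: false means step-based): t_initial
# is the total optimizer-step count, ceil(1,515,152 / 256) * 50 = 295,950, and
# warmup_t is 1% of that (~2,960 steps; note the eval yields a float). The
# learning rate decays to lr_min = 0.1 * optimizer.lr = 8e-5.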
scheduler:
  t_in_epochs: false
  t_initial: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
    ${trainer.max_epochs}}
  warmup_lr_init: 1.0e-06
  warmup_t: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
    ${trainer.max_epochs} * 0.01}
  lr_min: ${eval:0.1 * ${optimizer.lr}}
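# Optimizer: presumably AdamW (decoupled weight decay 0.1, lr 8e-4, default betas).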
optimizer:
  lr: 0.0008
  weight_decay: 0.1
  betas:
  - 0.9
  - 0.999
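# Checkpointing (Lightning ModelCheckpoint-style): keeps the single best
# checkpoint by train.monitor plus the most recent one (save_last: true).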
model_checkpoint:
  monitor: ${train.monitor}
  mode: ${train.mode}
  save_top_k: 1
  save_last: true
  dirpath: checkpoints/
  filename: barcode-mamba-${dataset.phase}-{epoch:02d}
  save_on_train_epoch_end: true
  auto_insert_metric_name: true
  verbose: true
debug: false