GTCtech committed on
Commit
634b270
·
verified ·
1 Parent(s): 6748ae1

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yaml +91 -0
  2. last.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ d_model: 384
3
+ n_layer: 2
4
+ d_inner: ${eval:4 * ${.d_model}}
5
+ vocab_size: ${tokenizer.vocab_size}
6
+ resid_dropout: 0.0
7
+ embed_dropout: 0.1
8
+ residual_in_fp32: true
9
+ pad_vocab_size_multiple: 8
10
+ mamba_ver: mamba2
11
+ layer:
12
+ d_model: ${model.d_model}
13
+ d_state: 64
14
+ d_conv: 4
15
+ expand: 2
16
+ headdim: 48
17
+ n_classes: null
18
+ dataset:
19
+ __train_len: ${div_up:1_000_000_000, ${.max_len}}
20
+ __l_max: ${.max_len}
21
+ randomize_offset: true
22
+ input_path: ./data/
23
+ max_len: 660
24
+ use_padding: true
25
+ add_eos: false
26
+ rc_aug: true
27
+ phase: pretrain
28
+ classify_level: null
29
+ num_workers: 0
30
+ batch_size: 16
31
+ pretrain_method: ntp
32
+ mask_ratio: 0.5
33
+ tokenizer:
34
+ use_unk_token: true
35
+ k_mer: 6
36
+ padding: true
37
+ padding_side: left
38
+ name: k-mer
39
+ stride: ${.k_mer}
40
+ max_len: ${dataset.max_len}
41
+ vocab_size: ${eval:4 ** ${.k_mer} + 3}
42
+ trainer:
43
+ accelerator: gpu
44
+ devices: -1
45
+ num_nodes: 1
46
+ max_epochs: 50
47
+ gradient_clip_val: 1.0
48
+ fast_dev_run: false
49
+ strategy: ddp
50
+ train:
51
+ logger: wandb
52
+ run_name: null
53
+ gpu_mem: ${eval:"round(float(__import__('subprocess').check_output('nvidia-smi -i
54
+ 0 --query-gpu=memory.total --format=csv,noheader,nounits', shell=True).strip().decode())
55
+ / 1000)"}
56
+ seed: 2222
57
+ global_batch_size: 256
58
+ ckpt: null
59
+ ema: 0.0
60
+ test: true
61
+ interval: step
62
+ monitor: val/loss_epoch
63
+ mode: min
64
+ validate_at_start: false
65
+ pretrained_model_strict_load: false
66
+ pretrained_model_path: null
67
+ scheduler:
68
+ t_in_epochs: false
69
+ t_initial: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
70
+ ${trainer.max_epochs}}
71
+ warmup_lr_init: 1.0e-06
72
+ warmup_t: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
73
+ ${trainer.max_epochs} * 0.01}
74
+ lr_min: ${eval:0.1 * ${optimizer.lr}}
75
+ optimizer:
76
+ lr: 0.0008
77
+ weight_decay: 0.1
78
+ betas:
79
+ - 0.9
80
+ - 0.999
81
+ model_checkpoint:
82
+ monitor: ${train.monitor}
83
+ mode: ${train.mode}
84
+ save_top_k: 1
85
+ save_last: true
86
+ dirpath: checkpoints/
87
+ filename: barcode-mamba-${dataset.phase}-{epoch:02d}
88
+ save_on_train_epoch_end: true
89
+ auto_insert_metric_name: true
90
+ verbose: true
91
+ debug: false
last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f042eae4c1c545ce77ebb80ccd10f2c17353f4bfec4d7b3835857c676bc62ce8
3
+ size 88996614