GTCtech committed on
Commit
a3985eb
1 Parent(s): 6860ef1

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yaml +95 -0
  2. last.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ d_model: 384
3
+ n_layer: 2
4
+ d_inner: ${eval:4 * ${.d_model}}
5
+ vocab_size: ${tokenizer.vocab_size}
6
+ resid_dropout: 0.0
7
+ embed_dropout: 0.1
8
+ residual_in_fp32: true
9
+ pad_vocab_size_multiple: 8
10
+ mamba_ver: mamba2
11
+ layer:
12
+ d_model: ${model.d_model}
13
+ d_state: 64
14
+ d_conv: 4
15
+ expand: 2
16
+ headdim: 48
17
+ n_classes: null
18
+ dataset:
19
+ __train_len: ${div_up:1_000_000_000, ${.max_len}}
20
+ __l_max: ${.max_len}
21
+ randomize_offset: true
22
+ input_path: ./data/
23
+ max_len: 660
24
+ use_padding: true
25
+ add_eos: false
26
+ rc_aug: true
27
+ phase: pretrain
28
+ classify_level: null
29
+ num_workers: 0
30
+ batch_size: 16
31
+ pretrain_method: ntp
32
+ mask_ratio: 0.5
33
+ tokenizer:
34
+ use_unk_token: true
35
+ name: char
36
+ characters:
37
+ - A
38
+ - C
39
+ - G
40
+ - T
41
+ - 'N'
42
+ model_max_length: ${eval:${dataset.max_len} + 2}
43
+ add_special_tokens: false
44
+ padding_side: left
45
+ vocab_size: 8
46
+ trainer:
47
+ accelerator: gpu
48
+ devices: -1
49
+ num_nodes: 1
50
+ max_epochs: 50
51
+ gradient_clip_val: 1.0
52
+ fast_dev_run: false
53
+ strategy: ddp
54
+ train:
55
+ logger: wandb
56
+ run_name: null
57
+ gpu_mem: ${eval:"round(float(__import__('subprocess').check_output('nvidia-smi -i
58
+ 0 --query-gpu=memory.total --format=csv,noheader,nounits', shell=True).strip().decode())
59
+ / 1000)"}
60
+ seed: 2222
61
+ global_batch_size: 256
62
+ ckpt: null
63
+ ema: 0.0
64
+ test: true
65
+ interval: step
66
+ monitor: val/loss_epoch
67
+ mode: min
68
+ validate_at_start: false
69
+ pretrained_model_strict_load: false
70
+ pretrained_model_path: null
71
+ scheduler:
72
+ t_in_epochs: false
73
+ t_initial: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
74
+ ${trainer.max_epochs}}
75
+ warmup_lr_init: 1.0e-06
76
+ warmup_t: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
77
+ ${trainer.max_epochs} * 0.01}
78
+ lr_min: ${eval:0.1 * ${optimizer.lr}}
79
+ optimizer:
80
+ lr: 0.0008
81
+ weight_decay: 0.1
82
+ betas:
83
+ - 0.9
84
+ - 0.999
85
+ model_checkpoint:
86
+ monitor: ${train.monitor}
87
+ mode: ${train.mode}
88
+ save_top_k: 1
89
+ save_last: true
90
+ dirpath: checkpoints/
91
+ filename: barcode-mamba-${dataset.phase}-{epoch:02d}
92
+ save_on_train_epoch_end: true
93
+ auto_insert_metric_name: true
94
+ verbose: true
95
+ debug: false
last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541c34f3f0767d5127a03018b3f42c0da329d49eb3e5cc07f16f38ee29cf697a
3
+ size 51249094