---
# Experiment configuration using OmegaConf-style interpolation.
# `${section.key}` is an absolute reference, `${.key}` refers to a sibling key,
# and `${eval:...}` / `${div_up:...}` are custom resolvers that the consuming
# code must register before this file is resolved.
#
# NOTE(review): nesting was reconstructed from the interpolation paths used in
# this file (e.g. `${model.d_model}`, `${dataset.max_len}`, `${optimizer.lr}`).
# Placements that could not be confirmed that way are marked below.

model:
  d_model: 384
  n_layer: 2
  # Four times the sibling `d_model`.
  d_inner: ${eval:4 * ${.d_model}}
  vocab_size: ${tokenizer.vocab_size}
  resid_dropout: 0.0
  embed_dropout: 0.1
  residual_in_fp32: true
  pad_vocab_size_multiple: 8
  mamba_ver: mamba2
  layer:
    # Kept in sync with the enclosing model width.
    d_model: ${model.d_model}
    d_state: 64
    d_conv: 4
    expand: 2
    headdim: 48
  # NOTE(review): assumed to belong to `model` rather than `model.layer`;
  # confirm against the model constructor.
  n_classes: null

dataset:
  # Both derived keys below read the sibling `max_len`.
  __train_len: ${div_up:1_000_000_000, ${.max_len}}
  __l_max: ${.max_len}
  randomize_offset: true
  input_path: ./data/
  max_len: 660
  use_padding: true
  add_eos: false
  rc_aug: true
  phase: pretrain
  classify_level: null
  num_workers: 0
  batch_size: 16
  pretrain_method: ntp
  mask_ratio: 0.5

tokenizer:
  use_unk_token: true
  name: char
  characters:
    - A
    - C
    - G
    - T
    # Quoted so YAML 1.1 loaders cannot read a bare N as a boolean.
    - 'N'
  # Must go through `eval`: a bare `${dataset.max_len} + 2` is string
  # interpolation and would resolve to the text "660 + 2", not an integer.
  model_max_length: ${eval:${dataset.max_len} + 2}
  add_special_tokens: false
  padding_side: left
  vocab_size: 8

trainer:
  accelerator: gpu
  devices: -1
  num_nodes: 1
  max_epochs: 50
  gradient_clip_val: 1.0
  fast_dev_run: false
  strategy: ddp

train:
  logger: wandb
  run_name: null
  # Evaluated when the config is resolved: shells out to `nvidia-smi` for
  # GPU 0's total memory, divides by 1000 and rounds. Resolution will fail on
  # hosts where that command is unavailable.
  # Folded scalar: the lines below are joined with single spaces.
  gpu_mem: >-
    ${eval:"round(float(__import__('subprocess').check_output('nvidia-smi -i
    0 --query-gpu=memory.total --format=csv,noheader,nounits',
    shell=True).strip().decode())
    / 1000)"}
  seed: 2222
  global_batch_size: 256
  ckpt: null
  ema: 0.0
  test: true
  interval: step
  monitor: val/loss_epoch
  mode: min
  validate_at_start: false
  pretrained_model_strict_load: false
  pretrained_model_path: null

scheduler:
  t_in_epochs: false
  # div_up(train_len, global_batch_size) * max_epochs.
  t_initial: >-
    ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
    ${trainer.max_epochs}}
  warmup_lr_init: 1.0e-06
  # Same product as `t_initial`, scaled by 0.01.
  warmup_t: >-
    ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} *
    ${trainer.max_epochs} * 0.01}
  # One tenth of the optimizer's learning rate.
  lr_min: ${eval:0.1 * ${optimizer.lr}}

optimizer:
  lr: 0.0008
  weight_decay: 0.1
  betas:
    - 0.9
    - 0.999

model_checkpoint:
  # Mirrors the metric and direction chosen in `train`.
  monitor: ${train.monitor}
  mode: ${train.mode}
  save_top_k: 1
  save_last: true
  dirpath: checkpoints/
  # `${dataset.phase}` is resolved by the config system; `{epoch:02d}` is left
  # as-is for the consumer of this filename template to fill in.
  filename: barcode-mamba-${dataset.phase}-{epoch:02d}
  save_on_train_epoch_end: true
  auto_insert_metric_name: true
  verbose: true

# NOTE(review): assumed to be a top-level flag rather than a
# `model_checkpoint` option; confirm against the consuming code.
debug: false
|