defaults:
  - _self_
  - /callbacks: [checkpoint_every_n_steps, checkpoint_monitor, learning_rate_monitor]
  - /model: small
  - /strategy: ddp
  - /noise: loglinear
  - /lr_scheduler: constant_warmup
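# Hydra composes the groups above in order. With `_self_` listed first,
# the values in this file are merged before the group configs, so
# /model, /strategy, etc. can override overlapping keys defined below.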

mode: sample_eval
diffusion: absorbing_state
backbone: membrane_esm_finetune
parameterization: subs
time_conditioning: False
T: 0
subs_masking: False
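# `subs` is the SUBS parameterization from MDLM (zero masking
# probabilities, carry-over unmasking); T: 0 selects the continuous-time
# limit of the absorbing-state diffusion, and time_conditioning stays off
# because the SUBS denoiser does not take t as input.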

seed: 42

data:
  train:
    vanilla_esm_train_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/train.csv
    membrane_esm_train_path: /workspace/sg666/MDpLM/data/membrane/train.csv
    wrap: null
  test:
    vanilla_esm_test_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/test.csv
    membrane_esm_test_path: /workspace/sg666/MDpLM/data/membrane/test.csv
    wrap: null
  valid:
    vanilla_esm_valid_path: /workspace/sg666/MDpLM/data/uniref50/200k_seqs/val.csv
    membrane_esm_valid_path: /workspace/sg666/MDpLM/data/membrane/val.csv
    wrap: null
  wrapping: True
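# Each split lists both a vanilla (UniRef50) CSV and a membrane CSV; the
# `backbone` setting presumably decides which one is loaded. `wrap` /
# `wrapping` appear to control whether sequences are wrapped into
# fixed-length chunks rather than padded individually.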

loader:
  global_batch_size: 8
  eval_global_batch_size: ${.global_batch_size}
  batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  eval_batch_size: ${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
  pin_memory: True
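# Worked example, assuming 1 node with 4 GPUs: batch_size =
# div_up(8, 4 * 1) = 2 sequences per device. num_workers is set to the
# CPU count visible to this process; note os.sched_getaffinity is
# Linux-only.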

sampling:
  predictor: ddpm_cache
  steps: 128
  noise_removal: True

  num_sample_batches: 2
  num_sample_log: 2
  semi_ar: False
  stride_length: 1
  num_strides: 1
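# ddpm_cache is MDLM's cached ancestral sampler: the denoiser is re-run
# only on steps where at least one token changes, which speeds up the
# 128-step reverse process without altering its distribution.
# noise_removal forces tokens still masked at the final step to be
# decoded. With semi_ar: False, semi-autoregressive (strided) generation
# is disabled, so stride_length/num_strides are inactive here.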

training:
  ema: 0.9999
  antithetic_sampling: True
  importance_sampling: False
  sampling_eps: 1e-3
  change_of_variables: False
  mlm_model_path: /workspace/sg666/MDpLM/benchmarks/MLM/model_ckpts_650M/best_model_epoch
  esm_model_path: facebook/esm2_t30_150M_UR50D
  focus_mask: False
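# ema is the decay of the exponential moving average of the weights used
# at eval time. antithetic_sampling draws diffusion timesteps in
# negatively correlated pairs to reduce gradient variance; sampling_eps
# keeps t away from 0, where the loss weighting diverges.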

eval:
  checkpoint_path: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/eos-wrapping_epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/checkpoints/best.ckpt
  disable_ema: False
  compute_generative_perplexity: False
  perplexity_batch_size: 8
  compute_perplexity_on_sanity: False
  gen_ppl_eval_model_name_or_path: gpt2-large
  generate_samples: True
  generation_model: /workspace/sg666/MDpLM/checkpoints/membrane_automodel/epochs60_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/
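# In sample_eval mode, checkpoint_path supplies the MDLM weights and
# generate_samples turns on sequence generation from generation_model.
# gen_ppl_eval_model_name_or_path is presumably only consulted when
# compute_generative_perplexity is True.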

optim:
  weight_decay: 0.075
  lr: 3e-4
  beta1: 0.9
  beta2: 0.999
  eps: 1e-8
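# Adam-style hyperparameters; the optimizer class itself is instantiated
# in code. These betas match the ones baked into the checkpoint
# directory names above (beta-one0.9_beta-two0.999).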

model:
  hidden_size: 1280
  cond_dim: 256
  n_heads: 20
  n_blocks: 4
  dropout: 0.5
  length: null
  scale_by_sigma: True
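# hidden_size 1280 over n_heads 20 gives 64-dim attention heads. Note
# that the defaults list also pulls in /model: small, which (with _self_
# listed first) can override the values in this block.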

trainer:
  _target_: lightning.Trainer
  accelerator: cuda
  num_nodes: 1
  devices: ${device_count:}
  accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
  gradient_clip_val: 1.0
  precision: bf16
  num_sanity_val_steps: 2
  max_epochs: 60
  max_steps: 1_000_000
  log_every_n_steps: 10
  limit_train_batches: 1.0
  limit_val_batches: 1.0
  val_check_interval: 955
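# Continuing the 4-GPU example from the loader section:
# accumulate_grad_batches = div_up(8, 4 * 2 * 1) = 1, i.e. no gradient
# accumulation is needed to reach loader.global_batch_size.
# val_check_interval: 955 runs validation every 955 training batches.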

wandb:
  project: MDpLM_finetune_membrane_200k-seqs
  notes: null
  group: programmablebio
  job_type: null
  name: dit_test
  id: ${.name}_${seed}
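# id resolves to dit_test_42 with the values above, so restarts sharing
# the same name and seed report to the same W&B run.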

hydra:
  run:
    dir: /workspace/sg666/MDpLM/outputs/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}
  job:
    chdir: true
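# chdir: true makes Hydra switch the working directory to run.dir at
# startup, so relative output paths land inside the run directory.
# Caution: ${data.train} is a mapping here, not a string, and may not
# interpolate cleanly into the directory name.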

checkpointing:
  save_dir: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/

  resume_from_ckpt: false
  resume_ckpt_path: ${.save_dir}/epochs30_lr3e-4_bsz8_gradclip1_beta-one0.9_beta-two0.999_bf16_all-params_no-compile/checkpoints/last.ckpt
  pretrained_esm_mdlm_automodel_path: /workspace/sg666/MDpLM/checkpoints/vanilla_esm_pretrained_automodel/epochs10_lr3e-4_200k-seqs_bsz16_all-params_no-compile_gradclip1_beta-one0.9_beta-two0.999_bf16/
  finetuned_esm_mdlm_automodel_path: /workspace/sg666/MDpLM/checkpoints/membrane_mdlm/
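# Set resume_from_ckpt: true to continue from resume_ckpt_path (a
# Lightning last.ckpt). The *_automodel_path entries appear to point at
# HuggingFace AutoModel-style exports of the pretrained and finetuned
# backbones (inferred from the directory names).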