Upload 3 files

Browse files

feat: kinase group task checkpoint

Files changed (3) hide show

kinase_group/2025-02-03__23-14-25/checkpoints/best_valid_kinase_group_macro_f1.pth +3 -0
kinase_group/2025-02-03__23-14-25/config.yaml +83 -0
kinase_group/2025-02-03__23-14-25/decoder_tokenizer.yaml +13 -0

kinase_group/2025-02-03__23-14-25/checkpoints/best_valid_kinase_group_macro_f1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5c7a6827b9d05527ad0c12deefdb769c75778be28f6abdf64b08624e784d691
+size 4835332462

kinase_group/2025-02-03__23-14-25/config.yaml ADDED Viewed

	@@ -0,0 +1,83 @@

+fix_seed: 0 # NOTE(review): unclear from this file whether 0 is the seed itself or disables seeding — confirm
+checkpoints_every: 256 # NOTE(review): unit (steps vs. epochs) is not stated here — confirm against the trainer
+tensorboard_log: True
+tqdm_progress_bar: False
+result_path: ./results/kinase_group/
+resume:
+  enable: False # resuming is off for this run
+  resume_path: path/to/checkpoints # looks like a placeholder; presumably only read when enable is True — confirm
+  restart_optimizer: True
+  restart_steps: True
+prot2token_model:
+  compile_model: True
+  positional_encoding_type: learned # options: absolute, learned
+  protein_encoder:
+    model_type: esm_v2 # options: esm_v2, t5
+    model_name:  facebook/esm2_t33_650M_UR50D # options: facebook/esm2_t33_650M_UR50D, facebook/esm2_t30_150M_UR50D, facebook/esm2_t12_35M_UR50D, facebook/esm2_t6_8M_UR50D, Rostlab/prot_t5_base_mt_uniref50
+    max_len: 1280
+    max_label_index: 1280
+    drop_positional_encoding: True
+    quantization_4_bit: False # use together with tune_embedding enabled
+    tune_embedding: False # only applies to esm encoders
+    fine_tune:
+      enable: True
+      last_layers_trainable: 6
+    lora:
+      enable: False # LoRA is off; r / lora_alpha / lora_dropout below are presumably unused in this run — confirm
+      r: 8
+      lora_alpha: 32
+      lora_dropout: 0.05
+  decoder:
+    latest_flash_attention: False
+    dimension: 640
+    dim_feedforward: 2560 # 4 x dimension
+    num_heads: 16 # 640 / 16 = 40 per head, assuming dimension is split evenly across heads — TODO confirm
+    num_layers: 16
+    max_len: 16
+    activation_function: gelu
+    decoder_context_dropout: 0.0
+train_settings:
+  skip: False
+  data_path: ../../datasets/Joint_training/ # same dataset root as valid_settings and test_settings
+  num_epochs: 64
+  start_metric_epoch: 2
+  shuffle: True
+  loss: crossentropy # options: crossentropy, focal
+  mixed_precision: bf16 # options: no, fp16, bf16, fp8
+  device: cuda
+  batch_size: 16
+  num_workers: 4
+  grad_accumulation: 1
+  max_task_samples: 20000
+valid_settings:
+  data_path: ../../datasets/Joint_training/
+  do_every: 1
+  batch_size: 1 # set to 1 when perplexity is False
+  perplexity: False
+  device: cuda
+  num_workers: 2
+test_settings:
+  enable: True
+  data_path: ../../datasets/Joint_training/
+  batch_size: 1
+  device: cuda
+  num_workers: 0
+  inference_type: inference_greedy
+  beam_search: # NOTE(review): presumably only consulted when inference_type selects beam search — confirm
+    top_k: 1
+    beam_width: 3
+    temperature: 1.0
+  monitoring_metrics: # one metric name per task key listed under tasks
+    kinase_group: macro_f1
+    kinase_interaction: f1
+    kinase_phosphorylation_site: f1
+tasks:
+  kinase_group: True # the only task switched on in this config
+  kinase_interaction: False
+  kinase_phosphorylation_site: False

kinase_group/2025-02-03__23-14-25/decoder_tokenizer.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+<bos>: 1
+<eos>: 2
+<pad>: 0 # ids 0-2 are taken by <pad>, <bos>, <eos>
+<task_kinase_group>: 3 # the single task token in this vocabulary
+AGC: 7 # remaining keys are the label tokens; together they cover ids 4-12 exactly once
+Atypical: 10
+CAMK: 8
+CK1: 11
+CMGC: 4
+Other: 5
+STE: 6
+TK: 9
+TKL: 12 # highest id in the map; the vocabulary has 13 entries (ids 0-12)