Mahdip72 committed
Commit ad77351 · verified · 1 Parent(s): f50eece

Upload 3 files

feat: kinase group task checkpoint

kinase_group/2025-02-03__23-14-25/checkpoints/best_valid_kinase_group_macro_f1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5c7a6827b9d05527ad0c12deefdb769c75778be28f6abdf64b08624e784d691
+ size 4835332462
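The .pth file is a Git LFS pointer: the diff records only the object id and byte size (about 4.8 GB), while the weights themselves live in LFS storage. Below is a minimal sketch for inspecting the resolved checkpoint with PyTorch; the top-level layout of the saved object is an assumption, since this commit does not document it:

```python
import torch

# Load the LFS-resolved checkpoint on CPU; at ~4.8 GB it is safer
# to inspect on CPU before moving anything to a device.
# On PyTorch >= 2.6 you may need weights_only=False if the file
# bundles non-tensor objects.
ckpt = torch.load(
    "kinase_group/2025-02-03__23-14-25/checkpoints/best_valid_kinase_group_macro_f1.pth",
    map_location="cpu",
)

# The saved object's structure is an assumption: it may be a bare
# state dict or a dict bundling model and optimizer state.
if isinstance(ckpt, dict):
    print(list(ckpt.keys())[:10])
```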
kinase_group/2025-02-03__23-14-25/config.yaml ADDED
@@ -0,0 +1,83 @@
+ fix_seed: 0
+ checkpoints_every: 256
+ tensorboard_log: True
+ tqdm_progress_bar: False
+ result_path: ./results/kinase_group/
+
+ resume:
+   enable: False
+   resume_path: path/to/checkpoints
+   restart_optimizer: True
+   restart_steps: True
+
+ prot2token_model:
+   compile_model: True
+   positional_encoding_type: learned # absolute, learned
+   protein_encoder:
+     model_type: esm_v2 # esm_v2, t5
+     model_name: facebook/esm2_t33_650M_UR50D # facebook/esm2_t33_650M_UR50D, facebook/esm2_t30_150M_UR50D, facebook/esm2_t12_35M_UR50D, facebook/esm2_t6_8M_UR50D, Rostlab/prot_t5_base_mt_uniref50
+     max_len: 1280
+     max_label_index: 1280
+     drop_positional_encoding: True
+     quantization_4_bit: False # use with tune_embedding enable
+     tune_embedding: False # only for esm
+     fine_tune:
+       enable: True
+       last_layers_trainable: 6
+     lora:
+       enable: False
+       r: 8
+       lora_alpha: 32
+       lora_dropout: 0.05
+   decoder:
+     latest_flash_attention: False
+     dimension: 640
+     dim_feedforward: 2560
+     num_heads: 16
+     num_layers: 16
+     max_len: 16
+     activation_function: gelu
+     decoder_context_dropout: 0.0
+
+ train_settings:
+   skip: False
+   data_path: ../../datasets/Joint_training/
+   num_epochs: 64
+   start_metric_epoch: 2
+   shuffle: True
+   loss: crossentropy # crossentropy or focal
+   mixed_precision: bf16 # no, fp16, bf16, fp8
+   device: cuda
+   batch_size: 16
+   num_workers: 4
+   grad_accumulation: 1
+   max_task_samples: 20000
+
+ valid_settings:
+   data_path: ../../datasets/Joint_training/
+   do_every: 1
+   batch_size: 1 # 1 when perplexity = False
+   perplexity: False
+   device: cuda
+   num_workers: 2
+
+ test_settings:
+   enable: True
+   data_path: ../../datasets/Joint_training/
+   batch_size: 1
+   device: cuda
+   num_workers: 0
+   inference_type: inference_greedy
+   beam_search:
+     top_k: 1
+     beam_width: 3
+     temperature: 1.0
+   monitoring_metrics:
+     kinase_group: macro_f1
+     kinase_interaction: f1
+     kinase_phosphorylation_site: f1
+
+ tasks:
+   kinase_group: True
+   kinase_interaction: False
+   kinase_phosphorylation_site: False
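The YAML above is standard, so it parses with plain PyYAML. The sketch below shows reading back the settings that define this run; the nesting (e.g. monitoring_metrics under test_settings) is reconstructed from this diff, and the accessor key paths assume that layout rather than quoting the repository's training code:

```python
import yaml

# Parse the run configuration; PyYAML converts True/False and
# numeric literals to native Python types.
with open("kinase_group/2025-02-03__23-14-25/config.yaml") as f:
    cfg = yaml.safe_load(f)

# Key settings for this checkpoint (key paths assume the nesting
# reconstructed above).
print(cfg["prot2token_model"]["protein_encoder"]["model_name"])
print(cfg["train_settings"]["batch_size"], cfg["train_settings"]["mixed_precision"])
print(cfg["tasks"])  # only kinase_group is enabled for this run
```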
kinase_group/2025-02-03__23-14-25/decoder_tokenizer.yaml ADDED
@@ -0,0 +1,13 @@
+ <bos>: 1
+ <eos>: 2
+ <pad>: 0
+ <task_kinase_group>: 3
+ AGC: 7
+ Atypical: 10
+ CAMK: 8
+ CK1: 11
+ CMGC: 4
+ Other: 5
+ STE: 6
+ TK: 9
+ TKL: 12
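decoder_tokenizer.yaml is the decoder's flat token-to-id vocabulary: the <pad>/<bos>/<eos> specials, the task token, and the nine kinase group labels (AGC, Atypical, CAMK, CK1, CMGC, Other, STE, TK, TKL). A small sketch of inverting it to decode generated ids; the example id sequence is hypothetical, not actual model output:

```python
import yaml

# Load the decoder vocabulary and invert it so generated token ids
# map back to labels.
with open("kinase_group/2025-02-03__23-14-25/decoder_tokenizer.yaml") as f:
    token_to_id = yaml.safe_load(f)
id_to_token = {v: k for k, v in token_to_id.items()}

# Hypothetical decoder output: <bos>, task token, predicted label, <eos>.
generated_ids = [1, 3, 9, 2]
print([id_to_token[i] for i in generated_ids])
# ['<bos>', '<task_kinase_group>', 'TK', '<eos>']
```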