Ning Sun committed on
Commit
88a320b
·
1 Parent(s): bcb1fbf

initial upload

Browse files
fold0/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.4.0
2
+ seed_everything: 42
3
+ trainer:
4
+ accelerator: auto
5
+ strategy:
6
+ class_path: lightning.pytorch.strategies.DDPStrategy
7
+ init_args:
8
+ accelerator: null
9
+ parallel_devices: null
10
+ cluster_environment: null
11
+ checkpoint_io: null
12
+ precision_plugin: null
13
+ ddp_comm_state: null
14
+ ddp_comm_hook: null
15
+ ddp_comm_wrapper: null
16
+ model_averaging_period: null
17
+ process_group_backend: null
18
+ timeout: 0:30:00
19
+ start_method: popen
20
+ output_device: null
21
+ dim: 0
22
+ broadcast_buffers: true
23
+ process_group: null
24
+ bucket_cap_mb: 25
25
+ find_unused_parameters: false
26
+ check_reduction: false
27
+ gradient_as_bucket_view: false
28
+ static_graph: false
29
+ delay_all_reduce_named_params: null
30
+ param_to_hook_all_reduce: null
31
+ mixed_precision: null
32
+ device_mesh: null
33
+ devices: 4
34
+ num_nodes: 2
35
+ precision: 32
36
+ logger:
37
+ class_path: lightning.pytorch.loggers.WandbLogger
38
+ init_args:
39
+ name: Q53Z42_HUMAN_McShan_2019_binding-TAPBPR
40
+ save_dir: logs
41
+ version: null
42
+ offline: false
43
+ dir: null
44
+ id: null
45
+ anonymous: null
46
+ project: GBFT_PROTEINFM_DMS
47
+ log_model: false
48
+ experiment: null
49
+ prefix: ''
50
+ checkpoint_name: null
51
+ job_type: null
52
+ config: null
53
+ entity: null
54
+ reinit: null
55
+ tags: null
56
+ group: null
57
+ notes: null
58
+ magic: null
59
+ config_exclude_keys: null
60
+ config_include_keys: null
61
+ mode: null
62
+ allow_val_change: null
63
+ resume: null
64
+ force: null
65
+ tensorboard: null
66
+ sync_tensorboard: null
67
+ monitor_gym: null
68
+ save_code: true
69
+ settings: null
70
+ callbacks:
71
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
72
+ init_args:
73
+ logging_interval: step
74
+ log_momentum: false
75
+ log_weight_decay: false
76
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint # checkpoint every epoch; keep the last ckpt and the best val_spearman ckpt
77
+ init_args:
78
+ dirpath: null
79
+ filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
80
+ monitor: val_spearman
81
+ verbose: false
82
+ save_last: true
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: max
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: 1
90
+ save_on_train_epoch_end: null
91
+ enable_version_counter: true
92
+ - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
93
+ dict_kwargs:
94
+ monitor: val_spearman
95
+ mode: max
96
+ patience: 5
97
+ fast_dev_run: false
98
+ max_epochs: 100
99
+ min_epochs: null
100
+ max_steps: null
101
+ min_steps: null
102
+ max_time: null
103
+ limit_train_batches: null
104
+ limit_val_batches: null
105
+ limit_test_batches: null
106
+ limit_predict_batches: null
107
+ overfit_batches: 0.0
108
+ val_check_interval: null
109
+ check_val_every_n_epoch: 1
110
+ num_sanity_val_steps: null
111
+ log_every_n_steps: 50
112
+ enable_checkpointing: null
113
+ enable_progress_bar: null
114
+ enable_model_summary: null
115
+ accumulate_grad_batches: 1
116
+ gradient_clip_val: 0.1
117
+ gradient_clip_algorithm: null
118
+ deterministic: null
119
+ benchmark: null
120
+ inference_mode: true
121
+ use_distributed_sampler: true
122
+ profiler:
123
+ class_path: lightning.pytorch.profilers.PyTorchProfiler
124
+ init_args:
125
+ dirpath: null
126
+ filename: null
127
+ group_by_input_shapes: false
128
+ emit_nvtx: false
129
+ export_to_chrome: true
130
+ row_limit: 20
131
+ sort_by_key: null
132
+ record_module_names: true
133
+ table_kwargs: null
134
+ record_shapes: false
135
+ dict_kwargs:
136
+ profile_memory: true
137
+ detect_anomaly: false
138
+ barebones: false
139
+ plugins: null
140
+ sync_batchnorm: false
141
+ reload_dataloaders_every_n_epochs: 0
142
+ default_root_dir: logs
143
+ model:
144
+ class_path: genbio_finetune.tasks.SequenceRegression
145
+ init_args:
146
+ backbone:
147
+ class_path: genbio_finetune.models.proteinfm_v1
148
+ init_args:
149
+ from_scratch: false
150
+ use_peft: true
151
+ save_peft_only: true
152
+ lora_r: 16
153
+ lora_alpha: 32
154
+ lora_dropout: 0.05
155
+ config_overwrites: null
156
+ model_init_args: null
157
+ max_length: 2048
158
+ adapter:
159
+ class_path: genbio_finetune.models.MLPPoolAdapter
160
+ init_args:
161
+ pooling: mean_pooling
162
+ hidden_sizes:
163
+ - 128
164
+ bias: true
165
+ dropout: 0.1
166
+ dropout_in_middle: false
167
+ optimizer:
168
+ class_path: torch.optim.AdamW
169
+ init_args:
170
+ lr: 0.0001
171
+ betas:
172
+ - 0.9
173
+ - 0.95
174
+ eps: 1.0e-08
175
+ weight_decay: 0.01
176
+ amsgrad: false
177
+ maximize: false
178
+ foreach: null
179
+ capturable: false
180
+ differentiable: false
181
+ fused: null
182
+ lr_scheduler:
183
+ class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
184
+ init_args:
185
+ warmup_ratio: 0.05
186
+ strict_loading: true
187
+ reset_optimizer_states: false
188
+ data:
189
+ class_path: genbio_finetune.data.DMSFitnessPrediction
190
+ init_args:
191
+ path: genbio-ai/ProteinGYM-DMS
192
+ train_split_files:
193
+ - singles_substitutions/Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.tsv
194
+ normalize: true
195
+ train_split_name: 'train'
196
+ test_split_files: null
197
+ valid_split_files: null
198
+ random_seed: 42
199
+ batch_size: 2
200
+ shuffle: true
201
+ sampler: null
202
+ num_workers: 0
203
+ pin_memory: true
204
+ persistent_workers: false
205
+ cv_num_folds: 5
206
+ cv_test_fold_id: 0
207
+ cv_enable_val_fold: true
208
+ cv_fold_id_col: fold_id
209
+ ckpt_path: null
fold0/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:300736b910f48a946049ff4fc793b1da88b48bb666ea8b87e953e204b947f452
3
+ size 147655256
fold1/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.4.0
2
+ seed_everything: 42
3
+ trainer:
4
+ accelerator: auto
5
+ strategy:
6
+ class_path: lightning.pytorch.strategies.DDPStrategy
7
+ init_args:
8
+ accelerator: null
9
+ parallel_devices: null
10
+ cluster_environment: null
11
+ checkpoint_io: null
12
+ precision_plugin: null
13
+ ddp_comm_state: null
14
+ ddp_comm_hook: null
15
+ ddp_comm_wrapper: null
16
+ model_averaging_period: null
17
+ process_group_backend: null
18
+ timeout: 0:30:00
19
+ start_method: popen
20
+ output_device: null
21
+ dim: 0
22
+ broadcast_buffers: true
23
+ process_group: null
24
+ bucket_cap_mb: 25
25
+ find_unused_parameters: false
26
+ check_reduction: false
27
+ gradient_as_bucket_view: false
28
+ static_graph: false
29
+ delay_all_reduce_named_params: null
30
+ param_to_hook_all_reduce: null
31
+ mixed_precision: null
32
+ device_mesh: null
33
+ devices: 4
34
+ num_nodes: 2
35
+ precision: 32
36
+ logger:
37
+ class_path: lightning.pytorch.loggers.WandbLogger
38
+ init_args:
39
+ name: Q53Z42_HUMAN_McShan_2019_binding-TAPBPR
40
+ save_dir: logs
41
+ version: null
42
+ offline: false
43
+ dir: null
44
+ id: null
45
+ anonymous: null
46
+ project: GBFT_PROTEINFM_DMS
47
+ log_model: false
48
+ experiment: null
49
+ prefix: ''
50
+ checkpoint_name: null
51
+ job_type: null
52
+ config: null
53
+ entity: null
54
+ reinit: null
55
+ tags: null
56
+ group: null
57
+ notes: null
58
+ magic: null
59
+ config_exclude_keys: null
60
+ config_include_keys: null
61
+ mode: null
62
+ allow_val_change: null
63
+ resume: null
64
+ force: null
65
+ tensorboard: null
66
+ sync_tensorboard: null
67
+ monitor_gym: null
68
+ save_code: true
69
+ settings: null
70
+ callbacks:
71
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
72
+ init_args:
73
+ logging_interval: step
74
+ log_momentum: false
75
+ log_weight_decay: false
76
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint # checkpoint every epoch; keep the last ckpt and the best val_spearman ckpt
77
+ init_args:
78
+ dirpath: null
79
+ filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
80
+ monitor: val_spearman
81
+ verbose: false
82
+ save_last: true
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: max
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: 1
90
+ save_on_train_epoch_end: null
91
+ enable_version_counter: true
92
+ - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
93
+ dict_kwargs:
94
+ monitor: val_spearman
95
+ mode: max
96
+ patience: 5
97
+ fast_dev_run: false
98
+ max_epochs: 100
99
+ min_epochs: null
100
+ max_steps: null
101
+ min_steps: null
102
+ max_time: null
103
+ limit_train_batches: null
104
+ limit_val_batches: null
105
+ limit_test_batches: null
106
+ limit_predict_batches: null
107
+ overfit_batches: 0.0
108
+ val_check_interval: null
109
+ check_val_every_n_epoch: 1
110
+ num_sanity_val_steps: null
111
+ log_every_n_steps: 50
112
+ enable_checkpointing: null
113
+ enable_progress_bar: null
114
+ enable_model_summary: null
115
+ accumulate_grad_batches: 1
116
+ gradient_clip_val: 0.1
117
+ gradient_clip_algorithm: null
118
+ deterministic: null
119
+ benchmark: null
120
+ inference_mode: true
121
+ use_distributed_sampler: true
122
+ profiler:
123
+ class_path: lightning.pytorch.profilers.PyTorchProfiler
124
+ init_args:
125
+ dirpath: null
126
+ filename: null
127
+ group_by_input_shapes: false
128
+ emit_nvtx: false
129
+ export_to_chrome: true
130
+ row_limit: 20
131
+ sort_by_key: null
132
+ record_module_names: true
133
+ table_kwargs: null
134
+ record_shapes: false
135
+ dict_kwargs:
136
+ profile_memory: true
137
+ detect_anomaly: false
138
+ barebones: false
139
+ plugins: null
140
+ sync_batchnorm: false
141
+ reload_dataloaders_every_n_epochs: 0
142
+ default_root_dir: logs
143
+ model:
144
+ class_path: genbio_finetune.tasks.SequenceRegression
145
+ init_args:
146
+ backbone:
147
+ class_path: genbio_finetune.models.proteinfm_v1
148
+ init_args:
149
+ from_scratch: false
150
+ use_peft: true
151
+ save_peft_only: true
152
+ lora_r: 16
153
+ lora_alpha: 32
154
+ lora_dropout: 0.05
155
+ config_overwrites: null
156
+ model_init_args: null
157
+ max_length: 2048
158
+ adapter:
159
+ class_path: genbio_finetune.models.MLPPoolAdapter
160
+ init_args:
161
+ pooling: mean_pooling
162
+ hidden_sizes:
163
+ - 128
164
+ bias: true
165
+ dropout: 0.1
166
+ dropout_in_middle: false
167
+ optimizer:
168
+ class_path: torch.optim.AdamW
169
+ init_args:
170
+ lr: 0.0001
171
+ betas:
172
+ - 0.9
173
+ - 0.95
174
+ eps: 1.0e-08
175
+ weight_decay: 0.01
176
+ amsgrad: false
177
+ maximize: false
178
+ foreach: null
179
+ capturable: false
180
+ differentiable: false
181
+ fused: null
182
+ lr_scheduler:
183
+ class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
184
+ init_args:
185
+ warmup_ratio: 0.05
186
+ strict_loading: true
187
+ reset_optimizer_states: false
188
+ data:
189
+ class_path: genbio_finetune.data.DMSFitnessPrediction
190
+ init_args:
191
+ path: genbio-ai/ProteinGYM-DMS
192
+ train_split_files:
193
+ - singles_substitutions/Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.tsv
194
+ normalize: true
195
+ train_split_name: 'train'
196
+ test_split_files: null
197
+ valid_split_files: null
198
+ random_seed: 42
199
+ batch_size: 2
200
+ shuffle: true
201
+ sampler: null
202
+ num_workers: 0
203
+ pin_memory: true
204
+ persistent_workers: false
205
+ cv_num_folds: 5
206
+ cv_test_fold_id: 1
207
+ cv_enable_val_fold: true
208
+ cv_fold_id_col: fold_id
209
+ ckpt_path: null
fold1/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d7e5e275fb30ab9a70fda362cbdad708c5584a7eaf66f8276a665685f9ba5b
3
+ size 147655256
fold2/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.4.0
2
+ seed_everything: 42
3
+ trainer:
4
+ accelerator: auto
5
+ strategy:
6
+ class_path: lightning.pytorch.strategies.DDPStrategy
7
+ init_args:
8
+ accelerator: null
9
+ parallel_devices: null
10
+ cluster_environment: null
11
+ checkpoint_io: null
12
+ precision_plugin: null
13
+ ddp_comm_state: null
14
+ ddp_comm_hook: null
15
+ ddp_comm_wrapper: null
16
+ model_averaging_period: null
17
+ process_group_backend: null
18
+ timeout: 0:30:00
19
+ start_method: popen
20
+ output_device: null
21
+ dim: 0
22
+ broadcast_buffers: true
23
+ process_group: null
24
+ bucket_cap_mb: 25
25
+ find_unused_parameters: false
26
+ check_reduction: false
27
+ gradient_as_bucket_view: false
28
+ static_graph: false
29
+ delay_all_reduce_named_params: null
30
+ param_to_hook_all_reduce: null
31
+ mixed_precision: null
32
+ device_mesh: null
33
+ devices: 4
34
+ num_nodes: 2
35
+ precision: 32
36
+ logger:
37
+ class_path: lightning.pytorch.loggers.WandbLogger
38
+ init_args:
39
+ name: Q53Z42_HUMAN_McShan_2019_binding-TAPBPR
40
+ save_dir: logs
41
+ version: null
42
+ offline: false
43
+ dir: null
44
+ id: null
45
+ anonymous: null
46
+ project: GBFT_PROTEINFM_DMS
47
+ log_model: false
48
+ experiment: null
49
+ prefix: ''
50
+ checkpoint_name: null
51
+ job_type: null
52
+ config: null
53
+ entity: null
54
+ reinit: null
55
+ tags: null
56
+ group: null
57
+ notes: null
58
+ magic: null
59
+ config_exclude_keys: null
60
+ config_include_keys: null
61
+ mode: null
62
+ allow_val_change: null
63
+ resume: null
64
+ force: null
65
+ tensorboard: null
66
+ sync_tensorboard: null
67
+ monitor_gym: null
68
+ save_code: true
69
+ settings: null
70
+ callbacks:
71
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
72
+ init_args:
73
+ logging_interval: step
74
+ log_momentum: false
75
+ log_weight_decay: false
76
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint # checkpoint every epoch; keep the last ckpt and the best val_spearman ckpt
77
+ init_args:
78
+ dirpath: null
79
+ filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
80
+ monitor: val_spearman
81
+ verbose: false
82
+ save_last: true
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: max
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: 1
90
+ save_on_train_epoch_end: null
91
+ enable_version_counter: true
92
+ - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
93
+ dict_kwargs:
94
+ monitor: val_spearman
95
+ mode: max
96
+ patience: 5
97
+ fast_dev_run: false
98
+ max_epochs: 100
99
+ min_epochs: null
100
+ max_steps: null
101
+ min_steps: null
102
+ max_time: null
103
+ limit_train_batches: null
104
+ limit_val_batches: null
105
+ limit_test_batches: null
106
+ limit_predict_batches: null
107
+ overfit_batches: 0.0
108
+ val_check_interval: null
109
+ check_val_every_n_epoch: 1
110
+ num_sanity_val_steps: null
111
+ log_every_n_steps: 50
112
+ enable_checkpointing: null
113
+ enable_progress_bar: null
114
+ enable_model_summary: null
115
+ accumulate_grad_batches: 1
116
+ gradient_clip_val: 0.1
117
+ gradient_clip_algorithm: null
118
+ deterministic: null
119
+ benchmark: null
120
+ inference_mode: true
121
+ use_distributed_sampler: true
122
+ profiler:
123
+ class_path: lightning.pytorch.profilers.PyTorchProfiler
124
+ init_args:
125
+ dirpath: null
126
+ filename: null
127
+ group_by_input_shapes: false
128
+ emit_nvtx: false
129
+ export_to_chrome: true
130
+ row_limit: 20
131
+ sort_by_key: null
132
+ record_module_names: true
133
+ table_kwargs: null
134
+ record_shapes: false
135
+ dict_kwargs:
136
+ profile_memory: true
137
+ detect_anomaly: false
138
+ barebones: false
139
+ plugins: null
140
+ sync_batchnorm: false
141
+ reload_dataloaders_every_n_epochs: 0
142
+ default_root_dir: logs
143
+ model:
144
+ class_path: genbio_finetune.tasks.SequenceRegression
145
+ init_args:
146
+ backbone:
147
+ class_path: genbio_finetune.models.proteinfm_v1
148
+ init_args:
149
+ from_scratch: false
150
+ use_peft: true
151
+ save_peft_only: true
152
+ lora_r: 16
153
+ lora_alpha: 32
154
+ lora_dropout: 0.05
155
+ config_overwrites: null
156
+ model_init_args: null
157
+ max_length: 2048
158
+ adapter:
159
+ class_path: genbio_finetune.models.MLPPoolAdapter
160
+ init_args:
161
+ pooling: mean_pooling
162
+ hidden_sizes:
163
+ - 128
164
+ bias: true
165
+ dropout: 0.1
166
+ dropout_in_middle: false
167
+ optimizer:
168
+ class_path: torch.optim.AdamW
169
+ init_args:
170
+ lr: 0.0001
171
+ betas:
172
+ - 0.9
173
+ - 0.95
174
+ eps: 1.0e-08
175
+ weight_decay: 0.01
176
+ amsgrad: false
177
+ maximize: false
178
+ foreach: null
179
+ capturable: false
180
+ differentiable: false
181
+ fused: null
182
+ lr_scheduler:
183
+ class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
184
+ init_args:
185
+ warmup_ratio: 0.05
186
+ strict_loading: true
187
+ reset_optimizer_states: false
188
+ data:
189
+ class_path: genbio_finetune.data.DMSFitnessPrediction
190
+ init_args:
191
+ path: genbio-ai/ProteinGYM-DMS
192
+ train_split_files:
193
+ - singles_substitutions/Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.tsv
194
+ normalize: true
195
+ train_split_name: 'train'
196
+ test_split_files: null
197
+ valid_split_files: null
198
+ random_seed: 42
199
+ batch_size: 2
200
+ shuffle: true
201
+ sampler: null
202
+ num_workers: 0
203
+ pin_memory: true
204
+ persistent_workers: false
205
+ cv_num_folds: 5
206
+ cv_test_fold_id: 2
207
+ cv_enable_val_fold: true
208
+ cv_fold_id_col: fold_id
209
+ ckpt_path: null
fold2/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae67ad1fde69f146f30eefd8575a9aaa67589e3c43c2d2bf12b551ec1075976a
3
+ size 147655256
fold3/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.4.0
2
+ seed_everything: 42
3
+ trainer:
4
+ accelerator: auto
5
+ strategy:
6
+ class_path: lightning.pytorch.strategies.DDPStrategy
7
+ init_args:
8
+ accelerator: null
9
+ parallel_devices: null
10
+ cluster_environment: null
11
+ checkpoint_io: null
12
+ precision_plugin: null
13
+ ddp_comm_state: null
14
+ ddp_comm_hook: null
15
+ ddp_comm_wrapper: null
16
+ model_averaging_period: null
17
+ process_group_backend: null
18
+ timeout: 0:30:00
19
+ start_method: popen
20
+ output_device: null
21
+ dim: 0
22
+ broadcast_buffers: true
23
+ process_group: null
24
+ bucket_cap_mb: 25
25
+ find_unused_parameters: false
26
+ check_reduction: false
27
+ gradient_as_bucket_view: false
28
+ static_graph: false
29
+ delay_all_reduce_named_params: null
30
+ param_to_hook_all_reduce: null
31
+ mixed_precision: null
32
+ device_mesh: null
33
+ devices: 4
34
+ num_nodes: 2
35
+ precision: 32
36
+ logger:
37
+ class_path: lightning.pytorch.loggers.WandbLogger
38
+ init_args:
39
+ name: Q53Z42_HUMAN_McShan_2019_binding-TAPBPR
40
+ save_dir: logs
41
+ version: null
42
+ offline: false
43
+ dir: null
44
+ id: null
45
+ anonymous: null
46
+ project: GBFT_PROTEINFM_DMS
47
+ log_model: false
48
+ experiment: null
49
+ prefix: ''
50
+ checkpoint_name: null
51
+ job_type: null
52
+ config: null
53
+ entity: null
54
+ reinit: null
55
+ tags: null
56
+ group: null
57
+ notes: null
58
+ magic: null
59
+ config_exclude_keys: null
60
+ config_include_keys: null
61
+ mode: null
62
+ allow_val_change: null
63
+ resume: null
64
+ force: null
65
+ tensorboard: null
66
+ sync_tensorboard: null
67
+ monitor_gym: null
68
+ save_code: true
69
+ settings: null
70
+ callbacks:
71
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
72
+ init_args:
73
+ logging_interval: step
74
+ log_momentum: false
75
+ log_weight_decay: false
76
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint # checkpoint every epoch; keep the last ckpt and the best val_spearman ckpt
77
+ init_args:
78
+ dirpath: null
79
+ filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
80
+ monitor: val_spearman
81
+ verbose: false
82
+ save_last: true
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: max
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: 1
90
+ save_on_train_epoch_end: null
91
+ enable_version_counter: true
92
+ - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
93
+ dict_kwargs:
94
+ monitor: val_spearman
95
+ mode: max
96
+ patience: 5
97
+ fast_dev_run: false
98
+ max_epochs: 100
99
+ min_epochs: null
100
+ max_steps: null
101
+ min_steps: null
102
+ max_time: null
103
+ limit_train_batches: null
104
+ limit_val_batches: null
105
+ limit_test_batches: null
106
+ limit_predict_batches: null
107
+ overfit_batches: 0.0
108
+ val_check_interval: null
109
+ check_val_every_n_epoch: 1
110
+ num_sanity_val_steps: null
111
+ log_every_n_steps: 50
112
+ enable_checkpointing: null
113
+ enable_progress_bar: null
114
+ enable_model_summary: null
115
+ accumulate_grad_batches: 1
116
+ gradient_clip_val: 0.1
117
+ gradient_clip_algorithm: null
118
+ deterministic: null
119
+ benchmark: null
120
+ inference_mode: true
121
+ use_distributed_sampler: true
122
+ profiler:
123
+ class_path: lightning.pytorch.profilers.PyTorchProfiler
124
+ init_args:
125
+ dirpath: null
126
+ filename: null
127
+ group_by_input_shapes: false
128
+ emit_nvtx: false
129
+ export_to_chrome: true
130
+ row_limit: 20
131
+ sort_by_key: null
132
+ record_module_names: true
133
+ table_kwargs: null
134
+ record_shapes: false
135
+ dict_kwargs:
136
+ profile_memory: true
137
+ detect_anomaly: false
138
+ barebones: false
139
+ plugins: null
140
+ sync_batchnorm: false
141
+ reload_dataloaders_every_n_epochs: 0
142
+ default_root_dir: logs
143
+ model:
144
+ class_path: genbio_finetune.tasks.SequenceRegression
145
+ init_args:
146
+ backbone:
147
+ class_path: genbio_finetune.models.proteinfm_v1
148
+ init_args:
149
+ from_scratch: false
150
+ use_peft: true
151
+ save_peft_only: true
152
+ lora_r: 16
153
+ lora_alpha: 32
154
+ lora_dropout: 0.05
155
+ config_overwrites: null
156
+ model_init_args: null
157
+ max_length: 2048
158
+ adapter:
159
+ class_path: genbio_finetune.models.MLPPoolAdapter
160
+ init_args:
161
+ pooling: mean_pooling
162
+ hidden_sizes:
163
+ - 128
164
+ bias: true
165
+ dropout: 0.1
166
+ dropout_in_middle: false
167
+ optimizer:
168
+ class_path: torch.optim.AdamW
169
+ init_args:
170
+ lr: 0.0001
171
+ betas:
172
+ - 0.9
173
+ - 0.95
174
+ eps: 1.0e-08
175
+ weight_decay: 0.01
176
+ amsgrad: false
177
+ maximize: false
178
+ foreach: null
179
+ capturable: false
180
+ differentiable: false
181
+ fused: null
182
+ lr_scheduler:
183
+ class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
184
+ init_args:
185
+ warmup_ratio: 0.05
186
+ strict_loading: true
187
+ reset_optimizer_states: false
188
+ data:
189
+ class_path: genbio_finetune.data.DMSFitnessPrediction
190
+ init_args:
191
+ path: genbio-ai/ProteinGYM-DMS
192
+ train_split_files:
193
+ - singles_substitutions/Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.tsv
194
+ normalize: true
195
+ train_split_name: 'train'
196
+ test_split_files: null
197
+ valid_split_files: null
198
+ random_seed: 42
199
+ batch_size: 2
200
+ shuffle: true
201
+ sampler: null
202
+ num_workers: 0
203
+ pin_memory: true
204
+ persistent_workers: false
205
+ cv_num_folds: 5
206
+ cv_test_fold_id: 3
207
+ cv_enable_val_fold: true
208
+ cv_fold_id_col: fold_id
209
+ ckpt_path: null
fold3/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b08d6e8e61943f2dc44eeb34a11a8623921bc9cb041c5c263a0554100ea62b6
3
+ size 147655256
fold4/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.4.0
2
+ seed_everything: 42
3
+ trainer:
4
+ accelerator: auto
5
+ strategy:
6
+ class_path: lightning.pytorch.strategies.DDPStrategy
7
+ init_args:
8
+ accelerator: null
9
+ parallel_devices: null
10
+ cluster_environment: null
11
+ checkpoint_io: null
12
+ precision_plugin: null
13
+ ddp_comm_state: null
14
+ ddp_comm_hook: null
15
+ ddp_comm_wrapper: null
16
+ model_averaging_period: null
17
+ process_group_backend: null
18
+ timeout: 0:30:00
19
+ start_method: popen
20
+ output_device: null
21
+ dim: 0
22
+ broadcast_buffers: true
23
+ process_group: null
24
+ bucket_cap_mb: 25
25
+ find_unused_parameters: false
26
+ check_reduction: false
27
+ gradient_as_bucket_view: false
28
+ static_graph: false
29
+ delay_all_reduce_named_params: null
30
+ param_to_hook_all_reduce: null
31
+ mixed_precision: null
32
+ device_mesh: null
33
+ devices: 4
34
+ num_nodes: 2
35
+ precision: 32
36
+ logger:
37
+ class_path: lightning.pytorch.loggers.WandbLogger
38
+ init_args:
39
+ name: Q53Z42_HUMAN_McShan_2019_binding-TAPBPR
40
+ save_dir: logs
41
+ version: null
42
+ offline: false
43
+ dir: null
44
+ id: null
45
+ anonymous: null
46
+ project: GBFT_PROTEINFM_DMS
47
+ log_model: false
48
+ experiment: null
49
+ prefix: ''
50
+ checkpoint_name: null
51
+ job_type: null
52
+ config: null
53
+ entity: null
54
+ reinit: null
55
+ tags: null
56
+ group: null
57
+ notes: null
58
+ magic: null
59
+ config_exclude_keys: null
60
+ config_include_keys: null
61
+ mode: null
62
+ allow_val_change: null
63
+ resume: null
64
+ force: null
65
+ tensorboard: null
66
+ sync_tensorboard: null
67
+ monitor_gym: null
68
+ save_code: true
69
+ settings: null
70
+ callbacks:
71
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
72
+ init_args:
73
+ logging_interval: step
74
+ log_momentum: false
75
+ log_weight_decay: false
76
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint # checkpoint every epoch; keep the last ckpt and the best val_spearman ckpt
77
+ init_args:
78
+ dirpath: null
79
+ filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
80
+ monitor: val_spearman
81
+ verbose: false
82
+ save_last: true
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: max
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: 1
90
+ save_on_train_epoch_end: null
91
+ enable_version_counter: true
92
+ - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
93
+ dict_kwargs:
94
+ monitor: val_spearman
95
+ mode: max
96
+ patience: 5
97
+ fast_dev_run: false
98
+ max_epochs: 100
99
+ min_epochs: null
100
+ max_steps: null
101
+ min_steps: null
102
+ max_time: null
103
+ limit_train_batches: null
104
+ limit_val_batches: null
105
+ limit_test_batches: null
106
+ limit_predict_batches: null
107
+ overfit_batches: 0.0
108
+ val_check_interval: null
109
+ check_val_every_n_epoch: 1
110
+ num_sanity_val_steps: null
111
+ log_every_n_steps: 50
112
+ enable_checkpointing: null
113
+ enable_progress_bar: null
114
+ enable_model_summary: null
115
+ accumulate_grad_batches: 1
116
+ gradient_clip_val: 0.1
117
+ gradient_clip_algorithm: null
118
+ deterministic: null
119
+ benchmark: null
120
+ inference_mode: true
121
+ use_distributed_sampler: true
122
+ profiler:
123
+ class_path: lightning.pytorch.profilers.PyTorchProfiler
124
+ init_args:
125
+ dirpath: null
126
+ filename: null
127
+ group_by_input_shapes: false
128
+ emit_nvtx: false
129
+ export_to_chrome: true
130
+ row_limit: 20
131
+ sort_by_key: null
132
+ record_module_names: true
133
+ table_kwargs: null
134
+ record_shapes: false
135
+ dict_kwargs:
136
+ profile_memory: true
137
+ detect_anomaly: false
138
+ barebones: false
139
+ plugins: null
140
+ sync_batchnorm: false
141
+ reload_dataloaders_every_n_epochs: 0
142
+ default_root_dir: logs
143
+ model:
144
+ class_path: genbio_finetune.tasks.SequenceRegression
145
+ init_args:
146
+ backbone:
147
+ class_path: genbio_finetune.models.proteinfm_v1
148
+ init_args:
149
+ from_scratch: false
150
+ use_peft: true
151
+ save_peft_only: true
152
+ lora_r: 16
153
+ lora_alpha: 32
154
+ lora_dropout: 0.05
155
+ config_overwrites: null
156
+ model_init_args: null
157
+ max_length: 2048
158
+ adapter:
159
+ class_path: genbio_finetune.models.MLPPoolAdapter
160
+ init_args:
161
+ pooling: mean_pooling
162
+ hidden_sizes:
163
+ - 128
164
+ bias: true
165
+ dropout: 0.1
166
+ dropout_in_middle: false
167
+ optimizer:
168
+ class_path: torch.optim.AdamW
169
+ init_args:
170
+ lr: 0.0001
171
+ betas:
172
+ - 0.9
173
+ - 0.95
174
+ eps: 1.0e-08
175
+ weight_decay: 0.01
176
+ amsgrad: false
177
+ maximize: false
178
+ foreach: null
179
+ capturable: false
180
+ differentiable: false
181
+ fused: null
182
+ lr_scheduler:
183
+ class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
184
+ init_args:
185
+ warmup_ratio: 0.05
186
+ strict_loading: true
187
+ reset_optimizer_states: false
188
+ data:
189
+ class_path: genbio_finetune.data.DMSFitnessPrediction
190
+ init_args:
191
+ path: genbio-ai/ProteinGYM-DMS
192
+ train_split_files:
193
+ - singles_substitutions/Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.tsv
194
+ normalize: true
195
+ train_split_name: 'train'
196
+ test_split_files: null
197
+ valid_split_files: null
198
+ random_seed: 42
199
+ batch_size: 2
200
+ shuffle: true
201
+ sampler: null
202
+ num_workers: 0
203
+ pin_memory: true
204
+ persistent_workers: false
205
+ cv_num_folds: 5
206
+ cv_test_fold_id: 4
207
+ cv_enable_val_fold: true
208
+ cv_fold_id_col: fold_id
209
+ ckpt_path: null
fold4/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb54d10b7864136023833b06ed2d1e4d60f7550617030dd207fb8ed8304c01c
3
+ size 147655256