libokj committed on
Commit 416833a · 1 Parent(s): 141df83

Upload 154 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full changeset.
Files changed (50)
  1. configs/callbacks/csv_prediction_writer.yaml +4 -0
  2. configs/callbacks/default.yaml +5 -0
  3. configs/callbacks/early_stopping.yaml +17 -0
  4. configs/callbacks/inference.yaml +6 -0
  5. configs/callbacks/model_checkpoint.yaml +19 -0
  6. configs/callbacks/model_summary.yaml +7 -0
  7. configs/callbacks/none.yaml +0 -0
  8. configs/callbacks/rich_progress_bar.yaml +6 -0
  9. configs/callbacks/tqdm_progress_bar.yaml +2 -0
  10. configs/data/collator/default.yaml +5 -0
  11. configs/data/collator/none.yaml +2 -0
  12. configs/data/drug_featurizer/ecfp.yaml +6 -0
  13. configs/data/drug_featurizer/fcs.yaml +4 -0
  14. configs/data/drug_featurizer/graph.yaml +2 -0
  15. configs/data/drug_featurizer/label.yaml +15 -0
  16. configs/data/drug_featurizer/mol_features.yaml +4 -0
  17. configs/data/drug_featurizer/none.yaml +2 -0
  18. configs/data/drug_featurizer/onehot.yaml +15 -0
  19. configs/data/drug_featurizer/tokenizer.yaml +6 -0
  20. configs/data/dti.yaml.bak +21 -0
  21. configs/data/dti_data.yaml +20 -0
  22. configs/data/protein_featurizer/fcs.yaml +4 -0
  23. configs/data/protein_featurizer/label.yaml +12 -0
  24. configs/data/protein_featurizer/none.yaml +2 -0
  25. configs/data/protein_featurizer/onehot.yaml +12 -0
  26. configs/data/protein_featurizer/tokenizer.yaml +6 -0
  27. configs/data/protein_featurizer/word2vec.yaml +6 -0
  28. configs/data/split/cold_drug.yaml +4 -0
  29. configs/data/split/cold_protein.yaml +4 -0
  30. configs/data/split/none.yaml +0 -0
  31. configs/data/split/random.yaml +10 -0
  32. configs/data/transform/minmax.yaml +5 -0
  33. configs/data/transform/none.yaml +2 -0
  34. configs/debug/advanced.yaml +25 -0
  35. configs/debug/default.yaml +35 -0
  36. configs/debug/fdr.yaml +11 -0
  37. configs/debug/fdr_advanced.yaml +11 -0
  38. configs/debug/limit.yaml +12 -0
  39. configs/debug/overfit.yaml +13 -0
  40. configs/debug/profiler.yaml +12 -0
  41. configs/experiment/bindingdb.yaml +9 -0
  42. configs/experiment/chembl_random.yaml +9 -0
  43. configs/experiment/chembl_rmfh_random.yaml +9 -0
  44. configs/experiment/davis.yaml +9 -0
  45. configs/experiment/demo_bindingdb.yaml +9 -0
  46. configs/experiment/dti_experiment.yaml +19 -0
  47. configs/experiment/example.yaml +35 -0
  48. configs/experiment/ion_channels.yaml +9 -0
  49. configs/experiment/kiba.yaml +9 -0
  50. configs/experiment/kinase.yaml +13 -0
configs/callbacks/csv_prediction_writer.yaml ADDED
@@ -0,0 +1,4 @@
+csv_prediction_writer:
+  _target_: deepscreen.utils.lightning.CSVPredictionWriter
+  output_dir: ${paths.output_dir}
+  write_interval: batch
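Each file in this config group follows the same Hydra pattern: `_target_` names the class to build and the remaining keys become constructor arguments. A minimal sketch of that mechanism (using `TQDMProgressBar` as a stand-in target, since `CSVPredictionWriter` is project code not shown in this diff):

```python
# Sketch of Hydra's generic `_target_` instantiation; the stand-in target is
# a real Lightning callback so the snippet runs without deepscreen installed.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "csv_prediction_writer": {
        "_target_": "lightning.pytorch.callbacks.TQDMProgressBar",  # stand-in for CSVPredictionWriter
    }
})
callbacks = [instantiate(cb_conf) for cb_conf in cfg.values()]
print(callbacks)  # [<TQDMProgressBar ...>]
```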
configs/callbacks/default.yaml ADDED
@@ -0,0 +1,5 @@
+defaults:
+  - model_checkpoint
+  - early_stopping
+  - model_summary
+  - rich_progress_bar
configs/callbacks/early_stopping.yaml ADDED
@@ -0,0 +1,17 @@
+# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.EarlyStopping.html
+
+# Monitor a metric and stop training when it stops improving.
+# See the link above for more detailed information.
+early_stopping:
+  _target_: lightning.pytorch.callbacks.EarlyStopping
+  monitor: ${oc.select:callbacks.model_checkpoint.monitor,"val/loss"} # quantity to be monitored; must be specified!
+  min_delta: 0. # minimum change in the monitored quantity to qualify as an improvement
+  patience: 50 # number of checks with no improvement after which training will be stopped
+  verbose: False # verbosity mode
+  mode: ${callbacks.model_checkpoint.mode} # "max" means a higher metric value is better; "min" means lower is better
+  strict: True # whether to crash training if the monitored metric is not found in the validation metrics
+  check_finite: True # when True, stops training if the monitored metric becomes NaN or infinite
+  stopping_threshold: null # stop training immediately once the monitored quantity reaches this threshold
+  divergence_threshold: null # stop training as soon as the monitored quantity becomes worse than this threshold
+  check_on_train_epoch_end: False # whether to run early stopping at the end of the training epoch
+  log_rank_zero_only: False # log the status of the early stopping callback only for the rank 0 process
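The `monitor` line above uses OmegaConf's built-in `oc.select` resolver, which falls back to its second argument when the referenced key is absent. A runnable illustration:

```python
# If callbacks.model_checkpoint.monitor is not defined, oc.select returns the
# quoted default instead of raising an interpolation error.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "callbacks": {
        "early_stopping": {
            "monitor": '${oc.select:callbacks.model_checkpoint.monitor,"val/loss"}'
        }
    }
})
print(cfg.callbacks.early_stopping.monitor)  # val/loss (the fallback)
```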
configs/callbacks/inference.yaml ADDED
@@ -0,0 +1,6 @@
+defaults:
+  - model_summary
+  - rich_progress_bar
+
+model_summary:
+  max_depth: 2
configs/callbacks/model_checkpoint.yaml ADDED
@@ -0,0 +1,19 @@
+# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.ModelCheckpoint.html
+
+# Save the model periodically by monitoring a quantity.
+# See the link above for more detailed information.
+model_checkpoint:
+  _target_: lightning.pytorch.callbacks.ModelCheckpoint
+  dirpath: ${paths.output_dir} # directory to save the model file
+  filename: "checkpoints/epoch_{epoch:03d}" # checkpoint filename
+  monitor: ${eval:'"val/loss" if ${data.train_val_test_split}[1] else "train/loss"'} # name of the logged metric that determines when the model is improving
+  verbose: False # verbosity mode
+  save_last: True # additionally always save an exact copy of the last checkpoint to a file last.ckpt
+  save_top_k: 1 # save the k best models (determined by the metric above)
+  mode: "min" # "max" means a higher metric value is better; "min" means lower is better
+  auto_insert_metric_name: False # when True, checkpoint filenames will contain the metric name
+  save_weights_only: False # if True, only the model's weights will be saved
+  every_n_train_steps: null # number of training steps between checkpoints
+  train_time_interval: null # checkpoints are monitored at the specified time interval
+  every_n_epochs: null # number of epochs between checkpoints
+  save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or at the end of validation
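The `monitor` value above relies on an `eval` resolver, which is not built into OmegaConf; presumably the project registers one before composing configs, along these lines (assumption):

```python
# Hedged sketch: register an `eval` resolver, then reproduce the monitor logic
# with a local `split` key standing in for data.train_val_test_split.
from omegaconf import OmegaConf

OmegaConf.register_new_resolver("eval", eval)

cfg = OmegaConf.create({
    "split": [0.7, 0.1, 0.2],
    "monitor": "${eval:'\"val/loss\" if ${split}[1] else \"train/loss\"'}",
})
print(cfg.monitor)  # val/loss -- the validation fraction at index 1 is truthy
```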
configs/callbacks/model_summary.yaml ADDED
@@ -0,0 +1,7 @@
+# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.RichModelSummary.html
+
+# Generates a summary of all layers in a LightningModule with rich text formatting.
+# See the link above for more detailed information.
+model_summary:
+  _target_: lightning.pytorch.callbacks.RichModelSummary
+  max_depth: 2 # the maximum depth of layer nesting that the summary will include; -1 for all modules, 0 for none
configs/callbacks/none.yaml ADDED
File without changes
configs/callbacks/rich_progress_bar.yaml ADDED
@@ -0,0 +1,6 @@
+# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.RichProgressBar.html
+
+# Create a progress bar with rich text formatting.
+# See the link above for more detailed information.
+rich_progress_bar:
+  _target_: lightning.pytorch.callbacks.RichProgressBar
configs/callbacks/tqdm_progress_bar.yaml ADDED
@@ -0,0 +1,2 @@
+tqdm_progress_bar:
+  _target_: lightning.pytorch.callbacks.TQDMProgressBar
configs/data/collator/default.yaml ADDED
@@ -0,0 +1,5 @@
+_target_: deepscreen.data.utils.collator.collate_fn
+_partial_: true
+
+automatic_padding: false
+padding_value: 0.0
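`_partial_: true` tells Hydra to return a `functools.partial` rather than calling the target immediately, which suits callables like a `collate_fn` that the DataLoader invokes later. A runnable sketch with a stand-in target (`collate_fn` itself is project code):

```python
# instantiate() with _partial_: true yields a partial, not a result.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({"_target_": "operator.add", "_partial_": True})
add = instantiate(cfg)  # functools.partial(operator.add)
print(add(2, 3))        # 5
```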
configs/data/collator/none.yaml ADDED
@@ -0,0 +1,2 @@
+_target_: deepscreen.utils.passthrough
+_partial_: true
configs/data/drug_featurizer/ecfp.yaml ADDED
@@ -0,0 +1,6 @@
+_target_: deepscreen.data.featurizers.fingerprint.smiles_to_fingerprint
+_partial_: true
+
+fingerprint: MorganFP
+nBits: 1024
+radius: 2
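The `MorganFP`/`nBits`/`radius` parameters suggest this featurizer computes extended-connectivity fingerprints; a hedged sketch of the equivalent RDKit call (assumption: `smiles_to_fingerprint` wraps something similar):

```python
# ECFP4-style fingerprint: radius 2, 1024 bits, as in the config above.
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

def smiles_to_morgan(smiles: str, n_bits: int = 1024, radius: int = 2) -> np.ndarray:
    mol = Chem.MolFromSmiles(smiles)
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
    return np.array(fp, dtype=np.int8)

print(smiles_to_morgan("CCO").sum())  # count of set bits for ethanol
```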
configs/data/drug_featurizer/fcs.yaml ADDED
@@ -0,0 +1,4 @@
+_target_: deepscreen.data.featurizers.fcs.drug_to_embedding
+_partial_: true
+
+max_sequence_length: 205
configs/data/drug_featurizer/graph.yaml ADDED
@@ -0,0 +1,2 @@
+_target_: deepscreen.data.featurizers.graph.smiles_to_graph
+_partial_: true
configs/data/drug_featurizer/label.yaml ADDED
@@ -0,0 +1,15 @@
+#_target_: deepscreen.data.featurizers.categorical.smiles_to_label
+#_partial_: true
+#
+#max_sequence_length: 100
+##in_channels: 63
+
+_target_: deepscreen.data.featurizers.categorical.sequence_to_label
+_partial_: true
+charset: ['#', '%', ')', '(', '+', '-', '.', '1', '0', '3', '2', '5', '4',
+          '7', '6', '9', '8', '=', 'A', 'C', 'B', 'E', 'D', 'G', 'F', 'I',
+          'H', 'K', 'M', 'L', 'O', 'N', 'P', 'S', 'R', 'U', 'T', 'W', 'V',
+          'Y', '[', 'Z', ']', '_', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i',
+          'h', 'm', 'l', 'o', 'n', 's', 'r', 'u', 't', 'y']
+
+max_sequence_length: 100
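A hypothetical sketch of what a charset-based `sequence_to_label` featurizer typically does: map each character to its index in `charset` and pad/truncate to `max_sequence_length` (the actual deepscreen implementation may differ in indexing or padding conventions):

```python
import numpy as np

def sequence_to_label(seq: str, charset: list[str], max_sequence_length: int) -> np.ndarray:
    index = {ch: i + 1 for i, ch in enumerate(charset)}  # reserve 0 for padding
    labels = np.zeros(max_sequence_length, dtype=np.int64)
    for i, ch in enumerate(seq[:max_sequence_length]):
        labels[i] = index.get(ch, 0)  # unknown characters map to padding
    return labels

print(sequence_to_label("CCO", ["C", "O"], 5))  # [1 1 2 0 0]
```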
configs/data/drug_featurizer/mol_features.yaml ADDED
@@ -0,0 +1,4 @@
+_target_: deepscreen.data.featurizers.graph.smiles_to_mol_features
+_partial_: true
+
+num_atom_feat: 34
configs/data/drug_featurizer/none.yaml ADDED
@@ -0,0 +1,2 @@
+_target_: deepscreen.utils.passthrough
+_partial_: true
configs/data/drug_featurizer/onehot.yaml ADDED
@@ -0,0 +1,15 @@
+#_target_: deepscreen.data.featurizers.categorical.smiles_to_onehot
+#_partial_: true
+#
+#max_sequence_length: 100
+##in_channels: 63
+
+_target_: deepscreen.data.featurizers.categorical.sequence_to_onehot
+_partial_: true
+charset: ['#', '%', ')', '(', '+', '-', '.', '1', '0', '3', '2', '5', '4',
+          '7', '6', '9', '8', '=', 'A', 'C', 'B', 'E', 'D', 'G', 'F', 'I',
+          'H', 'K', 'M', 'L', 'O', 'N', 'P', 'S', 'R', 'U', 'T', 'W', 'V',
+          'Y', '[', 'Z', ']', '_', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i',
+          'h', 'm', 'l', 'o', 'n', 's', 'r', 'u', 't', 'y']
+
+max_sequence_length: 100
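The one-hot counterpart presumably turns the same character indices into rows of an identity matrix, giving a `(max_sequence_length, len(charset))` input suitable for CNN encoders (again a hypothetical sketch, not the project's code):

```python
import numpy as np

def sequence_to_onehot(seq: str, charset: list[str], max_sequence_length: int) -> np.ndarray:
    index = {ch: i for i, ch in enumerate(charset)}
    onehot = np.zeros((max_sequence_length, len(charset)), dtype=np.float32)
    for i, ch in enumerate(seq[:max_sequence_length]):
        if ch in index:
            onehot[i, index[ch]] = 1.0
    return onehot

print(sequence_to_onehot("CCO", ["C", "O"], 4))  # 4x2 matrix, last row all zeros
```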
configs/data/drug_featurizer/tokenizer.yaml ADDED
@@ -0,0 +1,6 @@
+_target_: deepscreen.data.featurizers.token.sequence_to_token_ids
+_partial_: true
+
+tokenizer:
+  _target_: deepscreen.data.featurizers.token.SmilesTokenizer
+  vocab_file: resources/vocabs/smiles.txt
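Note the nested node: Hydra instantiates `tokenizer` recursively and passes the resulting object as a keyword argument to the (partial) outer target. A runnable sketch with stand-in targets:

```python
# builtins.dict stands in for sequence_to_token_ids; OrderedDict for the tokenizer.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "_target_": "builtins.dict",
    "_partial_": True,
    "tokenizer": {"_target_": "collections.OrderedDict"},
})
fn = instantiate(cfg)  # partial(dict, tokenizer=OrderedDict())
print(fn())            # {'tokenizer': OrderedDict()}
```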
configs/data/dti.yaml.bak ADDED
@@ -0,0 +1,21 @@
+_target_: deepscreen.data.dti_datamodule.DTIdatamodule
+
+defaults:
+  - _self_
+  - split: null
+  - drug_featurizer: null
+  - protein_featurizer: null
+
+task: ${task.task}
+n_class: ${oc.select:task.task.n_class,null}
+
+data_dir: ${paths.data_dir}
+dataset_name: null
+
+batch_size: 16
+train_val_test_split: [0.7, 0.1, 0.2]
+
+num_workers: 0
+pin_memory: false
+
+train: ${train}
configs/data/dti_data.yaml ADDED
@@ -0,0 +1,20 @@
+_target_: deepscreen.data.dti.DTIDataModule
+
+defaults:
+  - split: null
+  - drug_featurizer: none # ???
+  - protein_featurizer: none # ???
+  - collator: default
+
+task: ${task.task}
+num_classes: ${task.num_classes}
+
+data_dir: ${paths.data_dir}
+data_file: null
+train_val_test_split: null
+
+batch_size: ???
+num_workers: 0
+pin_memory: false
+
+#train: ${train}
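`???` is OmegaConf's mandatory-value marker: composition succeeds, but accessing the key raises an error until something sets it, e.g. an experiment config or a `data.batch_size=64` CLI override:

```python
from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"batch_size": "???"})
try:
    _ = cfg.batch_size
except MissingMandatoryValue:
    print("batch_size must be set by an experiment config or a CLI override")
```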
configs/data/protein_featurizer/fcs.yaml ADDED
@@ -0,0 +1,4 @@
+_target_: deepscreen.data.featurizers.fcs.protein_to_embedding
+_partial_: true
+
+max_sequence_length: 545
configs/data/protein_featurizer/label.yaml ADDED
@@ -0,0 +1,12 @@
+#_target_: deepscreen.data.featurizers.categorical.fasta_to_label
+#_partial_: true
+#
+#max_sequence_length: 1000
+##in_channels: 26
+
+_target_: deepscreen.data.featurizers.categorical.sequence_to_label
+_partial_: true
+charset: ['A', 'C', 'B', 'E', 'D', 'G', 'F', 'I', 'H', 'K', 'M', 'L', 'O',
+          'N', 'Q', 'P', 'S', 'R', 'U', 'T', 'W', 'V', 'Y', 'X', 'Z']
+
+max_sequence_length: 1000
configs/data/protein_featurizer/none.yaml ADDED
@@ -0,0 +1,2 @@
+_target_: deepscreen.utils.passthrough
+_partial_: true
configs/data/protein_featurizer/onehot.yaml ADDED
@@ -0,0 +1,12 @@
+#_target_: deepscreen.data.featurizers.categorical.fasta_to_onehot
+#_partial_: true
+#
+#max_sequence_length: 1000
+##in_channels: 26
+
+_target_: deepscreen.data.featurizers.categorical.sequence_to_onehot
+_partial_: true
+charset: ['A', 'C', 'B', 'E', 'D', 'G', 'F', 'I', 'H', 'K', 'M', 'L', 'O',
+          'N', 'Q', 'P', 'S', 'R', 'U', 'T', 'W', 'V', 'Y', 'X', 'Z']
+
+max_sequence_length: 1000
configs/data/protein_featurizer/tokenizer.yaml ADDED
@@ -0,0 +1,6 @@
+_target_: deepscreen.data.featurizers.token.sequence_to_token_ids
+_partial_: true
+
+tokenizer:
+  _target_: tape.TAPETokenizer.from_pretrained
+  vocab: iupac
configs/data/protein_featurizer/word2vec.yaml ADDED
@@ -0,0 +1,6 @@
+_target_: deepscreen.data.featurizers.word.protein_to_word_embedding
+_partial_: true
+
+model:
+  _target_: gensim.models.Word2Vec.load
+  fname: ${paths.resource_dir}/models/word2vec_30.model
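This featurizer presumably splits a protein sequence into overlapping k-mers ("words") and looks up their embeddings in the loaded Word2Vec model (vector size 30, judging by the model filename). A toy sketch that trains a throwaway model inline instead of loading `word2vec_30.model`; the k-mer approach is an assumption:

```python
from gensim.models import Word2Vec

def kmers(seq: str, k: int = 3) -> list[str]:
    return [seq[i:i + k] for i in range(len(seq) - k + 1)]

seqs = ["MKTAYIAKQR", "MKTLYIAKQA"]
model = Word2Vec([kmers(s) for s in seqs], vector_size=30, min_count=1)
print(model.wv[kmers(seqs[0])].shape)  # (8, 30): one 30-d vector per 3-mer
```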
configs/data/split/cold_drug.yaml ADDED
@@ -0,0 +1,4 @@
+_target_: deepscreen.data.utils.split.cold_start
+_partial_: true
+
+entity: drug
configs/data/split/cold_protein.yaml ADDED
@@ -0,0 +1,4 @@
+_target_: deepscreen.data.utils.split.cold_start
+_partial_: true
+
+entity: protein
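Both configs point at the same `cold_start` function, differing only in the held-out `entity`. A hypothetical sketch of the idea: every interaction involving a held-out drug (or protein) goes to the test set, so test entities are never seen during training (actual deepscreen code may differ):

```python
import random

def cold_start(pairs, entity_idx, test_frac=0.5, seed=0):
    entities = sorted({p[entity_idx] for p in pairs})
    random.Random(seed).shuffle(entities)
    held_out = set(entities[: int(len(entities) * test_frac)])
    train = [p for p in pairs if p[entity_idx] not in held_out]
    test = [p for p in pairs if p[entity_idx] in held_out]
    return train, test

pairs = [("d1", "p1"), ("d1", "p2"), ("d2", "p1"), ("d3", "p2")]
print(cold_start(pairs, entity_idx=0))  # entity_idx=0 -> cold-drug split
```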
configs/data/split/none.yaml ADDED
File without changes
configs/data/split/random.yaml ADDED
@@ -0,0 +1,10 @@
+#_target_: torch.utils.data.random_split
+#_partial_: true
+
+#generator:
+#  _target_: torch.Generator # will use the global seed set by lightning.seed_everything or torch.manual_seed automatically
+
+_target_: deepscreen.data.utils.split.random_split
+_partial_: true
+
+seed: ${seed}
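For reference, the commented-out torch alternative above would look like this in code (fractional lengths require a reasonably recent torch):

```python
import torch
from torch.utils.data import random_split

dataset = list(range(100))
train, val, test = random_split(
    dataset, [0.7, 0.1, 0.2], generator=torch.Generator().manual_seed(12345)
)
print(len(train), len(val), len(test))  # 70 10 20
```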
configs/data/transform/minmax.yaml ADDED
@@ -0,0 +1,5 @@
+_target_: deepscreen.data.utils.transform
+_partial_: true
+
+scaler:
+  _target_: sklearn.preprocessing.MinMaxScaler
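Presumably `deepscreen.data.utils.transform` applies the configured sklearn scaler to regression labels; the effect of `MinMaxScaler` itself:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

y = np.array([[5.0], [7.5], [10.0]])
print(MinMaxScaler().fit_transform(y).ravel())  # [0.  0.5 1. ]
```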
configs/data/transform/none.yaml ADDED
@@ -0,0 +1,2 @@
+_target_: deepscreen.utils.passthrough
+_partial_: true
configs/debug/advanced.yaml ADDED
@@ -0,0 +1,25 @@
+# @package _global_
+
+# advanced debug mode that enables callbacks, loggers and gpu during debugging
+job_name: "debug"
+
+extras:
+  ignore_warnings: False
+  enforce_tags: False
+
+hydra:
+  job_logging:
+    root:
+      level: DEBUG
+  verbose: True
+
+trainer:
+  max_epochs: 1
+  accelerator: gpu
+  devices: 1
+  detect_anomaly: true
+  deterministic: false
+
+data:
+  num_workers: 0
+  pin_memory: False
configs/debug/default.yaml ADDED
@@ -0,0 +1,35 @@
+# @package _global_
+
+# default debugging setup, runs 1 full epoch
+# other debugging configs can inherit from this one
+
+# overwrite job name so debugging logs are stored in a separate folder
+job_name: "debug"
+
+# disable callbacks and loggers during debugging
+callbacks: null
+logger: null
+
+extras:
+  ignore_warnings: False
+  enforce_tags: False
+
+# sets level of all command line loggers to 'DEBUG'
+# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
+hydra:
+  job_logging:
+    root:
+      level: DEBUG
+  # use this to also set hydra loggers to 'DEBUG'
+  verbose: True
+
+trainer:
+  max_epochs: 1
+  accelerator: cpu # debuggers don't like gpus
+  devices: 1 # debuggers don't like multiprocessing
+  detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
+  deterministic: false
+
+data:
+  num_workers: 0 # debuggers don't like multiprocessing
+  pin_memory: False # disable GPU memory pinning
configs/debug/fdr.yaml ADDED
@@ -0,0 +1,11 @@
+# @package _global_
+
+# runs 1 train, 1 validation and 1 test step
+
+defaults:
+  - default
+
+trainer:
+  accelerator: gpu
+  fast_dev_run: true
+  detect_anomaly: true
configs/debug/fdr_advanced.yaml ADDED
@@ -0,0 +1,11 @@
+# @package _global_
+
+# runs 1 train, 1 validation and 1 test step
+
+defaults:
+  - advanced
+
+trainer:
+  accelerator: gpu
+  fast_dev_run: true
+  detect_anomaly: true
configs/debug/limit.yaml ADDED
@@ -0,0 +1,12 @@
+# @package _global_
+
+# uses only 1% of the training data and 5% of validation/test data
+
+defaults:
+  - default
+
+trainer:
+  max_epochs: 3
+  limit_train_batches: 0.01
+  limit_val_batches: 0.05
+  limit_test_batches: 0.05
configs/debug/overfit.yaml ADDED
@@ -0,0 +1,13 @@
+# @package _global_
+
+# overfits to 3 batches
+
+defaults:
+  - default
+
+trainer:
+  max_epochs: 20
+  overfit_batches: 3
+
+# model checkpointing and early stopping need to be disabled during overfitting
+callbacks: null
configs/debug/profiler.yaml ADDED
@@ -0,0 +1,12 @@
+# @package _global_
+
+# runs with execution time profiling
+
+defaults:
+  - default
+
+trainer:
+  max_epochs: 1
+  profiler: "simple"
+  # profiler: "advanced"
+  # profiler: "pytorch"
configs/experiment/bindingdb.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split: [dti_benchmark/random_split_update/bindingdb_train.csv,
+                         dti_benchmark/random_split_update/bindingdb_valid.csv,
+                         dti_benchmark/random_split_update/bindingdb_test.csv]
configs/experiment/chembl_random.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split: [chembl_random_global_balance_1_train.csv,
+                         chembl_random_global_balance_1_valid.csv,
+                         chembl_random_global_balance_1_test.csv]
configs/experiment/chembl_rmfh_random.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split: [chembl_rmFH_random_global_balance_1_train.csv,
+                         chembl_rmFH_random_global_balance_1_valid.csv,
+                         chembl_rmFH_random_global_balance_1_test.csv]
configs/experiment/davis.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split: [dti_benchmark/random_split_update/davis_train.csv,
+                         dti_benchmark/random_split_update/davis_valid.csv,
+                         dti_benchmark/random_split_update/davis_test.csv]
configs/experiment/demo_bindingdb.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+  - override /data/split: random
+
+data:
+  data_file: demo/binddb_ic50_demo.csv
+  train_val_test_split: [0.7, 0.1, 0.2]
configs/experiment/dti_experiment.yaml ADDED
@@ -0,0 +1,19 @@
+# @package _global_
+defaults:
+  - override /data: dti_data
+  - override /model: dti_model
+  - override /trainer: gpu
+
+seed: 12345
+
+trainer:
+  min_epochs: 1
+  max_epochs: 500
+  precision: 16-mixed
+
+callbacks:
+  early_stopping:
+    patience: 50
+
+data:
+  num_workers: 8
configs/experiment/example.yaml ADDED
@@ -0,0 +1,35 @@
+# @package _global_
+
+# to execute this experiment run:
+# python train.py experiment=example
+
+defaults:
+  - override /data: dti_data
+  - override /data/drug_featurizer: onehot
+  - override /data/protein_featurizer: onehot
+  - override /model: dti_model
+  - override /model/protein_encoder: cnn
+  - override /model/drug_encoder: cnn
+  - override /model/decoder: concat_mlp
+  - override /callbacks: default
+  - override /trainer: default
+
+# all parameters below will be merged with parameters from default configurations set above
+# this allows you to overwrite only specified parameters
+
+tags: ["dti"]
+
+seed: 12345
+
+data:
+  data_file: davis.csv
+  batch_size: 64
+
+model:
+  optimizer:
+    lr: 0.0001
+
+trainer:
+  min_epochs: 1
+  max_epochs: 100
+  accelerator: gpu
configs/experiment/ion_channels.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split: [dti_benchmark/ChEMBL33/train/Ion_channels_train_data.csv,
+                         dti_benchmark/ChEMBL33/valid/Ion_channels_valid_data.csv,
+                         dti_benchmark/ChEMBL33/test/Ion_channels_both_unseen_test_data.csv]
configs/experiment/kiba.yaml ADDED
@@ -0,0 +1,9 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split: [dti_benchmark/random_split_update/kiba_train.csv,
+                         dti_benchmark/random_split_update/kiba_valid.csv,
+                         dti_benchmark/random_split_update/kiba_test.csv]
configs/experiment/kinase.yaml ADDED
@@ -0,0 +1,13 @@
+# @package _global_
+defaults:
+  - dti_experiment
+  - override /task: binary
+
+data:
+  train_val_test_split:
+    - dti_benchmark/ChEMBL33/train/kinase_train_data.csv
+    - null
+    - null
+  # dti_benchmark/ChEMBL33/valid/kinase_valid_data.csv,
+  # dti_benchmark/ChEMBL33/test/kinase_both_unseen_test_data.csv
+