#!/bin/bash
#
# For every gene and for seeds 0-4: train models with train.py, run
# interpretation once the checkpoints exist, then run
# scripts/run.new.task.R for each gene.
#
# Usage: SCRIPT CONF_ROOT GENES OUTPUT_FOLDER [GPUS] [FORCE]
#   CONF_ROOT      directory holding CONF_ROOT/<gene>/<gene>.seed.<seed>.yaml
#   GENES          comma-separated list of gene names
#   OUTPUT_FOLDER  directory for result files (created when missing)
#   GPUS           value for CUDA_VISIBLE_DEVICES
#   FORCE          optional; any non-empty value makes the training stage run
#                  even when the final checkpoint is already present

if (( $# < 3 )); then
  printf 'Usage: %s CONF_ROOT GENES OUTPUT_FOLDER [GPUS] [FORCE]\n' "${0##*/}" >&2
  exit 2
fi

# Split the comma-separated gene list into the array the later loops iterate.
IFS=',' read -ra arr <<< "$2"

output_folder=$3
# Nothing below can write results without this directory, so stop early.
mkdir -p -- "$output_folder" || exit 1

# A plain assignment only reaches child processes when the caller had already
# exported the variable; export it explicitly so the python runs inherit it.
# An empty value is left unexported so that omitting GPUS does not hide every
# device from the children.
CUDA_VISIBLE_DEVICES=$4
if [[ -n "$CUDA_VISIBLE_DEVICES" ]]; then
  export CUDA_VISIBLE_DEVICES
fi
echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"

if [[ -z "$5" ]]; then
  echo "Check if tasks have finished"
fi

#######################################
# Run train.py for one config unless its final checkpoint already exists.
# Arguments:
#   $1 - path to the YAML config
#   $2 - label used in the progress messages
#   $3 - value passed to train.py --mode
#   $4 - where the checkpoint lives inside log_dir: "" or a sub-directory
#        with a trailing slash (e.g. "FOLD.3/")
#   $5 - non-empty to train even when the checkpoint is present
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   1 when the config is not readable, else the status of the
#            last command run
#######################################
_train_unless_done() {
  local conf=$1 label=$2 mode=$3 ckpt_subdir=$4 force=$5
  local log_dir num_epochs

  echo "Begin $label"
  if [[ ! -r "$conf" ]]; then
    # Without the config there is no log_dir to check and nothing to train.
    printf 'Config not readable, skipping: %s\n' "$conf" >&2
    return 1
  fi

  # Values are taken from "key: value" lines; everything up to the last
  # ": " on a matching line is stripped.
  log_dir=$(grep log_dir "$conf" | sed 's/.*: //')
  num_epochs=$(grep num_epochs "$conf" | sed 's/.*: //')

  if [[ -z "$force" && -f "$log_dir/${ckpt_subdir}model.epoch.$num_epochs.pt" ]]; then
    echo "Skip $label"
  else
    echo "Run $label"
    python -W ignore::UserWarning:torch_geometric.data.collate:147 train.py \
      --conf "$conf" --mode "$mode"
  fi
}

#######################################
# Train the model(s) for one gene and one seed.
# A config whose data_type is "GLOF" is trained in train_4_fold mode, followed
# by its ".large.window" companion config; any other data_type is trained in
# continue_train mode.
# Arguments:
#   $1 - config root directory
#   $2 - gene name
#   $3 - seed
#   $4 - non-empty to retrain even when the final checkpoint exists
#######################################
train_gene_seed() {
  local conf_root=$1 gene=$2 seed=$3 force=$4
  local conf_prefix="$conf_root/$gene/${gene}.seed.${seed}"
  local data_type

  if [[ ! -r "$conf_prefix.yaml" ]]; then
    printf 'Config not readable, skipping: %s\n' "$conf_prefix.yaml" >&2
    return 1
  fi
  data_type=$(grep data_type "$conf_prefix.yaml" | sed 's/.*: //')

  if [[ "$data_type" == "GLOF" ]]; then
    # Only the FOLD.3 checkpoint is tested for 4-fold runs -- presumably the
    # folds are trained in order so the last one marks completion; confirm
    # against train.py.
    _train_unless_done "$conf_prefix.yaml" "$gene" train_4_fold "FOLD.3/" "$force"
    _train_unless_done "$conf_prefix.large.window.yaml" "large window of $gene" \
      train_4_fold "FOLD.3/" "$force"
  else
    _train_unless_done "$conf_prefix.yaml" "$gene" continue_train "" "$force"
  fi
}

# Training stage: every gene for seeds 0-4.
for seed in {0..4}; do
  for gene in "${arr[@]}"; do
    train_gene_seed "$1" "$gene" "$seed" "$5"
  done
done

#######################################
# Run 4-fold interpretation for one config once all four fold checkpoints
# exist and the training-set result file has not been written yet.
# Arguments:
#   $1 - path to the YAML config
#   $2 - label used in the progress messages
#   $3 - file passed as --data-file-test for the second (training-set) run
#   $4 - result file for the first run (config's own test data)
#   $5 - result file for the second run; its presence marks the work as done
# Outputs:   progress messages on stdout
#######################################
_interpret_4_fold() {
  local conf=$1 label=$2 train_file=$3 test_csv=$4 train_csv=$5
  local log_dir num_epochs fold
  local ready=1

  log_dir=$(grep log_dir "$conf" | sed 's/.*: //')
  num_epochs=$(grep num_epochs "$conf" | sed 's/.*: //')

  for fold in 0 1 2 3; do
    if [[ ! -f "$log_dir/FOLD.$fold/model.epoch.$num_epochs.pt" ]]; then
      ready=0
      break
    fi
  done

  if (( ready )) && [[ ! -f "$train_csv" ]]; then
    echo "Begin inference $label"
    python -W ignore::UserWarning:torch_geometric.data.collate:147 train.py \
      --conf "$conf" --mode interpret_4_fold --interpret-by both \
      --out-dir "$test_csv"
    python -W ignore::UserWarning:torch_geometric.data.collate:147 train.py \
      --conf "$conf" --mode interpret_4_fold --interpret-by both \
      --data-file-test "$train_file" --out-dir "$train_csv"
  else
    # Also reached when the result file already exists, not only when
    # checkpoints are missing.
    echo "Not finished $label"
  fi
}

#######################################
# Run interpretation for one gene and one seed.
# "GLOF" configs are interpreted in interpret_4_fold mode (normal config, then
# the ".large.window" config); any other data_type uses interpret mode on the
# single checkpoint.
# Arguments:
#   $1 - config root directory
#   $2 - gene name
#   $3 - seed
#   $4 - directory receiving the result files
#######################################
interpret_gene_seed() {
  local conf_root=$1 gene=$2 seed=$3 out_dir=$4
  local conf_prefix="$conf_root/$gene/${gene}.seed.${seed}"
  local conf="$conf_prefix.yaml"
  local out_prefix="$out_dir/$gene"
  local log_dir num_epochs data_type data_file_train

  echo "Begin $gene"
  data_type=$(grep data_type "$conf" | sed 's/.*: //')

  if [[ "$data_type" == "GLOF" ]]; then
    data_file_train=$(grep data_file_train: "$conf" | sed 's/.*: //')

    _interpret_4_fold "$conf" "$gene" "$data_file_train" \
      "$out_prefix.testing.seed.$seed.csv" \
      "$out_prefix.training.seed.$seed.csv"

    echo "Begin large window of $gene"
    # NOTE(review): the training file still comes from the normal config, not
    # from the large-window one -- confirm that this is intended.
    _interpret_4_fold "$conf_prefix.large.window.yaml" "large window of $gene" \
      "$data_file_train" \
      "$out_prefix.testing.seed.$seed.large.window.csv" \
      "$out_prefix.training.seed.$seed.large.window.csv"
  else
    log_dir=$(grep log_dir "$conf" | sed 's/.*: //')
    num_epochs=$(grep num_epochs "$conf" | sed 's/.*: //')

    if [[ -f "$log_dir/model.epoch.$num_epochs.pt" \
          && ! -f "$out_prefix.testing.seed.$seed.csv" ]]; then
      echo "Begin inference $gene"
      python -W ignore::UserWarning:torch_geometric.data.collate:147 train.py \
        --conf "$conf" --mode interpret --interpret-by both \
        --out-dir "$out_prefix.testing.seed.$seed.csv"
    else
      echo "Not finished $gene"
    fi
  fi
}

# Inference stage: every gene for seeds 0-4.
for seed in {0..4}; do
  for gene in "${arr[@]}"; do
    interpret_gene_seed "$1" "$gene" "$seed" "$output_folder"
  done
done

#######################################
# Run scripts/run.new.task.R once per gene with the Rscript binary of the
# "r4-base" conda environment.
# Arguments:
#   $1  - config root directory
#   $2  - output folder handed to the R script
#   $3+ - gene names
# Outputs:   whatever the R script prints; errors on stderr
# Returns:   0 when no genes are given, 1 when the conda base directory
#            cannot be determined, else the status of the last Rscript run
#######################################
run_r_tasks() {
  local conf_root=$1 out_dir=$2
  shift 2
  local conda_home gene

  # Nothing to do without genes; skip the conda lookup entirely.
  (( $# )) || return 0

  # Without a base directory the Rscript path would collapse to
  # /envs/r4-base/bin/Rscript, so report the problem once instead.
  conda_home=$(conda info --base) && [[ -n "$conda_home" ]] || {
    printf 'Cannot determine conda base directory (conda info --base failed)\n' >&2
    return 1
  }

  for gene in "$@"; do
    "$conda_home/envs/r4-base/bin/Rscript" scripts/run.new.task.R \
      "$conf_root/$gene/$gene" "$out_dir"
  done
}

run_r_tasks "$1" "$output_folder" "${arr[@]}"