#!/bin/bash
#
# Runs jackhmmer search with bitscore thresholds
#
#SBATCH --cluster=<clustername>
#SBATCH --partition=<partitionname>
#SBATCH --account=<accountname>
#SBATCH --job-name=jackhmmer
#SBATCH --output=jackhmmer.out
#SBATCH --gres=gpu:0              # Number of GPU(s) per node.
#SBATCH --cpus-per-task=4         # CPU cores/threads
#SBATCH --mem=48000M              # memory per node
#SBATCH --time=0-24:00            # Max time (DD-HH:MM)
#SBATCH --ntasks=1                # Number of tasks (processes); threads come from --cpus-per-task

## USAGE
## Create a directory, and put the WT sequence in wt.fasta in the directory
## sbatch jackhmmer.sh <dir> <bitscore_threshold> <niter> <seqdb>
##
##   <dir>                 working directory containing wt.fasta; all outputs
##                         are written here
##   <bitscore_threshold>  per-residue bitscore, e.g. 0.5; it is multiplied by
##                         the WT sequence length before being given to jackhmmer
##   <niter>               number of jackhmmer iterations (-N)
##   <seqdb>               sequence database, e.g. uniref100 or uniref90 fasta
##
## Helper scripts are referenced by relative path (scripts/..., src/...), so
## the job has to be submitted from the directory that contains them.

# Stop at the first failing step instead of running later stages on missing
# or partial outputs.
set -euo pipefail

# Print an error message to stderr and abort the job.
die() {
  printf 'jackhmmer.sh: %s\n' "$*" >&2
  exit 1
}

if (( $# < 4 )); then
  printf 'Usage: sbatch jackhmmer.sh <dir> <bitscore_threshold> <niter> <seqdb>\n' >&2
  exit 2
fi

dir=$1
bitscore=$2 # e.g. 0.5
niter=$3
seqdb=$4 # location for e.g. uniref100 or uniref90 fasta files

# SLURM_CPUS_PER_TASK is only set inside a SLURM allocation; fall back to a
# single thread so the script can still be run by hand.
cpus=${SLURM_CPUS_PER_TASK:-1}
export OMP_NUM_THREADS=$cpus

query="$dir/wt.fasta"
tblout="$dir/targets.tblout"
alignmentfile="$dir/alignment.sto"
hmmprefix="$dir/iter"
aliprefix="$dir/iter"

decimal_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
[[ $bitscore =~ $decimal_re ]] \
  || die "bitscore threshold must be a non-negative number, got '$bitscore'"
[[ $niter =~ ^[1-9][0-9]*$ ]] \
  || die "niter must be a positive integer, got '$niter'"
[[ -d $dir ]] || die "directory not found: $dir"
[[ -f $query ]] || die "query sequence not found: $query"
[[ -r $seqdb ]] || die "sequence database not readable: $seqdb"

# Sequence length = residues after the header line. Whitespace is removed so
# that line breaks (or CRs) in a wrapped FASTA record are not counted.
wtseq=$(sed 1d "$query")
wtseq=${wtseq//[[:space:]]/}
seqlen=${#wtseq}
(( seqlen > 0 )) || die "no sequence found in $query"

bitscore=$(bc <<<"$seqlen*$bitscore") # scale bitscore by seqlen
printf '%s\n' "$bitscore"

#EVcouplings defaults
# The same length-scaled threshold is used for both reporting (-T/--domT) and
# inclusion (--incT/--incdomT). --chkhmm/--chkali save a checkpoint per
# iteration under the given prefix; the .sto ones are converted further below.
jackhmmer -N "$niter" \
  --incT "$bitscore" --incdomT "$bitscore" -T "$bitscore" --domT "$bitscore" \
  --popen 0.02 --pextend 0.4 --mx BLOSUM62 \
  --tblout "$tblout" -A "$alignmentfile" --noali --notextw \
  --chkhmm "$hmmprefix" --chkali "$aliprefix" \
  --cpu "$cpus" \
  "$query" "$seqdb"

# convert tblout to target id list
targetidfile="$dir/target_ids.txt"
python scripts/tblout2ids.py "$tblout" "$targetidfile"

# fetch sequences
# NOTE(review): esl-sfetch -f looks names up through an SSI index; presumably
# "esl-sfetch --index <seqdb>" has been run beforehand - confirm.
fastafile="$dir/target_seqs.fasta"
txtfile="$dir/target_seqs.txt"
esl-sfetch -o "$fastafile" -f "$seqdb" "$targetidfile"
python scripts/fasta2txt.py "$fastafile" "$txtfile"

# split into train and validation
# (0.2 is passed through to randsplit.py; presumably the validation fraction)
python scripts/randsplit.py "$txtfile" 0.2

# Convert the final alignment, then each per-iteration checkpoint alignment.
python src/sto2a2m.py "$query" "$alignmentfile" "${dir}/alignment"
for (( i = 1; i <= niter; i++ )); do
  checkpoint="$aliprefix-$i.sto"
  # jackhmmer may stop before reaching niter iterations, in which case the
  # remaining checkpoint files are never written.
  if [[ ! -f $checkpoint ]]; then
    printf 'jackhmmer.sh: %s not found; stopping checkpoint conversion\n' \
      "$checkpoint" >&2
    break
  fi
  python src/sto2a2m.py "$query" "$checkpoint" "$aliprefix-$i"
done