#!/bin/bash
#
# Runs jackhmmer search with bitscore thresholds
#
#SBATCH --cluster=
#SBATCH --partition=
#SBATCH --account=
#SBATCH --job-name=jackhmmer
#SBATCH --output=jackhmmer.out
#SBATCH --gres=gpu:0              # Number of GPU(s) per node.
#SBATCH --cpus-per-task=4         # CPU cores/threads
#SBATCH --mem=48000M              # memory per node
#SBATCH --time=0-24:00            # Max time (DD-HH:MM)
#SBATCH --ntasks=1                # Only set to >1 if you want to use multi-threading

# NOTE: all #SBATCH directives must stay above the first executable command.
set -euo pipefail

## USAGE
## Create a directory, and put the WT sequence in wt.fasta in the directory
## sbatch jackhmmer.sh <dir> <bitscore> <niter> <seqdb>
##
##   dir       working directory containing wt.fasta; all outputs go here
##   bitscore  per-residue bitscore threshold, e.g. 0.5 (scaled by the
##             length of the WT sequence before being passed to jackhmmer)
##   niter     maximum number of jackhmmer iterations (-N)
##   seqdb     sequence database, e.g. a uniref100 or uniref90 fasta file

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis to stderr and abort.
usage() {
  printf 'Usage: sbatch jackhmmer.sh <dir> <bitscore> <niter> <seqdb>\n' >&2
  exit 2
}

(( $# >= 4 )) || usage

dir=$1
bitscore=$2 # e.g. 0.5
niter=$3
seqdb=$4    # location for e.g. uniref100 or uniref90 fasta files

# Fall back to a single thread when not running under SLURM, so that
# `--cpu` never ends up without a value.
cpus=${SLURM_CPUS_PER_TASK:-1}
export OMP_NUM_THREADS="$cpus"

query="$dir/wt.fasta"
tblout="$dir/targets.tblout"
alignmentfile="$dir/alignment.sto"
hmmprefix="$dir/iter"
aliprefix="$dir/iter"

# Validate inputs up front: failing here is far cheaper than failing
# after a multi-hour search.
[[ -d "$dir" ]] || die "directory not found: $dir"
[[ -f "$query" ]] || die "query sequence not found: $query"
[[ -f "$seqdb" ]] || die "sequence database not found: $seqdb"
[[ "$niter" =~ ^[1-9][0-9]*$ ]] \
  || die "niter must be a positive integer, got: $niter"
[[ "$bitscore" =~ ^([0-9]+\.?[0-9]*|\.[0-9]+)$ ]] \
  || die "bitscore must be a non-negative decimal number, got: $bitscore"

for tool in jackhmmer esl-sfetch python bc; do
  command -v "$tool" >/dev/null || die "required tool not found in PATH: $tool"
done

# Drop the FASTA header line and strip all whitespace so that line-wrapped
# sequences (and stray carriage returns) do not inflate the length.
wtseq=$(sed 1d "$query" | tr -d '[:space:]')
seqlen=${#wtseq}
(( seqlen > 0 )) || die "no sequence data found in $query"

bitscore=$(bc <<<"$seqlen * $bitscore") # scale bitscore by seqlen
printf '%s\n' "$bitscore"

# EVcouplings defaults
jackhmmer_args=(
  -N "$niter"
  --incT "$bitscore" --incdomT "$bitscore" -T "$bitscore" --domT "$bitscore"
  --popen 0.02 --pextend 0.4 --mx BLOSUM62
  --tblout "$tblout" -A "$alignmentfile" --noali --notextw
  --chkhmm "$hmmprefix" --chkali "$aliprefix"
  --cpu "$cpus"
)
jackhmmer "${jackhmmer_args[@]}" "$query" "$seqdb"

# convert tblout to target id list
targetidfile="$dir/target_ids.txt"
python scripts/tblout2ids.py "$tblout" "$targetidfile"

# fetch sequences
# NOTE: `esl-sfetch -f` needs an SSI index for the database; if it is
# missing, create one beforehand with `esl-sfetch --index <seqdb>`.
fastafile="$dir/target_seqs.fasta"
txtfile="$dir/target_seqs.txt"
esl-sfetch -o "$fastafile" -f "$seqdb" "$targetidfile"
python scripts/fasta2txt.py "$fastafile" "$txtfile"

# split into train and validation
python scripts/randsplit.py "$txtfile" 0.2

python src/sto2a2m.py "$query" "$alignmentfile" "${dir}/alignment"

# Convert the per-iteration checkpoint alignments. jackhmmer stops early
# once the search converges, so checkpoints for later rounds may not exist.
for (( i = 1; i <= niter; i++ )); do
  checkpoint="$aliprefix-$i.sto"
  if [[ ! -f "$checkpoint" ]]; then
    printf 'skipping iteration %d: %s not found\n' "$i" "$checkpoint" >&2
    continue
  fi
  python src/sto2a2m.py "$query" "$checkpoint" "$aliprefix-$i"
done