#!/bin/bash
#
# Runs jackhmmer search with bitscore thresholds
#
#SBATCH --cluster=<clustername>
#SBATCH --partition=<partitionname>
#SBATCH --account=<accountname>
#SBATCH --job-name=jackhmmer
#SBATCH --output=jackhmmer.out
#SBATCH --gres=gpu:0 # Number of GPU(s) per node.
#SBATCH --cpus-per-task=4 # CPU cores/threads
#SBATCH --mem=48000M # memory per node
#SBATCH --time=0-24:00 # Max time (DD-HH:MM)
#SBATCH --ntasks=1 # Single task; parallelism comes from --cpus-per-task, not ntasks
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
## USAGE
## Create a directory and put the wild-type query sequence in <dir>/wt.fasta
## sbatch jackhmmer.sh <dir> <bitscore_threshold> <niter> <seqdb>
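## Example invocation (placeholder paths; adjust to your setup):
##   sbatch jackhmmer.sh runs/myprotein 0.5 5 /path/to/uniref90.fasta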
dir=$1
bitscore=$2 # bits per query residue, e.g. 0.5 (converted to an absolute threshold below)
niter=$3
seqdb=$4 # location for e.g. uniref100 or uniref90 fasta files
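# Optional sanity check (an addition, not part of the original pipeline):
# abort early if an argument is missing or the query FASTA cannot be found.
if [[ -z "$seqdb" || ! -f "$1/wt.fasta" ]]; then
    echo "usage: sbatch jackhmmer.sh <dir> <bitscore_threshold> <niter> <seqdb>" >&2
    exit 1
fi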
query="$dir/wt.fasta"
tblout="$dir/targets.tblout"
alignmentfile="$dir/alignment.sto"
hmmprefix="$dir/iter"
aliprefix="$dir/iter"
wtseq=$(sed 1d "$query" | tr -d '\n') # query sequence without the FASTA header (newlines stripped in case the sequence is line-wrapped)
seqlen=${#wtseq}
bitscore=$(echo "$seqlen*$bitscore" | bc) # convert the per-residue threshold to an absolute bitscore
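# e.g. a 0.5 bits-per-residue threshold on a 200-residue query gives an absolute threshold of 100 bits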
echo "absolute bitscore threshold: $bitscore"
# EVcouplings defaults for gap penalties and substitution matrix
jackhmmer -N $niter \
    --incT $bitscore --incdomT $bitscore -T $bitscore --domT $bitscore \
    --popen 0.02 --pextend 0.4 --mx BLOSUM62 \
    --tblout $tblout -A $alignmentfile --noali --notextw \
    --chkhmm $hmmprefix --chkali $aliprefix \
    --cpu $SLURM_CPUS_PER_TASK \
    $query $seqdb
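# --chkhmm and --chkali write per-iteration checkpoints ($hmmprefix-<n>.hmm and $aliprefix-<n>.sto);
# the checkpoint alignments are converted to a2m by the loop at the end of this script.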
# convert tblout to target id list
targetidfile="$dir/target_ids.txt"
python scripts/tblout2ids.py $tblout $targetidfile
# fetch sequences
fastafile="$dir/target_seqs.fasta"
txtfile="$dir/target_seqs.txt"
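# esl-sfetch needs an SSI index for the sequence database; build one if it is missing
# (this guard is an addition to the original script)
[ -f "$seqdb.ssi" ] || esl-sfetch --index "$seqdb"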
esl-sfetch -o $fastafile -f $seqdb $targetidfile
python scripts/fasta2txt.py $fastafile $txtfile
# split into train and validation
python scripts/randsplit.py $txtfile 0.2
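# convert the final Stockholm alignment and each per-iteration checkpoint alignment to a2m format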
python src/sto2a2m.py $query $alignmentfile ${dir}/alignment
for (( i=1; i<=$niter; i++ ))
do
python src/sto2a2m.py $query $aliprefix-$i.sto $aliprefix-$i
done