Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /SearchIO /ExonerateIO /exonerate_cigar.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 1 year ago

raw

history blame contribute delete

4.09 kB

	# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Bio.SearchIO parser for Exonerate cigar output format."""

	import re

	from ._base import _BaseExonerateParser, _STRAND_MAP
	from .exonerate_vulgar import ExonerateVulgarIndexer


	__all__ = ("ExonerateCigarParser", "ExonerateCigarIndexer")


	# Precompiled pattern for a single Exonerate "cigar:" line.
	# Groups 1-4 capture the query ID, start, end and strand; groups 5-8 capture
	# the same four fields for the hit; group 9 is the score and group 10 is the
	# rest of the line (the cigar components, including leading whitespace).
	# The strand class lists its three characters literally with "-" first, so
	# it cannot be read as a "+"-to-"." range (which would also accept ",").
	_RE_CIGAR = re.compile(
	    r"""^cigar:\s+
	    (\S+)\s+(\d+)\s+(\d+)\s+([-+.])\s+  # query: ID, start, end, strand
	    (\S+)\s+(\d+)\s+(\d+)\s+([-+.])\s+  # hit: ID, start, end, strand
	    (\d+)(\s+.*)$  # score, cigar components
	    """,
	    re.VERBOSE,
	)


	class ExonerateCigarParser(_BaseExonerateParser):
	    """Parser for Exonerate cigar strings."""

	    _ALN_MARK = "cigar"

	    # Destination (container name, key) for regex groups 1-9 of _RE_CIGAR,
	    # listed in group order.
	    _CIGAR_FIELDS = (
	        ("qresult", "id"),
	        ("hsp", "query_start"),
	        ("hsp", "query_end"),
	        ("hsp", "query_strand"),
	        ("hit", "id"),
	        ("hsp", "hit_start"),
	        ("hsp", "hit_end"),
	        ("hsp", "hit_strand"),
	        ("hsp", "score"),
	    )

	    def parse_alignment_block(self, header):
	        """Parse alignment block for cigar format, return query results, hits, hsps.

	        ``header`` must provide the ``"qresult"``, ``"hit"`` and ``"hsp"``
	        dictionaries; they are updated in place and returned together in a
	        new dictionary under the same three keys.

	        When ``self.has_c4_alignment`` is true the values found on the cigar
	        line are only compared against the ones already present in
	        ``header``; otherwise they are copied into it.

	        Raises ``ValueError`` if the line reached by ``read_until`` does not
	        match the cigar pattern, and ``AssertionError`` if a cigar value
	        disagrees with the previously parsed header value.
	        """
	        qresult = header["qresult"]
	        hit = header["hit"]
	        hsp = header["hsp"]
	        self.read_until(lambda line: line.startswith("cigar"))
	        cigars = _RE_CIGAR.search(self.line)
	        if cigars is None:
	            # fail with the offending text instead of an AttributeError on None
	            raise ValueError("Malformed Exonerate cigar line: %r" % (self.line,))

	        containers = {"qresult": qresult, "hit": hit, "hsp": hsp}
	        # if the file has c4 alignments
	        # check if cigar values match our previously parsed header values
	        verify_only = self.has_c4_alignment
	        for group_no, (owner, key) in enumerate(self._CIGAR_FIELDS, start=1):
	            parsed = cigars.group(group_no)
	            if verify_only:
	                assert containers[owner][key] == parsed, (
	                    "cigar line %s[%r] %r does not match header value %r"
	                    % (owner, key, parsed, containers[owner][key])
	                )
	            else:
	                containers[owner][key] = parsed

	        # adjust strands: translate the raw strand symbols through _STRAND_MAP
	        hsp["query_strand"] = _STRAND_MAP[hsp["query_strand"]]
	        hsp["hit_strand"] = _STRAND_MAP[hsp["hit_strand"]]
	        # cast coords into ints
	        qstart = int(hsp["query_start"])
	        qend = int(hsp["query_end"])
	        hstart = int(hsp["hit_start"])
	        hend = int(hsp["hit_end"])
	        # set coords (start <= end)
	        hsp["query_start"] = min(qstart, qend)
	        hsp["query_end"] = max(qstart, qend)
	        hsp["hit_start"] = min(hstart, hend)
	        hsp["hit_end"] = max(hstart, hend)
	        # cast score into int
	        hsp["score"] = int(hsp["score"])
	        # store cigar components (kept as the raw trailing text of the line)
	        hsp["cigar_comp"] = cigars.group(10)
	        # HACK: since we can't really figure out exactly when a
	        # HSP starts or ends, we set the entire alignment as one HSP
	        hsp["query_ranges"] = [(hsp["query_start"], hsp["query_end"])]
	        hsp["hit_ranges"] = [(hsp["hit_start"], hsp["hit_end"])]

	        return {"qresult": qresult, "hit": hit, "hsp": hsp}


	class ExonerateCigarIndexer(ExonerateVulgarIndexer):
	    """Indexer class for exonerate cigar lines."""

	    _parser = ExonerateCigarParser
	    _query_mark = b"cigar"

	    def get_qresult_id(self, pos):
	        """Return the query ID from the cigar line starting at offset ``pos``.

	        The handle is repositioned to ``pos`` and one line is read from it.

	        Raises ``AssertionError`` if that line does not start with the cigar
	        marker, and ``ValueError`` if it starts with the marker but does not
	        match the full cigar pattern.
	        """
	        handle = self._handle
	        handle.seek(pos)
	        # get line, check if it's a cigar line, and get query ID
	        line = handle.readline()
	        assert line.startswith(self._query_mark), line
	        # the handle yields bytes (the marker is bytes); the pattern is text
	        match = _RE_CIGAR.search(line.decode())
	        if match is None:
	            raise ValueError("Malformed Exonerate cigar line: %r" % (line,))
	        return match.group(1)


	# When executed directly instead of being imported, run this module's doctests.
	if __name__ == "__main__":
	    from Bio import _utils

	    _utils.run_doctest()