Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /Align /Applications /_ClustalOmega.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 1 year ago

raw

history blame contribute delete

10 kB

	# Copyright 2011 by Andreas Wilm. All rights reserved.
	# Based on ClustalW wrapper copyright 2009 by Cymon J. Cox.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Command line wrapper for the multiple alignment program Clustal Omega."""


	from Bio.Application import _Option, _Switch, AbstractCommandline


	class ClustalOmegaCommandline(AbstractCommandline):
	    """Build command lines for the Clustal Omega aligner (``clustalo``).

	    The options and switches understood by this wrapper are declared in
	    ``__init__``. As the example below shows, the keyword arguments used
	    from Python (``infile``, ``outfile``, ``auto``, ``verbose``, ...) are
	    the last entry of each declared name list.

	    http://www.clustal.org/omega

	    Notes
	    -----
	    Last checked against version: 1.2.0

	    References
	    ----------
	    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
	    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
	    Fast, scalable generation of high-quality protein multiple
	    sequence alignments using Clustal Omega.
	    Molecular Systems Biology 7:539 https://doi.org/10.1038/msb.2011.75

	    Examples
	    --------
	    >>> from Bio.Align.Applications import ClustalOmegaCommandline
	    >>> in_file = "unaligned.fasta"
	    >>> out_file = "aligned.fasta"
	    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
	    >>> print(clustalomega_cline)
	    clustalo -i unaligned.fasta -o aligned.fasta --auto -v

	    You would typically run the command line with clustalomega_cline() or via
	    the Python subprocess module, as described in the Biopython tutorial.

	    """

	    def __init__(self, cmd="clustalo", **kwargs):
	        """Declare the clustalo parameters, then initialize the base class.

	        ``cmd`` is the executable name (default ``"clustalo"``); ``kwargs``
	        are forwarded unchanged to ``AbstractCommandline.__init__``.
	        """
	        # Spellings accepted for -t/--seqtype.
	        sequence_types = (
	            "protein",
	            "rna",
	            "dna",
	            "Protein",
	            "RNA",
	            "DNA",
	            "PROTEIN",
	        )
	        # Format names accepted by both --infmt and --outfmt.
	        alignment_formats = (
	            "a2m",
	            "fa",
	            "fasta",
	            "clu",
	            "clustal",
	            "msf",
	            "phy",
	            "phylip",
	            "selex",
	            "st",
	            "stockholm",
	            "vie",
	            "vienna",
	        )
	        # Values accepted for --output-order.
	        output_orders = ("input-order", "tree-order")

	        def is_int(value):
	            # Checker for the numeric options.
	            return isinstance(value, int)

	        def is_sequence_type(value):
	            return value in sequence_types

	        def is_alignment_format(value):
	            return value in alignment_formats

	        def is_output_order(value):
	            return value in output_orders

	        # order parameters in the same order as clustalo --help
	        self.parameters = [
	            # Sequence Input
	            _Option(
	                ["-i", "--in", "--infile", "infile"],
	                "Multiple sequence input file",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["--hmm-in", "HMM input", "hmm_input"],
	                "HMM input files",
	                filename=True,
	                equate=False,
	            ),
	            _Switch(["--dealign", "dealign"], "Dealign input sequences"),
	            _Option(
	                ["--profile1", "--p1", "profile1"],
	                "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["--profile2", "--p2", "profile2"],
	                "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["-t", "--seqtype", "seqtype"],
	                "{Protein, RNA, DNA} Force a sequence type (default: auto).",
	                equate=False,
	                checker_function=is_sequence_type,
	            ),
	            _Switch(
	                ["--is-profile", "isprofile"],
	                "disable check if profile, force profile (default no)",
	            ),
	            _Option(
	                ["--infmt", "infmt"],
	                # The continuation lines of this literal are kept at their
	                # original column so the description text is unchanged.
	                """Forced sequence input file format (default: auto)

	Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]
	""",
	                equate=False,
	                checker_function=is_alignment_format,
	            ),
	            # Clustering
	            _Option(
	                ["--distmat-in", "distmat_in"],
	                "Pairwise distance matrix input file (skips distance computation).",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["--distmat-out", "distmat_out"],
	                "Pairwise distance matrix output file.",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["--guidetree-in", "guidetree_in"],
	                "Guide tree input file (skips distance computation and guide-tree clustering step).",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["--guidetree-out", "guidetree_out"],
	                "Guide tree output file.",
	                filename=True,
	                equate=False,
	            ),
	            _Switch(
	                ["--full", "distmat_full"],
	                "Use full distance matrix for guide-tree calculation (slow; mBed is default)",
	            ),
	            _Switch(
	                ["--full-iter", "distmat_full_iter"],
	                "Use full distance matrix for guide-tree calculation during iteration (mBed is default)",
	            ),
	            _Option(
	                ["--cluster-size", "clustersize"],
	                "soft maximum of sequences in sub-clusters",
	                checker_function=is_int,
	            ),
	            _Option(
	                ["--clustering-out", "clusteringout"],
	                "Clustering output file",
	                filename=True,
	            ),
	            _Switch(
	                ["--use-kimura", "usekimura"],
	                "use Kimura distance correction for aligned sequences (default no)",
	            ),
	            _Switch(
	                ["--percent-id", "percentid"],
	                "convert distances into percent identities (default no)",
	            ),
	            # Alignment Output
	            _Option(
	                ["-o", "--out", "--outfile", "outfile"],
	                "Multiple sequence alignment output file (default: stdout).",
	                filename=True,
	                equate=False,
	            ),
	            _Option(
	                ["--outfmt", "outfmt"],
	                "MSA output file format:"
	                " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
	                " (default: fasta).",
	                equate=False,
	                checker_function=is_alignment_format,
	            ),
	            _Switch(
	                ["--residuenumber", "--resno", "residuenumber"],
	                "in Clustal format print residue numbers (default no)",
	            ),
	            _Option(
	                ["--wrap", "wrap"],
	                "number of residues before line-wrap in output",
	                checker_function=is_int,
	            ),
	            _Option(
	                ["--output-order", "outputorder"],
	                "MSA output order like in input/guide-tree",
	                checker_function=is_output_order,
	            ),
	            # Iteration
	            _Option(
	                ["--iterations", "--iter", "iterations"],
	                "Number of (combined guide-tree/HMM) iterations",
	                equate=False,
	                checker_function=is_int,
	            ),
	            _Option(
	                ["--max-guidetree-iterations", "max_guidetree_iterations"],
	                "Maximum number of guidetree iterations",
	                equate=False,
	                checker_function=is_int,
	            ),
	            _Option(
	                ["--max-hmm-iterations", "max_hmm_iterations"],
	                "Maximum number of HMM iterations",
	                equate=False,
	                checker_function=is_int,
	            ),
	            # Limits (will exit early, if exceeded):
	            _Option(
	                ["--maxnumseq", "maxnumseq"],
	                "Maximum allowed number of sequences",
	                equate=False,
	                checker_function=is_int,
	            ),
	            _Option(
	                ["--maxseqlen", "maxseqlen"],
	                "Maximum allowed sequence length",
	                equate=False,
	                checker_function=is_int,
	            ),
	            # Miscellaneous:
	            _Switch(
	                ["--auto", "auto"],
	                "Set options automatically (might overwrite some of your options)",
	            ),
	            _Option(
	                ["--threads", "threads"],
	                "Number of processors to use",
	                equate=False,
	                checker_function=is_int,
	            ),
	            _Option(
	                ["-l", "--log", "log"],
	                "Log all non-essential output to this file.",
	                filename=True,
	                equate=False,
	            ),
	            _Switch(["-h", "--help", "help"], "Print help and exit."),
	            _Switch(["-v", "--verbose", "verbose"], "Verbose output"),
	            _Switch(["--version", "version"], "Print version information and exit"),
	            _Switch(
	                ["--long-version", "long_version"],
	                "Print long version information and exit",
	            ),
	            _Switch(["--force", "force"], "Force file overwriting."),
	        ]
	        # self.parameters is assigned before the base initializer runs,
	        # which receives cmd and the caller's keyword arguments.
	        super().__init__(cmd, **kwargs)


	if __name__ == "__main__":
	    # Executed as a script: run this module's doctests through Biopython's
	    # helper. The import is kept local so that merely importing this module
	    # does not pull in Bio._utils.
	    from Bio._utils import run_doctest

	    run_doctest()