Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /Align /Applications /_Clustalw.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 1 year ago

raw

history blame contribute delete

20.2 kB

	# Copyright 2009 by Cymon J. Cox. All rights reserved.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Command line wrapper for the multiple alignment program Clustal W."""


	import os
	from Bio.Application import _Option, _Switch, AbstractCommandline


	def _is_int(value):
	    """Return True when ``value`` is an ``int`` (checker for count-like options)."""
	    return isinstance(value, int)


	def _is_number(value):
	    """Return True when ``value`` is an ``int`` or a ``float``."""
	    return isinstance(value, (int, float))


	def _one_of(*choices):
	    """Build a checker that accepts only the exact values in ``choices``."""

	    def checker(value):
	        return value in choices

	    return checker


	def _keyword_or_existing_file(*keywords):
	    """Build a checker accepting one of ``keywords`` or a path that exists on disk."""

	    def checker(value):
	        return value in keywords or os.path.exists(value)

	    return checker


	class ClustalwCommandline(AbstractCommandline):
	    """Command line wrapper for clustalw (version one or two).

	    http://www.clustal.org/

	    Notes
	    -----
	    Last checked against versions: 1.83 and 2.1

	    Every option is registered under four aliases (``-name``, ``-NAME``,
	    ``NAME`` and ``name``). Options given a checker function have their
	    value validated by that checker: enumerated options accept the
	    all-upper or all-lower spelling of each keyword, the weight matrix
	    options accept a keyword or the path of an existing file, and the
	    ``usetree``, ``usetree1`` and ``usetree2`` options require the path of
	    an existing file.

	    References
	    ----------
	    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
	    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
	    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
	    Bioinformatics, 23, 2947-2948.

	    Examples
	    --------
	    >>> from Bio.Align.Applications import ClustalwCommandline
	    >>> in_file = "unaligned.fasta"
	    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
	    >>> print(clustalw_cline)
	    clustalw2 -infile=unaligned.fasta

	    You would typically run the command line with clustalw_cline() or via
	    the Python subprocess module, as described in the Biopython tutorial.

	    """

	    # TODO - Should we default to cmd="clustalw2" now?
	    def __init__(self, cmd="clustalw", **kwargs):
	        """Initialize the class.

	        Arguments:
	         - cmd - name or path of the Clustal W executable to invoke.
	         - kwargs - initial option values, keyed by the lower case option
	           name (e.g. ``infile="unaligned.fasta"``); forwarded unchanged to
	           ``AbstractCommandline.__init__``.

	        """
	        # Checkers shared by several options are built once here.
	        on_or_off = _one_of("ON", "on", "OFF", "off")
	        protein_matrix = _keyword_or_existing_file(
	            "BLOSUM", "PAM", "GONNET", "ID", "blosum", "pam", "gonnet", "id"
	        )
	        dna_matrix = _keyword_or_existing_file("IUB", "CLUSTALW", "iub", "clustalw")

	        self.parameters = [
	            _Option(
	                ["-infile", "-INFILE", "INFILE", "infile"],
	                "Input sequences.",
	                filename=True,
	            ),
	            _Option(
	                ["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
	                "Profiles (old alignment).",
	                filename=True,
	            ),
	            _Option(
	                ["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
	                "Profiles (old alignment).",
	                filename=True,
	            ),
	            # ################# VERBS (do things) #############################
	            _Switch(
	                ["-options", "-OPTIONS", "OPTIONS", "options"],
	                "List the command line parameters",
	            ),
	            _Switch(
	                ["-help", "-HELP", "HELP", "help"],
	                "Outline the command line params.",
	            ),
	            _Switch(
	                ["-check", "-CHECK", "CHECK", "check"],
	                "Outline the command line params.",
	            ),
	            _Switch(
	                ["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
	                "Output full help content.",
	            ),
	            _Switch(
	                ["-align", "-ALIGN", "ALIGN", "align"],
	                "Do full multiple alignment.",
	            ),
	            _Switch(["-tree", "-TREE", "TREE", "tree"], "Calculate NJ tree."),
	            _Switch(
	                ["-pim", "-PIM", "PIM", "pim"],
	                "Output percent identity matrix (while calculating the tree).",
	            ),
	            _Option(
	                ["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
	                "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
	                checker_function=_is_int,
	            ),
	            _Switch(
	                ["-convert", "-CONVERT", "CONVERT", "convert"],
	                "Output the input sequences in a different file format.",
	            ),
	            # #################### PARAMETERS (set things) #########################
	            # *General settings:**
	            # The -interactive option ("read command line, then enter normal
	            # interactive menus") is deliberately not wrapped: it makes no
	            # sense in Biopython.
	            _Switch(
	                ["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
	                "Use FAST algorithm for the alignment guide tree",
	            ),
	            _Option(
	                ["-type", "-TYPE", "TYPE", "type"],
	                "PROTEIN or DNA sequences",
	                checker_function=_one_of("PROTEIN", "DNA", "protein", "dna"),
	            ),
	            _Switch(
	                ["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
	                "Protein alignment with negative values in matrix",
	            ),
	            _Option(
	                ["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
	                "Output sequence alignment file name",
	                filename=True,
	            ),
	            _Option(
	                ["-output", "-OUTPUT", "OUTPUT", "output"],
	                "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
	                checker_function=_one_of(
	                    "CLUSTAL",
	                    "GCG",
	                    "GDE",
	                    "PHYLIP",
	                    "PIR",
	                    "NEXUS",
	                    "FASTA",
	                    "clustal",
	                    "gcg",
	                    "gde",
	                    "phylip",
	                    "pir",
	                    "nexus",
	                    "fasta",
	                ),
	            ),
	            _Option(
	                ["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
	                "Output taxon order: INPUT or ALIGNED",
	                checker_function=_one_of("INPUT", "input", "ALIGNED", "aligned"),
	            ),
	            _Option(
	                ["-case", "-CASE", "CASE", "case"],
	                "LOWER or UPPER (for GDE output only)",
	                checker_function=_one_of("UPPER", "upper", "LOWER", "lower"),
	            ),
	            _Option(
	                ["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
	                "OFF or ON (for Clustal output only)",
	                checker_function=on_or_off,
	            ),
	            _Option(
	                ["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
	                "OFF or ON (NEW- for all output formats)",
	                checker_function=on_or_off,
	            ),
	            _Option(
	                ["-range", "-RANGE", "RANGE", "range"],
	                "Sequence range to write starting m to m+n. "
	                "Input as string eg. '24,200'",
	            ),
	            _Option(
	                ["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
	                "Maximum allowed input sequence length",
	                checker_function=_is_int,
	            ),
	            _Switch(
	                ["-quiet", "-QUIET", "QUIET", "quiet"],
	                "Reduce console output to minimum",
	            ),
	            _Option(
	                ["-stats", "-STATS", "STATS", "stats"],
	                "Log some alignment statistics to file",
	                filename=True,
	            ),
	            # *Fast Pairwise Alignments:*
	            _Option(
	                ["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
	                "Word size",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
	                "Number of best diags.",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-window", "-WINDOW", "WINDOW", "window"],
	                "Window around best diags.",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
	                "Gap penalty",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-score", "-SCORE", "SCORE", "score"],
	                "Either: PERCENT or ABSOLUTE",
	                checker_function=_one_of(
	                    "percent", "PERCENT", "absolute", "ABSOLUTE"
	                ),
	            ),
	            # *Slow Pairwise Alignments:*
	            _Option(
	                ["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
	                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
	                checker_function=protein_matrix,
	                filename=True,
	            ),
	            _Option(
	                ["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
	                "DNA weight matrix=IUB, CLUSTALW or filename",
	                checker_function=dna_matrix,
	                filename=True,
	            ),
	            _Option(
	                ["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
	                "Gap opening penalty",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
	                "Gap extension penalty",
	                checker_function=_is_number,
	            ),
	            # *Multiple Alignments:*
	            _Option(
	                ["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
	                "Output file name for newly created guide tree",
	                filename=True,
	            ),
	            _Option(
	                ["-usetree", "-USETREE", "USETREE", "usetree"],
	                "File name of guide tree",
	                # Pass the function itself so the path is really tested; a
	                # lambda returning os.path.exists would accept any value.
	                checker_function=os.path.exists,
	                filename=True,
	            ),
	            _Option(
	                ["-matrix", "-MATRIX", "MATRIX", "matrix"],
	                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
	                checker_function=protein_matrix,
	                filename=True,
	            ),
	            _Option(
	                ["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
	                "DNA weight matrix=IUB, CLUSTALW or filename",
	                checker_function=dna_matrix,
	                filename=True,
	            ),
	            _Option(
	                ["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
	                "Gap opening penalty",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
	                "Gap extension penalty",
	                checker_function=_is_number,
	            ),
	            _Switch(
	                ["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
	                "No end gap separation pen.",
	            ),
	            _Option(
	                ["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
	                "Gap separation pen. range",
	                checker_function=_is_number,
	            ),
	            _Switch(
	                ["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
	                "Residue-specific gaps off",
	            ),
	            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"], "Hydrophilic gaps off"),
	            _Switch(
	                ["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
	                "List hydrophilic res.",
	            ),
	            _Option(
	                ["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
	                "% ident. for delay",
	                checker_function=_is_number,
	            ),
	            # -type is already handled in the General Settings section, but
	            # appears a second time under Multiple Alignments in the help.
	            _Option(
	                ["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
	                "Transitions weighting",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-iteration", "-ITERATION", "ITERATION", "iteration"],
	                "NONE or TREE or ALIGNMENT",
	                checker_function=_one_of(
	                    "NONE", "TREE", "ALIGNMENT", "none", "tree", "alignment"
	                ),
	            ),
	            _Option(
	                ["-numiter", "-NUMITER", "NUMITER", "numiter"],
	                "maximum number of iterations to perform",
	                checker_function=_is_int,
	            ),
	            _Switch(
	                ["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
	                "Disable sequence weighting",
	            ),
	            # *Profile Alignments:*
	            _Switch(
	                ["-profile", "-PROFILE", "PROFILE", "profile"],
	                "Merge two alignments by profile alignment",
	            ),
	            _Option(
	                ["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
	                "Output file name for new guide tree of profile1",
	                filename=True,
	            ),
	            _Option(
	                ["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
	                "Output file for new guide tree of profile2",
	                filename=True,
	            ),
	            _Option(
	                ["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
	                "File name of guide tree for profile1",
	                checker_function=os.path.exists,
	                filename=True,
	            ),
	            _Option(
	                ["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
	                "File name of guide tree for profile2",
	                checker_function=os.path.exists,
	                filename=True,
	            ),
	            # *Sequence to Profile Alignments:*
	            _Switch(
	                ["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
	                "Sequentially add profile2 sequences to profile1 alignment",
	            ),
	            # -newtree and -usetree are already handled in the Multiple
	            # Alignments section, but appear a second time here in the help.
	            # *Structure Alignments:*
	            _Switch(
	                ["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
	                "Do not use secondary structure-gap penalty mask for profile 1",
	            ),
	            _Switch(
	                ["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
	                "Do not use secondary structure-gap penalty mask for profile 2",
	            ),
	            _Option(
	                ["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
	                "STRUCTURE or MASK or BOTH or NONE output in alignment file",
	                checker_function=_one_of(
	                    "STRUCTURE",
	                    "MASK",
	                    "BOTH",
	                    "NONE",
	                    "structure",
	                    "mask",
	                    "both",
	                    "none",
	                ),
	            ),
	            _Option(
	                ["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
	                "Gap penalty for helix core residues",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
	                "gap penalty for strand core residues",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
	                "Gap penalty for loop regions",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
	                "Gap penalty for structure termini",
	                checker_function=_is_number,
	            ),
	            _Option(
	                ["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
	                "Number of residues inside helix to be treated as terminal",
	                checker_function=_is_int,
	            ),
	            _Option(
	                ["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
	                "Number of residues outside helix to be treated as terminal",
	                checker_function=_is_int,
	            ),
	            _Option(
	                ["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
	                "Number of residues inside strand to be treated as terminal",
	                checker_function=_is_int,
	            ),
	            _Option(
	                ["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
	                "Number of residues outside strand to be treated as terminal",
	                checker_function=_is_int,
	            ),
	            # *Trees:*
	            _Option(
	                ["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
	                "nj OR phylip OR dist OR nexus",
	                checker_function=_one_of(
	                    "NJ", "PHYLIP", "DIST", "NEXUS", "nj", "phylip", "dist", "nexus"
	                ),
	            ),
	            _Option(
	                ["-seed", "-SEED", "SEED", "seed"],
	                "Seed number for bootstraps.",
	                checker_function=_is_int,
	            ),
	            _Switch(
	                ["-kimura", "-KIMURA", "KIMURA", "kimura"],
	                "Use Kimura's correction.",
	            ),
	            _Switch(
	                ["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
	                "Ignore positions with gaps.",
	            ),
	            _Option(
	                ["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
	                "Node OR branch position of bootstrap values in tree display",
	                checker_function=_one_of("NODE", "BRANCH", "node", "branch"),
	            ),
	            _Option(
	                ["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
	                "NJ or UPGMA",
	                checker_function=_one_of("NJ", "UPGMA", "nj", "upgma"),
	            ),
	        ]
	        # self.parameters is assigned before the base initialiser is called,
	        # as in the original ordering.
	        AbstractCommandline.__init__(self, cmd, **kwargs)


	if __name__ == "__main__":
	    # When executed as a script, run the doctests via Biopython's helper.
	    import Bio._utils as _bio_utils

	    _bio_utils.run_doctest()