Spaces:
No application file
No application file
# Copyright 2001-2009 Brad Chapman. | |
# Revisions copyright 2009-2016 by Peter Cock. | |
# Revisions copyright 2009 by David Winter. | |
# Revisions copyright 2009-2010 by Leighton Pritchard. | |
# All rights reserved. | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Code to interact with and run various EMBOSS programs (OBSOLETE). | |
These classes follow the AbstractCommandline interfaces for running | |
programs. | |
We have decided to remove this module in future, and instead recommend | |
building your command and invoking it via the subprocess module directly. | |
""" | |
from Bio.Application import _Option, _Switch, AbstractCommandline | |
class _EmbossMinimalCommandLine(AbstractCommandline): | |
"""Base Commandline object for EMBOSS wrappers (PRIVATE). | |
This is provided for subclassing, it deals with shared options | |
common to all the EMBOSS tools: | |
Attributes: | |
- auto Turn off prompts | |
- stdout Write standard output | |
- filter Read standard input, write standard output | |
- options Prompt for standard and additional values | |
- debug Write debug output to program.dbg | |
- verbose Report some/full command line options | |
- help Report command line options. More | |
information on associated and general | |
qualifiers can be found with -help -verbose | |
- warning Report warnings | |
- error Report errors | |
- fatal Report fatal errors | |
- die Report dying program messages | |
""" | |
def __init__(self, cmd=None, **kwargs): | |
assert cmd is not None | |
extra_parameters = [ | |
_Switch( | |
["-auto", "auto"], | |
"Turn off prompts.\n\n" | |
"Automatic mode disables prompting, so we recommend you set this " | |
"argument all the time when calling an EMBOSS tool from Biopython.", | |
), | |
_Switch(["-stdout", "stdout"], "Write standard output."), | |
_Switch( | |
["-filter", "filter"], "Read standard input, write standard output." | |
), | |
_Switch( | |
["-options", "options"], | |
"Prompt for standard and additional values.\n\n" | |
"If you are calling an EMBOSS tool from within Biopython, " | |
"we DO NOT recommend using this option.", | |
), | |
_Switch(["-debug", "debug"], "Write debug output to program.dbg."), | |
_Switch(["-verbose", "verbose"], "Report some/full command line options"), | |
_Switch( | |
["-help", "help"], | |
"Report command line options.\n\n" | |
"More information on associated and general qualifiers " | |
"can be found with -help -verbose", | |
), | |
_Switch(["-warning", "warning"], "Report warnings."), | |
_Switch(["-error", "error"], "Report errors."), | |
_Switch(["-die", "die"], "Report dying program messages."), | |
] | |
try: | |
# Insert extra parameters - at the start just in case there | |
# are any arguments which must come last: | |
self.parameters = extra_parameters + self.parameters | |
except AttributeError: | |
# Should we raise an error? The subclass should have set this up! | |
self.parameters = extra_parameters | |
AbstractCommandline.__init__(self, cmd, **kwargs) | |
class _EmbossCommandLine(_EmbossMinimalCommandLine): | |
"""Base Commandline object for EMBOSS wrappers (PRIVATE). | |
This is provided for subclassing, it deals with shared options | |
common to all the EMBOSS tools plus: | |
- outfile Output filename | |
""" | |
def __init__(self, cmd=None, **kwargs): | |
assert cmd is not None | |
extra_parameters = [ | |
_Option(["-outfile", "outfile"], "Output filename", filename=True) | |
] | |
try: | |
# Insert extra parameters - at the start just in case there | |
# are any arguments which must come last: | |
self.parameters = extra_parameters + self.parameters | |
except AttributeError: | |
# Should we raise an error? The subclass should have set this up! | |
self.parameters = extra_parameters | |
_EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) | |
def _validate(self): | |
# Check the outfile, filter, or stdout option has been set. | |
# We can't simply do this via the required flag for the outfile | |
# output - this seems the simplest solution. | |
if not (self.outfile or self.filter or self.stdout): | |
raise ValueError( | |
"You must either set outfile (output filename), " | |
"or enable filter or stdout (output to stdout)." | |
) | |
return _EmbossMinimalCommandLine._validate(self) | |
class Primer3Commandline(_EmbossCommandLine): | |
"""Commandline object for the Primer3 interface from EMBOSS. | |
The precise set of supported arguments depends on your version of EMBOSS. | |
This version accepts arguments current at EMBOSS 6.1.0: | |
>>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True) | |
>>> cline.explainflag = True | |
>>> cline.osizeopt=20 | |
>>> cline.psizeopt=200 | |
>>> cline.outfile = "myresults.out" | |
>>> cline.bogusparameter = 1967 # Invalid parameter | |
Traceback (most recent call last): | |
... | |
ValueError: Option name bogusparameter was not found. | |
>>> print(cline) | |
eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True | |
""" | |
def __init__(self, cmd="eprimer3", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"Sequence to choose primers from.", | |
is_required=True, | |
), | |
_Option(["-task", "task"], "Tell eprimer3 what task to perform."), | |
_Option( | |
["-hybridprobe", "hybridprobe"], | |
"Find an internal oligo to use as a hyb probe.", | |
), | |
_Option( | |
["-numreturn", "numreturn"], "Maximum number of primer pairs to return." | |
), | |
_Option( | |
["-includedregion", "includedregion"], | |
"Subregion of the sequence in which to pick primers.", | |
), | |
_Option(["-target", "target"], "Sequence to target for flanking primers."), | |
_Option( | |
["-excludedregion", "excludedregion"], | |
"Regions to exclude from primer picking.", | |
), | |
_Option( | |
["-forwardinput", "forwardinput"], | |
"Sequence of a forward primer to check.", | |
), | |
_Option( | |
["-reverseinput", "reverseinput"], | |
"Sequence of a reverse primer to check.", | |
), | |
_Option( | |
["-gcclamp", "gcclamp"], | |
"The required number of Gs and Cs at the 3' of each primer.", | |
), | |
_Option(["-osize", "osize"], "Optimum length of a primer oligo."), | |
_Option(["-minsize", "minsize"], "Minimum length of a primer oligo."), | |
_Option(["-maxsize", "maxsize"], "Maximum length of a primer oligo."), | |
_Option( | |
["-otm", "otm"], | |
"Melting temperature for primer oligo (OBSOLETE).\n\n" | |
"Option replaced in EMBOSS 6.6.0 by -opttm", | |
), | |
_Option( | |
["-opttm", "opttm"], | |
"Optimum melting temperature for a primer oligo.\n\n" | |
"Option added in EMBOSS 6.6.0, replacing -otm", | |
), | |
_Option( | |
["-mintm", "mintm"], "Minimum melting temperature for a primer oligo." | |
), | |
_Option( | |
["-maxtm", "maxtm"], "Maximum melting temperature for a primer oligo." | |
), | |
_Option( | |
["-maxdifftm", "maxdifftm"], | |
"Maximum difference in melting temperatures between " | |
"forward and reverse primers.", | |
), | |
_Option(["-ogcpercent", "ogcpercent"], "Optimum GC% for a primer."), | |
_Option(["-mingc", "mingc"], "Minimum GC% for a primer."), | |
_Option(["-maxgc", "maxgc"], "Maximum GC% for a primer."), | |
_Option( | |
["-saltconc", "saltconc"], "Millimolar salt concentration in the PCR." | |
), | |
_Option( | |
["-dnaconc", "dnaconc"], | |
"Nanomolar concentration of annealing oligos in the PCR.", | |
), | |
_Option( | |
["-maxpolyx", "maxpolyx"], | |
"Maximum allowable mononucleotide repeat length in a primer.", | |
), | |
# Primer length: | |
_Option(["-psizeopt", "psizeopt"], "Optimum size for the PCR product."), | |
_Option( | |
["-prange", "prange"], "Acceptable range of length for the PCR product." | |
), | |
# Primer temperature: | |
_Option( | |
["-ptmopt", "ptmopt"], | |
"Optimum melting temperature for the PCR product.", | |
), | |
_Option( | |
["-ptmmin", "ptmmin"], | |
"Minimum allowed melting temperature for the amplicon.", | |
), | |
_Option( | |
["-ptmmax", "ptmmax"], | |
"Maximum allowed melting temperature for the amplicon.", | |
), | |
# Note to self, should be -oexcludedregion not -oexcluderegion | |
_Option( | |
["-oexcludedregion", "oexcludedregion"], | |
"Do not pick internal oligos in this region.", | |
), | |
_Option(["-oligoinput", "oligoinput"], "Sequence of the internal oligo."), | |
# Oligo length: | |
_Option(["-osizeopt", "osizeopt"], "Optimum length of internal oligo."), | |
_Option(["-ominsize", "ominsize"], "Minimum length of internal oligo."), | |
_Option(["-omaxsize", "omaxsize"], "Maximum length of internal oligo."), | |
# Oligo GC temperature: | |
_Option( | |
["-otmopt", "otmopt"], "Optimum melting temperature of internal oligo." | |
), | |
_Option( | |
["-otmmin", "otmmin"], "Minimum melting temperature of internal oligo." | |
), | |
_Option( | |
["-otmmax", "otmmax"], "Maximum melting temperature of internal oligo." | |
), | |
# Oligo GC percent: | |
_Option(["-ogcopt", "ogcopt"], "Optimum GC% for internal oligo."), | |
_Option(["-ogcmin", "ogcmin"], "Minimum GC% for internal oligo."), | |
_Option(["-ogcmax", "ogcmax"], "Maximum GC% for internal oligo."), | |
# Oligo salt concentration: | |
_Option( | |
["-osaltconc", "osaltconc"], | |
"Millimolar concentration of salt in the hybridisation.", | |
), | |
_Option( | |
["-odnaconc", "odnaconc"], | |
"Nanomolar concentration of internal oligo in the hybridisation.", | |
), | |
# Oligo self complementarity | |
_Option( | |
["-oanyself", "oanyself"], | |
"Maximum allowable alignment score for self-complementarity.", | |
), | |
_Option( | |
["-oendself", "oendself"], | |
"Max 3'-anchored self-complementarity global alignment score.", | |
), | |
_Option( | |
["-opolyxmax", "opolyxmax"], | |
"Maximum length of mononucleotide repeat in internal oligo.", | |
), | |
_Option( | |
["-mispriminglibraryfile", "mispriminglibraryfile"], | |
"File containing library of sequences to avoid amplifying", | |
), | |
_Option( | |
["-maxmispriming", "maxmispriming"], | |
"Maximum allowed similarity of primers to sequences in " | |
"library specified by -mispriminglibrary", | |
), | |
_Option( | |
["-omishybmax", "omishybmax"], | |
"Maximum alignment score for hybridisation of internal oligo to " | |
"library specified by -mishyblibraryfile.", | |
), | |
_Option( | |
["-mishyblibraryfile", "mishyblibraryfile"], | |
"Library file of seqs to avoid internal oligo hybridisation.", | |
), | |
_Option( | |
["-explainflag", "explainflag"], | |
"Produce output tags with eprimer3 statistics", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class PrimerSearchCommandline(_EmbossCommandLine): | |
"""Commandline object for the primersearch program from EMBOSS.""" | |
def __init__(self, cmd="primersearch", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-seqall", "-sequences", "sequences", "seqall"], | |
"Sequence to look for the primer pairs in.", | |
is_required=True, | |
), | |
# When this wrapper was written primersearch used -sequences | |
# as the argument name. Since at least EMBOSS 5.0 (and | |
# perhaps earlier) this has been -seqall instead. | |
_Option( | |
["-infile", "-primers", "primers", "infile"], | |
"File containing the primer pairs to search for.", | |
filename=True, | |
is_required=True, | |
), | |
# When this wrapper was written primersearch used -primers | |
# as the argument name. Since at least EMBOSS 5.0 (and | |
# perhaps earlier) this has been -infile instead. | |
_Option( | |
["-mismatchpercent", "mismatchpercent"], | |
"Allowed percentage mismatch (any integer value, default 0).", | |
is_required=True, | |
), | |
_Option( | |
["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" | |
), | |
_Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FDNADistCommandline(_EmbossCommandLine): | |
"""Commandline object for the fdnadist program from EMBOSS. | |
fdnadist is an EMBOSS wrapper for the PHYLIP program dnadist for | |
calculating distance matrices from DNA sequence files. | |
""" | |
def __init__(self, cmd="fdnadist", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"seq file to use (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-method", "method"], "sub. model [f,k,j,l,s]", is_required=True), | |
_Option(["-gamma", "gamma"], "gamma [g, i,n]"), | |
_Option( | |
["-ncategories", "ncategories"], "number of rate catergories (1-9)" | |
), | |
_Option(["-rate", "rate"], "rate for each category"), | |
_Option( | |
["-categories", "categories"], "File of substitution rate categories" | |
), | |
_Option(["-weights", "weights"], "weights file"), | |
_Option( | |
["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)" | |
), | |
_Option(["-invarfrac", "invarfrac"], "proportoin of invariant sites"), | |
_Option(["-ttratio", "ttratio"], "ts/tv ratio"), | |
_Option(["-freqsfrom", "freqsfrom"], "use emprical base freqs"), | |
_Option(["-basefreq", "basefreq"], "specify basefreqs"), | |
_Option(["-lower", "lower"], "lower triangle matrix (y/N)"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FTreeDistCommandline(_EmbossCommandLine): | |
"""Commandline object for the ftreedist program from EMBOSS. | |
ftreedist is an EMBOSS wrapper for the PHYLIP program treedist used for | |
calculating distance measures between phylogentic trees. | |
""" | |
def __init__(self, cmd="ftreedist", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-intreefile", "intreefile"], | |
"tree file to score (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-dtype", "dtype"], "distance type ([S]ymetric, [b]ranch score)"), | |
_Option( | |
["-pairing", "pairing"], | |
"tree pairing method ([A]djacent pairs, all [p]ossible pairs)", | |
), | |
_Option(["-style", "style"], "output style - [V]erbose, [f]ill, [s]parse"), | |
_Option(["-noroot", "noroot"], "treat trees as rooted [N/y]"), | |
_Option( | |
["-outgrno", "outgrno"], | |
"which taxon to root the trees with (starts from 0)", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FNeighborCommandline(_EmbossCommandLine): | |
"""Commandline object for the fneighbor program from EMBOSS. | |
fneighbor is an EMBOSS wrapper for the PHYLIP program neighbor used for | |
calculating neighbor-joining or UPGMA trees from distance matrices. | |
""" | |
def __init__(self, cmd="fneighbor", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-datafile", "datafile"], | |
"dist file to use (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-matrixtype", "matrixtype"], | |
"is matrix square (S), upper (U) or lower (L)", | |
), | |
_Option(["-treetype", "treetype"], "nj or UPGMA tree (n/u)"), | |
_Option(["-outgrno", "outgrno"], "taxon to use as OG"), | |
_Option(["-jumble", "jumble"], "randommise input order (Y/n)"), | |
_Option(["-seed", "seed"], "provide a random seed"), | |
_Option(["-trout", "trout"], "write tree (Y/n)"), | |
_Option(["-outtreefile", "outtreefile"], "filename for output tree"), | |
_Option(["-progress", "progress"], "print progress (Y/n)"), | |
_Option(["-treeprint", "treeprint"], "print tree (Y/n)"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FSeqBootCommandline(_EmbossCommandLine): | |
"""Commandline object for the fseqboot program from EMBOSS. | |
fseqboot is an EMBOSS wrapper for the PHYLIP program seqboot used to | |
pseudo-sample alignment files. | |
""" | |
def __init__(self, cmd="fseqboot", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"seq file to sample (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-categories", "catergories"], "file of input categories"), | |
_Option(["-weights", "weights"], " weights file"), | |
_Option(["-test", "test"], "specify operation, default is bootstrap"), | |
_Option(["-regular", "regular"], "absolute number to resample"), | |
_Option(["-fracsample", "fracsample"], "fraction to resample"), | |
_Option( | |
["-rewriteformat", "rewriteformat"], | |
"output format ([P]hyilp, [n]exus, [x]ml", | |
), | |
_Option(["-seqtype", "seqtype"], "output format ([D]na, [p]rotein, [r]na"), | |
_Option(["-blocksize", "blocksize"], "print progress (Y/n)"), | |
_Option(["-reps", "reps"], "how many replicates, defaults to 100)"), | |
_Option( | |
["-justweights", "jusweights"], | |
"what to write out [D]atasets of just [w]eights", | |
), | |
_Option(["-seed", "seed"], "specify random seed"), | |
_Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FDNAParsCommandline(_EmbossCommandLine): | |
"""Commandline object for the fdnapars program from EMBOSS. | |
fdnapars is an EMBOSS version of the PHYLIP program dnapars, for | |
estimating trees from DNA sequences using parsiomny. Calling this command | |
without providing a value for the option "-intreefile" will invoke | |
"interactive mode" (and as a result fail if called with subprocess) if | |
"-auto" is not set to true. | |
""" | |
def __init__(self, cmd="fdnapars", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"seq file to use (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-intreefile", "intreefile"], "Phylip tree file"), | |
_Option(["-weights", "weights"], "weights file"), | |
_Option(["-maxtrees", "maxtrees"], "max trees to save during run"), | |
_Option(["-thorough", "thorough"], "more thorough search (Y/n)"), | |
_Option(["-rearrange", "rearrange"], "Rearrange on just 1 best tree (Y/n)"), | |
_Option( | |
["-transversion", "transversion"], "Use tranversion parsimony (y/N)" | |
), | |
_Option( | |
["-njumble", "njumble"], | |
"number of times to randomise input order (default is 0)", | |
), | |
_Option(["-seed", "seed"], "provide random seed"), | |
_Option(["-outgrno", "outgrno"], "Specify outgroup"), | |
_Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), | |
_Option(["-threshold", "threshold"], "Threshold value"), | |
_Option(["-trout", "trout"], "Write trees to file (Y/n)"), | |
_Option(["-outtreefile", "outtreefile"], "filename for output tree"), | |
_Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FProtParsCommandline(_EmbossCommandLine): | |
"""Commandline object for the fdnapars program from EMBOSS. | |
fprotpars is an EMBOSS version of the PHYLIP program protpars, for | |
estimating trees from protein sequences using parsiomny. Calling this | |
command without providing a value for the option "-intreefile" will invoke | |
"interactive mode" (and as a result fail if called with subprocess) if | |
"-auto" is not set to true. | |
""" | |
def __init__(self, cmd="fprotpars", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"seq file to use (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-intreefile", "intreefile"], "Phylip tree file to score"), | |
_Option( | |
["-outtreefile", "outtreefile"], | |
"phylip tree output file", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-weights", "weights"], "weights file"), | |
_Option(["-whichcode", "whichcode"], "which genetic code, [U,M,V,F,Y]]"), | |
_Option( | |
["-njumble", "njumble"], | |
"number of times to randomise input order (default is 0)", | |
), | |
_Option(["-seed", "seed"], "provide random seed"), | |
_Option(["-outgrno", "outgrno"], "Specify outgroup"), | |
_Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), | |
_Option(["-threshold", "threshold"], "Threshold value"), | |
_Option(["-trout", "trout"], "Write trees to file (Y/n)"), | |
_Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FProtDistCommandline(_EmbossCommandLine): | |
"""Commandline object for the fprotdist program from EMBOSS. | |
fprotdist is an EMBOSS wrapper for the PHYLIP program protdist used to | |
estimate trees from protein sequences using parsimony | |
""" | |
def __init__(self, cmd="fprotdist", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"seq file to use (phylip)", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-ncategories", "ncategories"], "number of rate catergories (1-9)" | |
), | |
_Option(["-rate", "rate"], "rate for each category"), | |
_Option(["-catergories", "catergories"], "file of rates"), | |
_Option(["-weights", "weights"], "weights file"), | |
_Option(["-method", "method"], "sub. model [j,h,d,k,s,c]"), | |
_Option(["-gamma", "gamma"], "gamma [g, i,c]"), | |
_Option( | |
["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)" | |
), | |
_Option( | |
["-invarcoefficient", "invarcoefficient"], | |
"float for variation of substitution rate among sites", | |
), | |
_Option(["-aacateg", "aacateg"], "Choose the category to use [G,C,H]"), | |
_Option(["-whichcode", "whichcode"], "genetic code [c,m,v,f,y]"), | |
_Option(["-ease", "ease"], "Pob change catergory (float between -0 and 1)"), | |
_Option(["-ttratio", "ttratio"], "Transition/transversion ratio (0-1)"), | |
_Option( | |
["-basefreq", "basefreq"], "DNA base frequencies (space separated list)" | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FConsenseCommandline(_EmbossCommandLine): | |
"""Commandline object for the fconsense program from EMBOSS. | |
fconsense is an EMBOSS wrapper for the PHYLIP program consense used to | |
calculate consensus trees. | |
""" | |
def __init__(self, cmd="fconsense", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-intreefile", "intreefile"], | |
"file with phylip trees to make consensus from", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-method", "method"], "consensus method [s, mr, MRE, ml]"), | |
_Option( | |
["-mlfrac", "mlfrac"], | |
"cut-off freq for branch to appear in consensus (0.5-1.0)", | |
), | |
_Option(["-root", "root"], "treat trees as rooted (YES, no)"), | |
_Option(["-outgrno", "outgrno"], "OTU to use as outgroup (starts from 0)"), | |
_Option(["-trout", "trout"], "treat trees as rooted (YES, no)"), | |
_Option( | |
["-outtreefile", "outtreefile"], "Phylip tree output file (optional)" | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class WaterCommandline(_EmbossCommandLine): | |
"""Commandline object for the water program from EMBOSS.""" | |
def __init__(self, cmd="water", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-asequence", "asequence"], | |
"First sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-bsequence", "bsequence"], | |
"Second sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), | |
_Option( | |
["-gapextend", "gapextend"], "Gap extension penalty", is_required=True | |
), | |
_Option(["-datafile", "datafile"], "Matrix file", filename=True), | |
_Switch( | |
["-nobrief", "nobrief"], "Display extended identity and similarity" | |
), | |
_Switch(["-brief", "brief"], "Display brief identity and similarity"), | |
_Option( | |
["-similarity", "similarity"], "Display percent identity and similarity" | |
), | |
_Option( | |
["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" | |
), | |
_Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), | |
_Option( | |
["-aformat", "aformat"], | |
"Display output in a different specified output format", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class NeedleCommandline(_EmbossCommandLine): | |
"""Commandline object for the needle program from EMBOSS.""" | |
def __init__(self, cmd="needle", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-asequence", "asequence"], | |
"First sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-bsequence", "bsequence"], | |
"Second sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), | |
_Option( | |
["-gapextend", "gapextend"], "Gap extension penalty", is_required=True | |
), | |
_Option(["-datafile", "datafile"], "Matrix file", filename=True), | |
_Option(["-endweight", "endweight"], "Apply And gap penalties"), | |
_Option( | |
["-endopen", "endopen"], | |
"The score taken away when an end gap is created.", | |
), | |
_Option( | |
["-endextend", "endextend"], | |
"The score added to the end gap penalty for each base or " | |
"residue in the end gap.", | |
), | |
_Switch( | |
["-nobrief", "nobrief"], "Display extended identity and similarity" | |
), | |
_Switch(["-brief", "brief"], "Display brief identity and similarity"), | |
_Option( | |
["-similarity", "similarity"], "Display percent identity and similarity" | |
), | |
_Option( | |
["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" | |
), | |
_Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), | |
_Option( | |
["-aformat", "aformat"], | |
"Display output in a different specified output format", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class NeedleallCommandline(_EmbossCommandLine): | |
"""Commandline object for the needleall program from EMBOSS.""" | |
def __init__(self, cmd="needleall", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-asequence", "asequence"], | |
"First sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-bsequence", "bsequence"], | |
"Second sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), | |
_Option( | |
["-gapextend", "gapextend"], "Gap extension penalty", is_required=True | |
), | |
_Option(["-datafile", "datafile"], "Matrix file", filename=True), | |
_Option( | |
["-minscore", "minscore"], | |
"Exclude alignments with scores below this threshold score.", | |
), | |
_Option(["-errorfile", "errorfile"], "Error file to be written to."), | |
_Option(["-endweight", "endweight"], "Apply And gap penalties"), | |
_Option( | |
["-endopen", "endopen"], | |
"The score taken away when an end gap is created.", | |
), | |
_Option( | |
["-endextend", "endextend"], | |
"The score added to the end gap penalty for each base or " | |
"residue in the end gap.", | |
), | |
_Switch( | |
["-nobrief", "nobrief"], "Display extended identity and similarity" | |
), | |
_Switch(["-brief", "brief"], "Display brief identity and similarity"), | |
_Option( | |
["-similarity", "similarity"], "Display percent identity and similarity" | |
), | |
_Option( | |
["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" | |
), | |
_Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), | |
_Option( | |
["-aformat", "aformat"], | |
"Display output in a different specified output format", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class StretcherCommandline(_EmbossCommandLine): | |
"""Commandline object for the stretcher program from EMBOSS.""" | |
def __init__(self, cmd="stretcher", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-asequence", "asequence"], | |
"First sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-bsequence", "bsequence"], | |
"Second sequence to align", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-gapopen", "gapopen"], | |
"Gap open penalty", | |
is_required=True, | |
checker_function=lambda value: isinstance(value, int), | |
), | |
_Option( | |
["-gapextend", "gapextend"], | |
"Gap extension penalty", | |
is_required=True, | |
checker_function=lambda value: isinstance(value, int), | |
), | |
_Option(["-datafile", "datafile"], "Matrix file", filename=True), | |
_Option( | |
["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" | |
), | |
_Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), | |
_Option( | |
["-aformat", "aformat"], | |
"Display output in a different specified output format", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FuzznucCommandline(_EmbossCommandLine): | |
"""Commandline object for the fuzznuc program from EMBOSS.""" | |
def __init__(self, cmd="fuzznuc", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], "Sequence database USA", is_required=True | |
), | |
_Option( | |
["-pattern", "pattern"], | |
"Search pattern, using standard IUPAC one-letter codes", | |
is_required=True, | |
), | |
_Option(["-pmismatch", "pmismatch"], "Number of mismatches"), | |
_Option(["-complement", "complement"], "Search complementary strand"), | |
_Option(["-rformat", "rformat"], "Specify the report format to output in."), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class FuzzproCommandline(_EmbossCommandLine): | |
"""Commandline object for the fuzzpro program from EMBOSS.""" | |
def __init__(self, cmd="fuzzpro", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], "Sequence database USA", is_required=True | |
), | |
_Option( | |
["-pattern", "pattern"], | |
"Search pattern, using standard IUPAC one-letter codes", | |
is_required=True, | |
), | |
_Option(["-pmismatch", "pmismatch"], "Number of mismatches"), | |
_Option(["-rformat", "rformat"], "Specify the report format to output in."), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class Est2GenomeCommandline(_EmbossCommandLine): | |
"""Commandline object for the est2genome program from EMBOSS.""" | |
def __init__(self, cmd="est2genome", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option(["-est", "est"], "EST sequence(s)", is_required=True), | |
_Option(["-genome", "genome"], "Genomic sequence", is_required=True), | |
_Option(["-match", "match"], "Score for matching two bases"), | |
_Option(["-mismatch", "mismatch"], "Cost for mismatching two bases"), | |
_Option( | |
["-gappenalty", "gappenalty"], | |
"Cost for deleting a single base in either sequence, " | |
"excluding introns", | |
), | |
_Option( | |
["-intronpenalty", "intronpenalty"], | |
"Cost for an intron, independent of length.", | |
), | |
_Option( | |
["-splicepenalty", "splicepenalty"], | |
"Cost for an intron, independent of length " | |
"and starting/ending on donor-acceptor sites", | |
), | |
_Option( | |
["-minscore", "minscore"], | |
"Exclude alignments with scores below this threshold score.", | |
), | |
_Option( | |
["-reverse", "reverse"], "Reverse the orientation of the EST sequence" | |
), | |
_Option(["-splice", "splice"], "Use donor and acceptor splice sites."), | |
_Option( | |
["-mode", "mode"], | |
"This determines the comparison mode. 'both', 'forward', or 'reverse'", | |
), | |
_Option( | |
["-best", "best"], | |
"You can print out all comparisons instead of just the best", | |
), | |
_Option(["-space", "space"], "for linear-space recursion."), | |
_Option(["-shuffle", "shuffle"], "Shuffle"), | |
_Option(["-seed", "seed"], "Random number seed"), | |
_Option(["-align", "align"], "Show the alignment."), | |
_Option(["-width", "width"], "Alignment width"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class ETandemCommandline(_EmbossCommandLine): | |
"""Commandline object for the etandem program from EMBOSS.""" | |
def __init__(self, cmd="etandem", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], "Sequence", filename=True, is_required=True | |
), | |
_Option( | |
["-minrepeat", "minrepeat"], "Minimum repeat size", is_required=True | |
), | |
_Option( | |
["-maxrepeat", "maxrepeat"], "Maximum repeat size", is_required=True | |
), | |
_Option(["-threshold", "threshold"], "Threshold score"), | |
_Option(["-mismatch", "mismatch"], "Allow N as a mismatch"), | |
_Option(["-uniform", "uniform"], "Allow uniform consensus"), | |
_Option(["-rformat", "rformat"], "Output report format"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class EInvertedCommandline(_EmbossCommandLine): | |
"""Commandline object for the einverted program from EMBOSS.""" | |
def __init__(self, cmd="einverted", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], "Sequence", filename=True, is_required=True | |
), | |
_Option(["-gap", "gap"], "Gap penalty", filename=True, is_required=True), | |
_Option( | |
["-threshold", "threshold"], "Minimum score threshold", is_required=True | |
), | |
_Option(["-match", "match"], "Match score", is_required=True), | |
_Option(["-mismatch", "mismatch"], "Mismatch score", is_required=True), | |
_Option( | |
["-maxrepeat", "maxrepeat"], | |
"Maximum separation between the start and end of repeat", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class PalindromeCommandline(_EmbossCommandLine): | |
"""Commandline object for the palindrome program from EMBOSS.""" | |
def __init__(self, cmd="palindrome", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], "Sequence", filename=True, is_required=True | |
), | |
_Option( | |
["-minpallen", "minpallen"], | |
"Minimum palindrome length", | |
is_required=True, | |
), | |
_Option( | |
["-maxpallen", "maxpallen"], | |
"Maximum palindrome length", | |
is_required=True, | |
), | |
_Option( | |
["-gaplimit", "gaplimit"], | |
"Maximum gap between repeats", | |
is_required=True, | |
), | |
_Option( | |
["-nummismatches", "nummismatches"], | |
"Number of mismatches allowed", | |
is_required=True, | |
), | |
_Option( | |
["-overlap", "overlap"], "Report overlapping matches", is_required=True | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class TranalignCommandline(_EmbossCommandLine): | |
"""Commandline object for the tranalign program from EMBOSS.""" | |
def __init__(self, cmd="tranalign", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-asequence", "asequence"], | |
"Nucleotide sequences to be aligned.", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-bsequence", "bsequence"], | |
"Protein sequence alignment", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-outseq", "outseq"], | |
"Output sequence file.", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-table", "table"], "Code to use"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class DiffseqCommandline(_EmbossCommandLine): | |
"""Commandline object for the diffseq program from EMBOSS.""" | |
def __init__(self, cmd="diffseq", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-asequence", "asequence"], | |
"First sequence to compare", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-bsequence", "bsequence"], | |
"Second sequence to compare", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-wordsize", "wordsize"], | |
"Word size to use for comparisons (10 default)", | |
is_required=True, | |
), | |
_Option( | |
["-aoutfeat", "aoutfeat"], | |
"File for output of first sequence's features", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-boutfeat", "boutfeat"], | |
"File for output of second sequence's features", | |
filename=True, | |
is_required=True, | |
), | |
_Option(["-rformat", "rformat"], "Output report file format"), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
class IepCommandline(_EmbossCommandLine): | |
"""Commandline for EMBOSS iep: calculated isoelectric point and charge. | |
Examples | |
-------- | |
>>> from Bio.Emboss.Applications import IepCommandline | |
>>> iep_cline = IepCommandline(sequence="proteins.faa", | |
... outfile="proteins.txt") | |
>>> print(iep_cline) | |
iep -outfile=proteins.txt -sequence=proteins.faa | |
You would typically run the command line with iep_cline() or via the | |
Python subprocess module, as described in the Biopython tutorial. | |
""" | |
def __init__(self, cmd="iep", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"Protein sequence(s) filename", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-amino", "amino"], | |
"""Number of N-termini | |
Integer 0 (default) or more. | |
""", | |
), | |
_Option( | |
["-carboxyl", "carboxyl"], | |
"""Number of C-termini | |
Integer 0 (default) or more. | |
""", | |
), | |
_Option( | |
["-lysinemodified", "lysinemodified"], | |
"""Number of modified lysines | |
Integer 0 (default) or more. | |
""", | |
), | |
_Option( | |
["-disulphides", "disulphides"], | |
"""Number of disulphide bridges | |
Integer 0 (default) or more. | |
""", | |
), | |
# Should we implement the -termini switch as well? | |
_Option( | |
["-notermini", "notermini"], | |
"Exclude (True) or include (False) charge at N and C terminus.", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
# seqret uses -outseq, not -outfile, so use the base class: | |
class SeqretCommandline(_EmbossMinimalCommandLine): | |
"""Commandline object for the seqret program from EMBOSS. | |
This tool allows you to interconvert between different sequence file | |
formats (e.g. GenBank to FASTA). Combining Biopython's Bio.SeqIO module | |
with seqret using a suitable intermediate file format can allow you to | |
read/write to an even wider range of file formats. | |
This wrapper currently only supports the core functionality, things like | |
feature tables (in EMBOSS 6.1.0 onwards) are not yet included. | |
""" | |
def __init__(self, cmd="seqret", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], "Input sequence(s) filename", filename=True | |
), | |
_Option(["-outseq", "outseq"], "Output sequence file.", filename=True), | |
_Option( | |
["-sformat", "sformat"], | |
"Input sequence(s) format (e.g. fasta, genbank)", | |
), | |
_Option( | |
["-osformat", "osformat"], | |
"Output sequence(s) format (e.g. fasta, genbank)", | |
), | |
] | |
_EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) | |
def _validate(self): | |
# Check the outfile, filter, or stdout option has been set. | |
# We can't simply do this via the required flag for the outfile | |
# output - this seems the simplest solution. | |
if not (self.outseq or self.filter or self.stdout): | |
raise ValueError( | |
"You must either set outfile (output filename), " | |
"or enable filter or stdout (output to stdout)." | |
) | |
if not (self.sequence or self.filter or self.stdint): | |
raise ValueError( | |
"You must either set sequence (input filename), " | |
"or enable filter or stdin (input from stdin)." | |
) | |
return _EmbossMinimalCommandLine._validate(self) | |
class SeqmatchallCommandline(_EmbossCommandLine): | |
"""Commandline object for the seqmatchall program from EMBOSS. | |
e.g. | |
>>> cline = SeqmatchallCommandline(sequence="opuntia.fasta", outfile="opuntia.txt") | |
>>> cline.auto = True | |
>>> cline.wordsize = 18 | |
>>> cline.aformat = "pair" | |
>>> print(cline) | |
seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair | |
""" | |
def __init__(self, cmd="seqmatchall", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Option( | |
["-sequence", "sequence"], | |
"Readable set of sequences", | |
filename=True, | |
is_required=True, | |
), | |
_Option( | |
["-wordsize", "wordsize"], "Word size (Integer 2 or more, default 4)" | |
), | |
_Option( | |
["-aformat", "aformat"], | |
"Display output in a different specified output format", | |
), | |
] | |
_EmbossCommandLine.__init__(self, cmd, **kwargs) | |
if __name__ == "__main__": | |
from Bio._utils import run_doctest | |
run_doctest() | |