Spaces:
No application file
No application file
# Copyright 2013 by Nate Sutton. | |
# Based on code in _Phyml.py by Eric Talevich. | |
# All rights reserved. | |
# | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Command-line wrapper for tree inference program Fasttree.""" | |
from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline | |
def _is_int(x): | |
"""Test whether the argument can be serialized as an integer (PRIVATE).""" | |
return isinstance(x, int) or str(x).isdigit() | |
def _is_numeric(x): | |
"""Test whether the argument can be serialized as a number (PRIVATE).""" | |
try: | |
float(str(x)) | |
return True | |
except ValueError: | |
return False | |
class FastTreeCommandline(AbstractCommandline): | |
r"""Command-line wrapper for FastTree. | |
Only the ``input`` and ``out`` parameters are mandatory. | |
From the terminal command line use ``fasttree.exe -help`` or ``fasttree.exe -expert`` | |
for more explanation of usage options. | |
Homepage: http://www.microbesonline.org/fasttree/ | |
References | |
---------- | |
Price, M.N., Dehal, P.S., and Arkin, A.P. (2010) FastTree 2 -- Approximately | |
Maximum-Likelihood Trees for Large Alignments. PLoS ONE, 5(3):e9490. | |
https://doi.org/10.1371/journal.pone.0009490. | |
Examples | |
-------- | |
This is an example on Windows:: | |
import _Fasttree | |
fasttree_exe = r"C:\FasttreeWin32\fasttree.exe" | |
cmd = _Fasttree.FastTreeCommandline(fasttree_exe, | |
... input=r'C:\Input\ExampleAlignment.fsa', | |
... out=r'C:\Output\ExampleTree.tree') | |
print(cmd) | |
out, err = cmd() | |
print(out) | |
print(err) | |
""" | |
def __init__(self, cmd="fasttree", **kwargs): | |
"""Initialize the class.""" | |
self.parameters = [ | |
_Switch( | |
["-nt", "nt"], | |
"By default FastTree expects protein alignments, use -nt for nucleotides", | |
), | |
_Option( | |
["-n", "n"], | |
"""-n -- read N multiple alignments in. | |
This only works with phylip interleaved format. For example, you can | |
use it with the output from phylip's seqboot. If you use -n, FastTree | |
will write 1 tree per line to standard output. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch( | |
["-quote", "quote"], | |
"""-quote -- add quotes to sequence names in output. | |
Quote sequence names in the output and allow spaces, commas, | |
parentheses, and colons in them but not ' characters (fasta files only). | |
""", | |
), | |
_Option( | |
["-pseudo", "pseudo"], | |
"""-pseudo [weight] -- Pseudocounts are used with sequence distance estimation. | |
Use pseudocounts to estimate distances between sequences with little or no | |
overlap. (Off by default.) Recommended if analyzing the alignment has | |
sequences with little or no overlap. | |
If the weight is not specified, it is 1.0 | |
""", | |
checker_function=_is_numeric, | |
equate=False, | |
), | |
_Option( | |
["-boot", "boot"], | |
"""Specify the number of resamples for support values. | |
Support value options: | |
By default, FastTree computes local support values by resampling the site | |
likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome, | |
it will compute minimum-evolution bootstrap supports instead | |
In either case, the support values are proportions ranging from 0 to 1 | |
Use -nosupport to turn off support values or -boot 100 to use just 100 resamples. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch( | |
["-nosupport", "nosupport"], | |
"""Turn off support values. | |
Support value options: | |
By default, FastTree computes local support values by resampling the site | |
likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome, | |
it will compute minimum-evolution bootstrap supports instead | |
In either case, the support values are proportions ranging from 0 to 1 | |
Use -nosupport to turn off support values or -boot 100 to use just 100 resamples. | |
""", | |
), | |
_Option( | |
["-intree", "intree"], | |
"""-intree newickfile -- read the starting tree in from newickfile. | |
Any branch lengths in the starting trees are ignored. | |
-intree with -n will read a separate starting tree for each alignment. | |
""", | |
filename=True, | |
equate=False, | |
), | |
_Option( | |
["-intree1", "intree1"], | |
"intree1 newickfile -- read the same starting tree for each alignment.", | |
filename=True, | |
equate=False, | |
), | |
_Switch( | |
["-quiet", "quiet"], | |
"""-quiet -- do not write to standard error during normal operation | |
(no progress indicator, no options summary, no likelihood values, etc.) | |
""", | |
), | |
_Switch( | |
["-nopr", "nopr"], | |
"-nopr -- do not write the progress indicator to stderr.", | |
), | |
_Option( | |
["-nni", "nni"], | |
"""Set the rounds of minimum-evolution nearest-neighbor interchanges | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Option( | |
["-spr", "spr"], | |
"""Set the rounds of subtree-prune-regraft moves | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch( | |
["-noml", "noml"], | |
"""Deactivate min-evo NNIs and SPRs. | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
Use -noml to turn off both min-evo NNIs and SPRs (useful if refining | |
an approximately maximum-likelihood tree with further NNIs). | |
""", | |
), | |
_Switch( | |
["-mllen", "mllen"], | |
"""Optimize branch lengths on a fixed topology. | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
Use -mllen to optimize branch lengths without ML NNIs | |
Use -mllen -nome with -intree to optimize branch lengths on a fixed topology. | |
""", | |
), | |
_Switch( | |
["-nome", "nome"], | |
"""Changes support values calculation to a minimum-evolution bootstrap method. | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
Use -mllen to optimize branch lengths without ML NNIs | |
Use -mllen -nome with -intree to optimize branch lengths on a fixed topology | |
Support value options: | |
By default, FastTree computes local support values by resampling the site | |
likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome, | |
it will compute minimum-evolution bootstrap supports instead | |
In either case, the support values are proportions ranging from 0 to 1. | |
""", | |
), | |
_Option( | |
["-mlnni", "mlnni"], | |
"""Set the number of rounds of maximum-likelihood NNIs. | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
Use -mlnni to set the number of rounds of maximum-likelihood NNIs. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Option( | |
["-mlacc", "mlacc"], | |
"""Option for optimization of branches at each NNI. | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI, | |
and to optimize all 5 branches in 2 or 3 rounds. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch( | |
["-slownni", "slownni"], | |
"""Turn off heuristics to avoid constant subtrees with NNIs. | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
Use -slownni to turn off heuristics to avoid constant subtrees | |
(affects both ML and ME NNIs). | |
""", | |
), | |
_Switch( | |
["-wag", "wag"], | |
"""Maximum likelihood model options. | |
Whelan-And-Goldman 2001 model instead of (default) | |
Jones-Taylor-Thorton 1992 model (a.a. only) | |
""", | |
), | |
_Switch( | |
["-gtr", "gtr"], | |
"""Maximum likelihood model options. | |
Use generalized time-reversible instead of (default) | |
Jukes-Cantor (nt only) | |
""", | |
), | |
_Option( | |
["-cat", "cat"], | |
"""Maximum likelihood model options. | |
Specify the number of rate categories of sites (default 20).""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch( | |
["-nocat", "nocat"], | |
"Maximum likelihood model options: No CAT model (just 1 category)", | |
), | |
_Switch( | |
["-gamma", "gamma"], | |
"""Report the likelihood under the discrete gamma model. | |
Maximum likelihood model options: | |
-gamma -- after the final round of optimizing branch lengths with the CAT model, | |
report the likelihood under the discrete gamma model with the same | |
number of categories. FastTree uses the same branch lengths but | |
optimizes the gamma shape parameter and the scale of the lengths. | |
The final tree will have rescaled lengths. Used with -log, this | |
also generates per-site likelihoods for use with CONSEL, see | |
GammaLogToPaup.pl and documentation on the FastTree web site. | |
""", | |
), | |
_Switch( | |
["-slow", "slow"], | |
"""Use an exhaustive search. | |
Searching for the best join: | |
By default, FastTree combines the 'visible set' of fast neighbor-joining with | |
local hill-climbing as in relaxed neighbor-joining | |
-slow -- exhaustive search (like NJ or BIONJ, but different gap handling) | |
-slow takes half an hour instead of 8 seconds for 1,250 proteins | |
""", | |
), | |
_Switch( | |
["-fastest", "fastest"], | |
"""Search the visible set (the top hit for each node) only. | |
Searching for the best join: | |
By default, FastTree combines the 'visible set' of fast neighbor-joining with | |
local hill-climbing as in relaxed neighbor-joining | |
-fastest -- search the visible set (the top hit for each node) only | |
Unlike the original fast neighbor-joining, -fastest updates visible(C) | |
after joining A and B if join(AB,C) is better than join(C,visible(C)) | |
-fastest also updates out-distances in a very lazy way, | |
-fastest sets -2nd on as well, use -fastest -no2nd to avoid this | |
""", | |
), | |
_Switch( | |
["-2nd", "second"], | |
"""Turn 2nd-level top hits heuristic on. | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
Use -notop (or -slow) to turn this feature off | |
and compare all leaves to each other, | |
and all new joined nodes to each other | |
-2nd or -no2nd to turn 2nd-level top hits heuristic on or off | |
This reduces memory usage and running time but may lead to | |
marginal reductions in tree quality. | |
(By default, -fastest turns on -2nd.) | |
""", | |
), | |
_Switch( | |
["-no2nd", "no2nd"], | |
"""Turn 2nd-level top hits heuristic off. | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
Use -notop (or -slow) to turn this feature off | |
and compare all leaves to each other, | |
and all new joined nodes to each other | |
-2nd or -no2nd to turn 2nd-level top hits heuristic on or off | |
This reduces memory usage and running time but may lead to | |
marginal reductions in tree quality. | |
(By default, -fastest turns on -2nd.) | |
""", | |
), | |
_Option( | |
["-seed", "seed"], | |
"""Use -seed to initialize the random number generator. | |
Support value options: | |
By default, FastTree computes local support values by resampling the site | |
likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome, | |
it will compute minimum-evolution bootstrap supports instead | |
In either case, the support values are proportions ranging from 0 to 1. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch( | |
["-top", "top"], | |
"""Top-hit list to speed up search | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
Use -notop (or -slow) to turn this feature off | |
and compare all leaves to each other, | |
and all new joined nodes to each other. | |
""", | |
), | |
_Switch( | |
["-notop", "notop"], | |
"""Turn off top-hit list to speed up search | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
Use -notop (or -slow) to turn this feature off | |
and compare all leaves to each other, | |
and all new joined nodes to each other. | |
""", | |
), | |
_Option( | |
["-topm", "topm"], | |
"""Change the top hits calculation method | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
-topm 1.0 -- set the top-hit list size to parameter*sqrt(N) | |
FastTree estimates the top m hits of a leaf from the | |
top 2*m hits of a 'close' neighbor, where close is | |
defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m), | |
and updates the top-hits as joins proceed. | |
""", | |
checker_function=_is_numeric, | |
equate=False, | |
), | |
_Option( | |
["-close", "close"], | |
"""Modify the close heuristic for the top-hit list | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
-close 0.75 -- modify the close heuristic, lower is more conservative. | |
""", | |
checker_function=_is_numeric, | |
equate=False, | |
), | |
_Option( | |
["-refresh", "refresh"], | |
"""Parameter for conditions that joined nodes are compared to other nodes | |
Top-hit heuristics: | |
By default, FastTree uses a top-hit list to speed up search | |
-refresh 0.8 -- compare a joined node to all other nodes if its | |
top-hit list is less than 80% of the desired length, | |
or if the age of the top-hit list is log2(m) or greater. | |
""", | |
checker_function=_is_numeric, | |
equate=False, | |
), | |
_Option( | |
["-matrix", "matrix"], | |
"""Specify a matrix for nucleotide or amino acid distances | |
Distances: | |
Default: For protein sequences, log-corrected distances and an | |
amino acid dissimilarity matrix derived from BLOSUM45 | |
or for nucleotide sequences, Jukes-Cantor distances | |
To specify a different matrix, use -matrix FilePrefix or -nomatrix | |
""", | |
filename=True, | |
equate=False, | |
), | |
_Switch( | |
["-nomatrix", "nomatrix"], | |
"""Specify that no matrix should be used for nucleotide or amino acid distances | |
Distances: | |
Default: For protein sequences, log-corrected distances and an | |
amino acid dissimilarity matrix derived from BLOSUM45 | |
or for nucleotide sequences, Jukes-Cantor distances | |
To specify a different matrix, use -matrix FilePrefix or -nomatrix | |
""", | |
), | |
_Switch( | |
["-nj", "nj"], | |
"Join options: regular (unweighted) neighbor-joining (default)", | |
), | |
_Switch( | |
["-bionj", "bionj"], | |
"""Join options: weighted joins as in BIONJ. | |
FastTree will also weight joins during NNIs. | |
""", | |
), | |
_Option( | |
["-gtrrates", "gtrrates"], "-gtrrates ac ag at cg ct gt", equate=False | |
), | |
_Option(["-gtrfreq", "gtrfreq"], "-gtrfreq A C G T", equate=False), | |
_Option( | |
["-constraints", "constraints"], | |
"""Specifies an alignment file for use with constrained topology searching | |
Constrained topology search options: | |
-constraints alignmentfile -- an alignment with values of 0, 1, and - | |
Not all sequences need be present. A column of 0s and 1s defines a | |
constrained split. Some constraints may be violated | |
(see 'violating constraints:' in standard error). | |
""", | |
filename=True, | |
equate=False, | |
), | |
_Option( | |
["-constraintWeight", "constraintWeight"], | |
"""Weight strength of constraints in topology searching. | |
Constrained topology search options: | |
-constraintWeight -- how strongly to weight the constraints. A value of 1 | |
means a penalty of 1 in tree length for violating a constraint | |
Default: 100.0 | |
""", | |
checker_function=_is_numeric, | |
equate=False, | |
), | |
_Option( | |
["-log", "log"], | |
"""Create log files of data such as intermediate trees and per-site rates | |
-log logfile -- save intermediate trees so you can extract | |
the trees and restart long-running jobs if they crash | |
-log also reports the per-site rates (1 means slowest category). | |
""", | |
filename=True, | |
equate=False, | |
), | |
_Option( | |
["-makematrix", "makematrix"], | |
"-makematrix [alignment]", | |
filename=True, | |
equate=False, | |
), | |
_Switch( | |
["-rawdist", "rawdist"], | |
"""Turn off or adjust log-correction in AA or NT distances. | |
Use -rawdist to turn the log-correction off or to use | |
%different instead of Jukes-Cantor in AA or NT distances | |
Distances: | |
Default: For protein sequences, log-corrected distances and an | |
amino acid dissimilarity matrix derived from BLOSUM45 | |
or for nucleotide sequences, Jukes-Cantor distances | |
To specify a different matrix, use -matrix FilePrefix or -nomatrix | |
""", | |
), | |
_Option( | |
["-sprlength", "sprlength"], | |
"""Set maximum SPR move length in topology refinement (default 10). | |
Topology refinement: | |
By default, FastTree tries to improve the tree with up to 4*log2(N) | |
rounds of minimum-evolution nearest-neighbor interchanges (NNI), | |
where N is the number of unique sequences, 2 rounds of | |
subtree-prune-regraft (SPR) moves (also min. evo.), and | |
up to 2*log(N) rounds of maximum-likelihood NNIs. | |
Use -nni to set the number of rounds of min. evo. NNIs, | |
and -spr to set the rounds of SPRs. | |
""", | |
checker_function=_is_int, | |
equate=False, | |
), | |
_Switch(["-help", "help"], "Show the help."), | |
_Switch(["-expert", "expert"], "Show the expert level help."), | |
_Option( | |
["-out", "out"], | |
"""Enter <output file> | |
The path to a Newick Tree output file needs to be specified. | |
""", | |
filename=True, | |
equate=False, | |
), | |
_Argument( | |
["input"], | |
"""Enter <input file> | |
An input file of sequence alignments in fasta or phylip format | |
is needed. By default FastTree expects protein | |
alignments, use -nt for nucleotides. | |
""", | |
filename=True, | |
is_required=True, | |
), | |
] | |
AbstractCommandline.__init__(self, cmd, **kwargs) | |