Spaces:
No application file
No application file
File size: 10,846 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 |
# Copyright 2011 by Eric Talevich. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command-line wrapper for the tree inference program PhyML."""
from Bio.Application import _Option, _Switch, AbstractCommandline
# Named substitution models accepted by the -m/--model option.
# Kept as a tuple so membership tests use ==, which also works for unhashable
# values that a caller might pass by mistake.
_PHYML_MODEL_NAMES = (
    # Nucleotide models:
    "HKY85",
    "JC69",
    "K80",
    "F81",
    "F84",
    "TN93",
    "GTR",
    # Amino acid models:
    "LG",
    "WAG",
    "JTT",
    "MtREV",
    "Dayhoff",
    "DCMut",
    "RtREV",
    "CpREV",
    "VT",
    "Blosum62",
    "MtMam",
    "MtArt",
    "HIVw",
    "HIVb",
)


def _is_dataset_count(value):
    """Return True if value is an int or a string made only of digits.

    Values that are neither int nor str are rejected instead of raising
    AttributeError from a missing ``isdigit`` method.
    """
    if isinstance(value, int):
        return True
    return isinstance(value, str) and value.isdigit()


def _is_model(value):
    """Return True if value is an acceptable -m/--model argument.

    Accepted values are the named models in ``_PHYML_MODEL_NAMES``, any int
    (kept for backward compatibility), and a string of exactly six ASCII
    digits, which is how the option's help text says custom models are
    written (e.g. "000000" or "012345"). A string is needed for those because
    an int cannot carry leading zeros.
    """
    if isinstance(value, int) or value in _PHYML_MODEL_NAMES:
        return True
    return (
        isinstance(value, str)
        and len(value) == 6
        and all(char in "0123456789" for char in value)
    )


class PhymlCommandline(AbstractCommandline):
    """Command-line wrapper for the tree inference program PhyML.

    Homepage: http://www.atgc-montpellier.fr/phyml

    References
    ----------
    Guindon S, Gascuel O.
    A simple, fast, and accurate algorithm to estimate large phylogenies by maximum
    likelihood.
    Systematic Biology, 2003 Oct;52(5):696-704.
    PubMed PMID: 14530136.

    Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O.
    New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing
    the Performance of PhyML 3.0.
    Systematic Biology, 2010 59(3):307-21.

    """

    def __init__(self, cmd="phyml", **kwargs):
        """Initialize the class.

        Builds the list of PhyML options and switches, then hands ``cmd``
        (the program name, "phyml" by default) and all keyword arguments
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.parameters = [
            _Option(
                ["-i", "--input", "input"],
                "PHYLIP format input nucleotide or amino-acid sequence filename.",
                filename=True,
                is_required=True,
                equate=False,
            ),
            _Option(
                ["-d", "--datatype", "datatype"],
                "Datatype 'nt' for nucleotide (default) or 'aa' for amino-acids.",
                checker_function=lambda x: x in ("nt", "aa"),
                equate=False,
            ),
            _Switch(
                ["-q", "--sequential", "sequential"],
                "Changes interleaved format (default) to sequential format.",
            ),
            _Option(
                ["-n", "--multiple", "multiple"],
                "Number of data sets to analyse (integer).",
                checker_function=_is_dataset_count,
                equate=False,
            ),
            _Switch(
                ["-p", "--pars", "pars"],
                (
                    "Use a minimum parsimony starting tree.\n"
                    "This option is taken into account when the '-u' option is absent\n"
                    "and when tree topology modifications are to be done.\n"
                ),
            ),
            _Option(
                ["-b", "--bootstrap", "bootstrap"],
                (
                    "Number of bootstrap replicates, if value is > 0.\n"
                    "Otherwise:\n"
                    "0: neither approximate likelihood ratio test nor bootstrap\n"
                    "values are computed.\n"
                    "-1: approximate likelihood ratio test returning aLRT statistics.\n"
                    "-2: approximate likelihood ratio test returning Chi2-based\n"
                    "parametric branch supports.\n"
                    "-4: SH-like branch supports alone.\n"
                ),
                equate=False,
            ),
            _Option(
                ["-m", "--model", "model"],
                (
                    "Substitution model name.\n"
                    "Nucleotide-based models:\n"
                    "HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom\n"
                    "For the custom option, a string of six digits identifies the\n"
                    "model. For instance, 000000 corresponds to F81 (or JC69,\n"
                    "provided the distribution of nucleotide frequencies is uniform).\n"
                    "012345 corresponds to GTR. This option can be used for encoding\n"
                    "any model that is a nested within GTR.\n"
                    "Amino-acid based models:\n"
                    "LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV |\n"
                    "CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom\n"
                ),
                # Named models, ints, and six-digit custom model strings.
                checker_function=_is_model,
                equate=False,
            ),
            _Option(
                ["-f", "frequencies"],
                (
                    "Character frequencies.\n"
                    '-f e, m, or "fA fC fG fT"\n'
                    "e : Empirical frequencies, determined as follows :\n"
                    "- Nucleotide sequences: (Empirical) the equilibrium base\n"
                    "frequencies are estimated by counting the occurrence\n"
                    "of the different bases in the alignment.\n"
                    "- Amino-acid sequences: (Empirical) the equilibrium\n"
                    "amino-acid frequencies are estimated by counting the\n"
                    "occurrence of the different amino-acids in the alignment.\n"
                    "m : ML/model-based frequencies, determined as follows :\n"
                    "- Nucleotide sequences: (ML) the equilibrium base\n"
                    "frequencies are estimated using maximum likelihood\n"
                    "- Amino-acid sequences: (Model) the equilibrium amino-acid\n"
                    "frequencies are estimated using the frequencies defined by\n"
                    "the substitution model.\n"
                    '"fA fC fG fT" : only valid for nucleotide-based models.\n'
                    "fA, fC, fG and fT are floating-point numbers that correspond\n"
                    "to the frequencies of A, C, G and T, respectively.\n"
                ),
                filename=True,  # ensure ".25 .25 .25 .25" stays quoted
                equate=False,
            ),
            _Option(
                ["-t", "--ts/tv", "ts_tv_ratio"],
                (
                    "Transition/transversion ratio. (DNA sequences only.)\n"
                    "Can be a fixed positive value (ex:4.0) or e to get the\n"
                    "maximum-likelihood estimate.\n"
                ),
                equate=False,
            ),
            _Option(
                ["-v", "--pinv", "prop_invar"],
                (
                    "Proportion of invariable sites.\n"
                    "Can be a fixed value in the range [0,1], or 'e' to get the\n"
                    "maximum-likelihood estimate.\n"
                ),
                equate=False,
            ),
            _Option(
                ["-c", "--nclasses", "nclasses"],
                (
                    "Number of relative substitution rate categories.\n"
                    "Default 1. Must be a positive integer.\n"
                ),
                equate=False,
            ),
            _Option(
                ["-a", "--alpha", "alpha"],
                (
                    "Distribution of the gamma distribution shape parameter.\n"
                    "Can be a fixed positive value, or 'e' to get the\n"
                    "maximum-likelihood estimate.\n"
                ),
                equate=False,
            ),
            _Option(
                ["-s", "--search", "search"],
                (
                    "Tree topology search operation option.\n"
                    "Can be one of:\n"
                    "NNI : default, fast\n"
                    "SPR : a bit slower than NNI\n"
                    "BEST : best of NNI and SPR search\n"
                ),
                checker_function=lambda x: x in ("NNI", "SPR", "BEST"),
                equate=False,
            ),
            # alt name: user_tree_file
            _Option(
                ["-u", "--inputtree", "input_tree"],
                "Starting tree filename. The tree must be in Newick format.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-o", "optimize"],
                (
                    "Specific parameter optimisation.\n"
                    "tlr : tree topology (t), branch length (l) and\n"
                    "rate parameters (r) are optimised.\n"
                    "tl : tree topology and branch length are optimised.\n"
                    "lr : branch length and rate parameters are optimised.\n"
                    "l : branch length are optimised.\n"
                    "r : rate parameters are optimised.\n"
                    "n : no parameter is optimised.\n"
                ),
                equate=False,
            ),
            _Switch(
                ["--rand_start", "rand_start"],
                (
                    "Sets the initial tree to random.\n"
                    "Only valid if SPR searches are to be performed.\n"
                ),
            ),
            _Option(
                ["--n_rand_starts", "n_rand_starts"],
                (
                    "Number of initial random trees to be used.\n"
                    "Only valid if SPR searches are to be performed.\n"
                ),
                equate=False,
            ),
            _Option(
                ["--r_seed", "r_seed"],
                (
                    "Seed used to initiate the random number generator.\n"
                    "Must be an integer.\n"
                ),
                equate=False,
            ),
            _Switch(
                ["--print_site_lnl", "print_site_lnl"],
                r"Print the likelihood for each site in file \*_phyml_lk.txt.",
            ),
            _Switch(
                ["--print_trace", "print_trace"],
                (
                    "\nPrint each phylogeny explored during the tree search process\n"
                    r"in file \*_phyml_trace.txt."
                ),
            ),
            _Option(
                ["--run_id", "run_id"],
                (
                    "Append the given string at the end of each PhyML output file.\n"
                    "This option may be useful when running simulations involving\n"
                    "PhyML.\n"
                ),
                checker_function=lambda x: isinstance(x, str),
                equate=False,
            ),
            # XXX should this always be set to True?
            _Switch(
                ["--quiet", "quiet"],
                "No interactive questions (for running in batch mode).",
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
|