Spaces:
No application file
No application file
# Copyright 2009 by Cymon J. Cox. All rights reserved. | |
# | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Command line wrapper for the multiple alignment program PRANK.""" | |
from Bio.Application import _Option, _Switch, AbstractCommandline | |
class PrankCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program PRANK.

    http://www.ebi.ac.uk/goldman-srv/prank/prank/

    Notes
    -----
    Last checked against version: 081202

    References
    ----------
    Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
    multiple alignment of sequences with insertions. Proceedings of
    the National Academy of Sciences, 102: 10557--10562.

    Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
    prevents errors in sequence alignment and evolutionary analysis.
    Science, 320: 1632.

    Examples
    --------
    To align a FASTA file (unaligned.fasta) with the output in aligned
    FASTA format with the output filename starting with "aligned" (you
    can't pick the filename explicitly), no tree output and no XML output,
    use:

    >>> from Bio.Align.Applications import PrankCommandline
    >>> prank_cline = PrankCommandline(d="unaligned.fasta",
    ...                                o="aligned", # prefix only!
    ...                                f=8, # FASTA output
    ...                                notree=True, noxml=True)
    >>> print(prank_cline)
    prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree

    You would typically run the command line with prank_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="prank", **kwargs):
        """Initialize the class.

        Builds the list of PRANK options/switches in ``self.parameters``
        and then hands ``cmd`` and ``kwargs`` to
        ``AbstractCommandline.__init__``.

        Parameters
        ----------
        cmd : str
            Name (or path) of the PRANK executable. Default: "prank".
        **kwargs
            Forwarded unchanged to ``AbstractCommandline.__init__``. As in
            the class-level example, these are option values keyed by the
            Python-side parameter name (e.g. ``d``, ``o``, ``f``).

        """
        # Accepted values for -f: every integer from 1 to 17 inclusive.
        OUTPUT_FORMAT_VALUES = list(range(1, 18))
        self.parameters = [
            # ################# input/output parameters: ##################
            # -d=sequence_file
            _Option(["-d", "d"], "Input filename", filename=True, is_required=True),
            # -t=tree_file [default: no tree, generate approximate NJ tree]
            _Option(["-t", "t"], "Input guide tree filename", filename=True),
            # -tree="tree_string" [tree in newick format; in double quotes]
            _Option(["-tree", "tree"], "Input guide tree as Newick string"),
            # -m=model_file [default: HKY2/WAG]
            _Option(
                ["-m", "m"], "User-defined alignment model filename. Default: HKY2/WAG"
            ),
            # -o=output_file [default: 'output']
            _Option(
                ["-o", "o"],
                "Output filenames prefix. Default: 'output'\n "
                "Will write: output.?.fas (depending on requested "
                "format), output.?.xml and output.?.dnd",
                filename=True,
            ),
            # -f=output_format [default: 8]
            _Option(
                ["-f", "f"],
                "Output alignment format. Default: 8 FASTA\n"
                "Option are:\n"
                "1. IG/Stanford 8. Pearson/Fasta\n"
                "2. GenBank/GB 11. Phylip3.2\n"
                "3. NBRF 12. Phylip\n"
                "4. EMBL 14. PIR/CODATA\n"
                "6. DNAStrider 15. MSF\n"
                "7. Fitch 17. PAUP/NEXUS",
                checker_function=lambda x: x in OUTPUT_FORMAT_VALUES,
            ),
            _Switch(
                ["-noxml", "noxml"],
                "Do not output XML files (PRANK versions earlier than v.120626)",
            ),
            _Switch(
                ["-notree", "notree"],
                "Do not output dnd tree files (PRANK versions earlier than v.120626)",
            ),
            _Switch(
                ["-showxml", "showxml"], "Output XML files (PRANK v.120626 and later)"
            ),
            _Switch(
                ["-showtree", "showtree"],
                "Output dnd tree files (PRANK v.120626 and later)",
            ),
            _Switch(["-shortnames", "shortnames"], "Truncate names at first space"),
            _Switch(["-quiet", "quiet"], "Reduce verbosity"),
            # ###################### model parameters: ######################
            # +F [force insertions to be always skipped]
            # -F [equivalent]
            _Switch(
                ["-F", "+F", "F"], "Force insertions to be always skipped: same as +F"
            ),
            # -dots [show insertion gaps as dots]
            _Switch(["-dots", "dots"], "Show insertion gaps as dots"),
            # -gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025]
            _Option(
                ["-gaprate", "gaprate"],
                "Gap opening rate. Default: dna 0.025 prot 0.0025",
                checker_function=lambda x: isinstance(x, float),
            ),
            # -gapext=# [gap extension probability; default: dna 0.5 / prot 0.5]
            _Option(
                ["-gapext", "gapext"],
                "Gap extension probability. Default: dna 0.5 / prot 0.5",
                checker_function=lambda x: isinstance(x, float),
            ),
            # -dnafreqs=#,#,#,# [ACGT; default: empirical]
            # The value must be a text string such as "25,25,25,25". The
            # check used to require ``bytes``, which on Python 3 rejected
            # every ordinary string and only let through values that would
            # be rendered as b'...' on the command line.
            _Option(
                ["-dnafreqs", "dnafreqs"],
                "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote "
                "surrounded string value. Default: empirical",
                checker_function=lambda x: isinstance(x, str),
            ),
            # -kappa=# [ts/tv rate ratio; default:2]
            _Option(
                ["-kappa", "kappa"],
                "Transition/transversion ratio. Default: 2",
                checker_function=lambda x: isinstance(x, int),
            ),
            # -rho=# [pur/pyr rate ratio; default:1]
            _Option(
                ["-rho", "rho"],
                "Purine/pyrimidine ratio. Default: 1",
                checker_function=lambda x: isinstance(x, int),
            ),
            # -codon [for DNA: use empirical codon model]
            _Switch(["-codon", "codon"], "Codon aware alignment or not"),
            # -termgap [penalise terminal gaps normally]
            _Switch(["-termgap", "termgap"], "Penalise terminal gaps normally"),
            # ############### other parameters: ################################
            # -nopost [do not compute posterior support; default: compute]
            _Switch(
                ["-nopost", "nopost"],
                "Do not compute posterior support. Default: compute",
            ),
            # -pwdist=# [expected pairwise distance for computing guidetree;
            # default: dna 0.25 / prot 0.5]
            _Option(
                ["-pwdist", "pwdist"],
                "Expected pairwise distance for computing guidetree. "
                "Default: dna 0.25 / prot 0.5",
                checker_function=lambda x: isinstance(x, float),
            ),
            _Switch(
                ["-once", "once"], "Run only once. Default: twice if no guidetree given"
            ),
            _Switch(["-twice", "twice"], "Always run twice"),
            _Switch(["-skipins", "skipins"], "Skip insertions in posterior support"),
            _Switch(
                ["-uselogs", "uselogs"],
                "Slower but should work for a greater number of sequences",
            ),
            _Switch(["-writeanc", "writeanc"], "Output ancestral sequences"),
            _Switch(
                ["-printnodes", "printnodes"], "Output each node; mostly for debugging"
            ),
            # -matresize=# [matrix resizing multiplier]
            # Doesn't specify type but Float and Int work
            _Option(
                ["-matresize", "matresize"],
                "Matrix resizing multiplier",
                checker_function=lambda x: isinstance(x, (float, int)),
            ),
            # -matinitsize=# [matrix initial size multiplier]
            # Doesn't specify type but Float and Int work
            _Option(
                ["-matinitsize", "matinitsize"],
                "Matrix initial size multiplier",
                checker_function=lambda x: isinstance(x, (float, int)),
            ),
            _Switch(["-longseq", "longseq"], "Save space in pairwise alignments"),
            _Switch(["-pwgenomic", "pwgenomic"], "Do pairwise alignment, no guidetree"),
            # -pwgenomicdist=# [distance for pairwise alignment; default: 0.3]
            _Option(
                ["-pwgenomicdist", "pwgenomicdist"],
                "Distance for pairwise alignment. Default: 0.3",
                checker_function=lambda x: isinstance(x, float),
            ),
            # -scalebranches=# [scale branch lengths; default: dna 1 / prot 2]
            _Option(
                ["-scalebranches", "scalebranches"],
                "Scale branch lengths. Default: dna 1 / prot 2",
                checker_function=lambda x: isinstance(x, int),
            ),
            # -fixedbranches=# [use fixed branch lengths]
            # Assume looking for a float
            _Option(
                ["-fixedbranches", "fixedbranches"],
                "Use fixed branch lengths of input value",
                checker_function=lambda x: isinstance(x, float),
            ),
            # -maxbranches=# [set maximum branch length]
            # Assume looking for a float
            _Option(
                ["-maxbranches", "maxbranches"],
                "Use maximum branch lengths of input value",
                checker_function=lambda x: isinstance(x, float),
            ),
            # -realbranches [disable branch length truncation]
            _Switch(
                ["-realbranches", "realbranches"], "Disable branch length truncation"
            ),
            _Switch(["-translate", "translate"], "Translate to protein"),
            _Switch(
                ["-mttranslate", "mttranslate"], "Translate to protein using mt table"
            ),
            # ##################### other: ####################
            _Switch(
                ["-convert", "convert"],
                "Convert input alignment to new format. Do not perform alignment",
            ),
        ]
        # self.parameters is assigned before delegating, as in the original
        # ordering, so the base initializer runs with the list in place.
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # Executed directly as a script: run the doctests embedded in this
    # module's docstrings via Biopython's helper.
    from Bio import _utils

    _utils.run_doctest()