Spaces:
No application file
No application file
# Copyright 2009 by Cymon J. Cox. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program DIALIGN2-2."""
from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Notes
    -----
    Last checked against version: 2.2

    References
    ----------
    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Examples
    --------
    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Initialize the class.

        Arguments:
         - cmd - command used to invoke the program (default
           ``"dialign2-2"``); stored as ``program_name`` and passed on to
           ``AbstractCommandline.__init__``.
         - kwargs - initial parameter values keyed by the parameter names
           declared below (e.g. ``input``, ``fn``, ``fa``); forwarded
           unchanged to ``AbstractCommandline.__init__``.

        """
        self.program_name = cmd
        # Declarative list of every supported parameter. Each entry pairs the
        # command line flag with the Python keyword name, followed by its
        # help text. The class doctest shows the parameters rendered in this
        # list order, with the input file last.
        self.parameters = [
            _Switch(
                ["-afc", "afc"],
                r"Creates additional output file '\*.afc' "
                "containing data of all fragments considered "
                "for alignment WARNING: this file can be HUGE !",
            ),
            _Switch(
                ["-afc_v", "afc_v"],
                "Like '-afc' but verbose: fragments are explicitly "
                "printed. WARNING: this file can be EVEN BIGGER !",
            ),
            _Switch(
                ["-anc", "anc"],
                "Anchored alignment. Requires a file <seq_file>.anc "
                "containing anchor points.",
            ),
            _Switch(
                ["-cs", "cs"],
                "If segments are translated, not only the 'Watson "
                "strand' but also the 'Crick strand' is looked at.",
            ),
            _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."),
            _Switch(
                ["-ds", "ds"],
                "'dna alignment speed up' - non-translated nucleic acid "
                "fragments are taken into account only if they start "
                "with at least two matches. Speeds up DNA alignment at "
                "the expense of sensitivity.",
            ),
            _Switch(["-fa", "fa"], "Additional output file in FASTA format."),
            _Switch(
                ["-ff", "ff"],
                r"Creates file \*.frg containing information about all "
                "fragments that are part of the respective optimal "
                "pairwise alignments plus information about "
                "consistency in the multiple alignment",
            ),
            _Option(
                ["-fn", "fn"],
                "Output files are named <out_file>.<extension>.",
                equate=False,
            ),
            _Switch(
                ["-fop", "fop"],
                r"Creates file \*.fop containing coordinates of all "
                "fragments that are part of the respective pairwise alignments.",
            ),
            _Switch(
                ["-fsm", "fsm"],
                r"Creates file \*.fsm containing coordinates of all "
                "fragments that are part of the final alignment",
            ),
            _Switch(
                ["-iw", "iw"],
                "Overlap weights switched off (by default, overlap "
                "weights are used if up to 35 sequences are aligned). "
                "This option speeds up the alignment but may lead "
                "to reduced alignment quality.",
            ),
            _Switch(
                ["-lgs", "lgs"],
                "'long genomic sequences' - combines the following "
                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                "-fop, -ff, -cs, -ds, -pst ",
            ),
            _Switch(
                ["-lgs_t", "lgs_t"],
                "Like '-lgs' but with all segment pairs assessed "
                "at the peptide level (rather than 'mixed alignments' "
                "as with the '-lgs' option). Therefore faster than "
                "-lgs but not very sensitive for non-coding regions.",
            ),
            _Option(
                ["-lmax", "lmax"],
                "Maximum fragment length = x (default: x = 40 or "
                "x = 120 for 'translated' fragments). Shorter x "
                "speeds up the program but may affect alignment quality.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["-lo", "lo"],
                r"(Long Output) Additional file \*.log with information "
                "about fragments selected for pairwise alignment and "
                "about consistency in multi-alignment procedure.",
            ),
            _Switch(
                ["-ma", "ma"],
                "'mixed alignments' consisting of P-fragments and "
                "N-fragments if nucleic acid sequences are aligned.",
            ),
            _Switch(
                ["-mask", "mask"],
                "Residues not belonging to selected fragments are "
                r"replaced by '\*' characters in output alignment "
                "(rather than being printed in lower-case characters)",
            ),
            _Switch(
                ["-mat", "mat"],
                r"Creates file \*mat with substitution counts derived "
                "from the fragments that have been selected for alignment.",
            ),
            # NOTE(review): the help text mentions a threshold "t", which
            # suggests the program expects a value here, yet this is declared
            # as a plain switch. Kept as-is for backward compatibility;
            # confirm against the DIALIGN documentation.
            _Switch(
                ["-mat_thr", "mat_thr"],
                "Like '-mat' but only fragments with weight score "
                "> t are considered",
            ),
            _Switch(
                ["-max_link", "max_link"],
                "'maximum linkage' clustering used to construct "
                "sequence tree (instead of UPGMA).",
            ),
            _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"], "'motif' option.", equate=False),
            _Switch(["-msf", "msf"], "Separate output file in MSF format."),
            _Switch(
                ["-n", "n"],
                "Input sequences are nucleic acid sequences. "
                "No translation of fragments.",
            ),
            _Switch(
                ["-nt", "nt"],
                "Input sequences are nucleic acid sequences and "
                "'nucleic acid segments' are translated to 'peptide "
                "segments'.",
            ),
            _Switch(
                ["-nta", "nta"],
                "'no textual alignment' - textual alignment suppressed. "
                "This option makes sense if other output files are of "
                "interest -- e.g. the fragment files created with -ff, "
                "-fop, -fsm or -lo.",
            ),
            _Switch(
                ["-o", "o"],
                "Fast version, resulting alignments may be slightly different.",
            ),
            _Switch(
                ["-ow", "ow"],
                "Overlap weights enforced (By default, overlap weights "
                "are used only if up to 35 sequences are aligned since "
                "calculating overlap weights is time consuming).",
            ),
            _Switch(
                ["-pst", "pst"],
                r"'print status'. Creates and updates a file \*.sta with "
                "information about the current status of the program "
                "run. This option is recommended if large data sets "
                "are aligned since it allows the user to estimate the "
                "remaining running time.",
            ),
            # NOTE(review): the '-lgs' help text above lists "-smin 8", which
            # suggests this flag takes a value, yet it is declared as a plain
            # switch. Kept as-is for backward compatibility; confirm against
            # the DIALIGN documentation.
            _Switch(
                ["-smin", "smin"],
                "Minimum similarity value for first residue pair "
                "(or codon pair) in fragments. Speeds up protein "
                "alignment or alignment of translated DNA fragments "
                "at the expense of sensitivity.",
            ),
            _Option(
                ["-stars", "stars"],
                r"Maximum number of '\*' characters indicating degree "
                "of local similarity among sequences. By default, no "
                "stars are used but numbers between 0 and 9, instead.",
                # Accept only values equal to one of the integers 0..9.
                checker_function=lambda x: x in range(0, 10),
                equate=False,
            ),
            _Switch(["-stdo", "stdo"], "Results written to standard output."),
            _Switch(
                ["-ta", "ta"],
                "Standard textual alignment printed (overrides "
                "suppression of textual alignments in special "
                "options, e.g. -lgs)",
            ),
            _Option(
                ["-thr", "thr"],
                "Threshold T = x.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["-xfr", "xfr"],
                "'exclude fragments' - list of fragments can be "
                "specified that are NOT considered for pairwise alignment",
            ),
            # Required input file, declared last; the class doctest shows it
            # rendered after all options on the command line.
            _Argument(
                ["input"],
                "Input file name. Must be FASTA format",
                filename=True,
                is_required=True,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # Only when executed as a script: call Biopython's run_doctest helper
    # (presumably runs the doctests in this module's docstrings - confirm
    # in Bio._utils).
    from Bio._utils import run_doctest

    run_doctest()