File size: 10,101 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# Copyright 2009 by Cymon J. Cox.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program DIALIGN2-2."""

from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline


class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Notes
    -----
    Last checked against version: 2.2

    References
    ----------
    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Examples
    --------
    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Initialize the class.

        Arguments:
         - cmd - name (or path) of the DIALIGN executable; it is stored as
           ``program_name`` and forwarded to ``AbstractCommandline``.
         - kwargs - initial parameter values keyed by the Python-side names
           declared below (e.g. ``input="unaligned.fasta"``, ``fn="aligned"``,
           ``fa=True`` as in the class docstring example). They are forwarded
           unchanged to ``AbstractCommandline.__init__``.

        The integer-valued options (``lmax``, ``thr``, ``stars``) carry a
        checker function; how a rejected value is reported is decided by
        ``AbstractCommandline`` (presumably a ValueError - confirm there).
        """

        def _is_int(value):
            """Return True for real integers, excluding True/False."""
            # bool is a subclass of int, so a bare isinstance(value, int)
            # would let True/False through and render e.g. "-lmax True".
            return isinstance(value, int) and not isinstance(value, bool)

        self.program_name = cmd
        self.parameters = [
            _Switch(
                ["-afc", "afc"],
                r"Creates additional output file '\*.afc' "
                "containing data of all fragments considered "
                "for alignment WARNING: this file can be HUGE !",
            ),
            _Switch(
                ["-afc_v", "afc_v"],
                "Like '-afc' but verbose: fragments are explicitly "
                "printed. WARNING: this file can be EVEN BIGGER !",
            ),
            _Switch(
                ["-anc", "anc"],
                "Anchored alignment. Requires a file <seq_file>.anc "
                "containing anchor points.",
            ),
            _Switch(
                ["-cs", "cs"],
                "If segments are translated, not only the 'Watson "
                "strand' but also the 'Crick strand' is looked at.",
            ),
            _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."),
            _Switch(
                ["-ds", "ds"],
                "'dna alignment speed up' - non-translated nucleic acid "
                "fragments are taken into account only if they start "
                "with at least two matches. Speeds up DNA alignment at "
                "the expense of sensitivity.",
            ),
            _Switch(["-fa", "fa"], "Additional output file in FASTA format."),
            _Switch(
                ["-ff", "ff"],
                r"Creates file \*.frg containing information about all "
                "fragments that are part of the respective optimal "
                "pairwise alignments plus information about "
                "consistency in the multiple alignment",
            ),
            # equate=False: the value is emitted as a separate token
            # ("-fn aligned"), as shown in the class docstring example.
            _Option(
                ["-fn", "fn"],
                "Output files are named <out_file>.<extension>.",
                equate=False,
            ),
            _Switch(
                ["-fop", "fop"],
                r"Creates file \*.fop containing coordinates of all "
                "fragments that are part of the respective pairwise alignments.",
            ),
            _Switch(
                ["-fsm", "fsm"],
                r"Creates file \*.fsm containing coordinates of all "
                "fragments that are part of the final alignment",
            ),
            _Switch(
                ["-iw", "iw"],
                "Overlap weights switched off (by default, overlap "
                "weights are used if up to 35 sequences are aligned). "
                "This option speeds up the alignment but may lead "
                "to reduced alignment quality.",
            ),
            _Switch(
                ["-lgs", "lgs"],
                "'long genomic sequences' - combines the following "
                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                "-fop, -ff, -cs, -ds, -pst ",
            ),
            _Switch(
                ["-lgs_t", "lgs_t"],
                "Like '-lgs' but with all segment pairs assessed "
                "at the peptide level (rather than 'mixed alignments' "
                "as with the '-lgs' option). Therefore faster than "
                "-lgs but not very sensitive for non-coding regions.",
            ),
            _Option(
                ["-lmax", "lmax"],
                "Maximum fragment length = x  (default: x = 40 or "
                "x = 120 for 'translated' fragments). Shorter x "
                "speeds up the program but may affect alignment quality.",
                checker_function=_is_int,
                equate=False,
            ),
            _Switch(
                ["-lo", "lo"],
                r"(Long Output) Additional file \*.log with information "
                "about fragments selected for pairwise alignment and "
                "about consistency in multi-alignment procedure.",
            ),
            _Switch(
                ["-ma", "ma"],
                "'mixed alignments' consisting of P-fragments and "
                "N-fragments if nucleic acid sequences are aligned.",
            ),
            _Switch(
                ["-mask", "mask"],
                "Residues not belonging to selected fragments are "
                r"replaced by '\*' characters in output alignment "
                "(rather than being printed in lower-case characters)",
            ),
            _Switch(
                ["-mat", "mat"],
                r"Creates file \*mat with substitution counts derived "
                "from the fragments that have been selected for alignment.",
            ),
            # NOTE(review): the help text mentions a threshold "t", yet this
            # is declared as a value-less switch - confirm against the
            # DIALIGN documentation whether it should carry a value.
            _Switch(
                ["-mat_thr", "mat_thr"],
                "Like '-mat' but only fragments with weight score "
                "> t are considered",
            ),
            _Switch(
                ["-max_link", "max_link"],
                "'maximum linkage' clustering used to construct "
                "sequence tree (instead of UPGMA).",
            ),
            _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"], "'motif' option.", equate=False),
            _Switch(["-msf", "msf"], "Separate output file in MSF format."),
            _Switch(
                ["-n", "n"],
                "Input sequences are nucleic acid sequences. "
                "No translation of fragments.",
            ),
            _Switch(
                ["-nt", "nt"],
                "Input sequences are nucleic acid sequences and "
                "'nucleic acid segments' are translated to 'peptide "
                "segments'.",
            ),
            _Switch(
                ["-nta", "nta"],
                "'no textual alignment' - textual alignment suppressed. "
                "This option makes sense if other output files are of "
                "interest -- e.g. the fragment files created with -ff, "
                "-fop, -fsm or -lo.",
            ),
            _Switch(
                ["-o", "o"],
                "Fast version, resulting alignments may be slightly different.",
            ),
            _Switch(
                ["-ow", "ow"],
                "Overlap weights enforced (By default, overlap weights "
                "are used only if up to 35 sequences are aligned since "
                "calculating overlap weights is time consuming).",
            ),
            _Switch(
                ["-pst", "pst"],
                r"'print status'. Creates and updates a file \*.sta with "
                "information about the current status of the program "
                "run.  This option is recommended if large data sets "
                "are aligned since it allows the user to estimate the "
                "remaining running time.",
            ),
            # NOTE(review): described as a "minimum similarity value" (and
            # '-lgs' above lists "-smin 8"), yet declared as a value-less
            # switch - confirm against the DIALIGN documentation.
            _Switch(
                ["-smin", "smin"],
                "Minimum similarity value for first residue pair "
                "(or codon pair) in fragments. Speeds up protein "
                "alignment or alignment of translated DNA fragments "
                "at the expense of sensitivity.",
            ),
            _Option(
                ["-stars", "stars"],
                r"Maximum number of '\*' characters indicating degree "
                "of local similarity among sequences. By default, no "
                "stars are used but numbers between 0 and 9, instead.",
                # True/False compare equal to 1/0 and would otherwise pass
                # the range test, so they are excluded explicitly.
                checker_function=lambda x: (
                    not isinstance(x, bool) and x in range(0, 10)
                ),
                equate=False,
            ),
            _Switch(["-stdo", "stdo"], "Results written to standard output."),
            _Switch(
                ["-ta", "ta"],
                "Standard textual alignment printed (overrides "
                "suppression of textual alignments in special "
                "options, e.g. -lgs)",
            ),
            _Option(
                ["-thr", "thr"],
                "Threshold T = x.",
                checker_function=_is_int,
                equate=False,
            ),
            _Switch(
                ["-xfr", "xfr"],
                "'exclude fragments' - list of fragments can be "
                "specified that are NOT considered for pairwise alignment",
            ),
            # Declared last; in the class docstring example the input file
            # name is rendered after all switches and options.
            _Argument(
                ["input"],
                "Input file name. Must be FASTA format",
                filename=True,
                is_required=True,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)


if __name__ == "__main__":
    # Only reached when the file is executed as a script; the helper is
    # imported here so that a plain import of this module does not need it.
    # Presumably this exercises the docstring examples above - see
    # Bio._utils.run_doctest for the exact behaviour.
    from Bio import _utils

    _utils.run_doctest()