Spaces:
No application file
No application file
File size: 10,101 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
# Copyright 2009 by Cymon J. Cox. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command line wrapper for the multiple alignment program DIALIGN2-2."""
from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Notes
    -----
    Last checked against version: 2.2

    Options that carry a value (``fn``, ``lmax``, ``mat_thr``, ``mot``,
    ``smin``, ``stars`` and ``thr``) are written as ``-name value``; all
    other options are plain on/off switches.

    References
    ----------
    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Examples
    --------
    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Initialize the class.

        Parameters
        ----------
        cmd : str
            Name (or path) of the DIALIGN executable; stored as
            ``program_name`` and forwarded to the base class.
        **kwargs
            Initial option values keyed by the Python-side option names
            declared below (e.g. ``input``, ``fn``, ``fa``); forwarded
            unchanged to ``AbstractCommandline.__init__``.

        """
        self.program_name = cmd
        # The doctest in the class docstring expects "-fa" before "-fn" and
        # the input file last, which matches the order of this list, so new
        # entries should not reorder the existing ones.
        self.parameters = [
            _Switch(
                ["-afc", "afc"],
                r"Creates additional output file '\*.afc' "
                "containing data of all fragments considered "
                "for alignment WARNING: this file can be HUGE !",
            ),
            _Switch(
                ["-afc_v", "afc_v"],
                "Like '-afc' but verbose: fragments are explicitly "
                "printed. WARNING: this file can be EVEN BIGGER !",
            ),
            _Switch(
                ["-anc", "anc"],
                "Anchored alignment. Requires a file <seq_file>.anc "
                "containing anchor points.",
            ),
            _Switch(
                ["-cs", "cs"],
                "If segments are translated, not only the 'Watson "
                "strand' but also the 'Crick strand' is looked at.",
            ),
            _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."),
            _Switch(
                ["-ds", "ds"],
                "'dna alignment speed up' - non-translated nucleic acid "
                "fragments are taken into account only if they start "
                "with at least two matches. Speeds up DNA alignment at "
                "the expense of sensitivity.",
            ),
            _Switch(["-fa", "fa"], "Additional output file in FASTA format."),
            _Switch(
                ["-ff", "ff"],
                r"Creates file \*.frg containing information about all "
                "fragments that are part of the respective optimal "
                "pairwise alignments plus information about "
                "consistency in the multiple alignment",
            ),
            _Option(
                ["-fn", "fn"],
                "Output files are named <out_file>.<extension>.",
                equate=False,
            ),
            _Switch(
                ["-fop", "fop"],
                r"Creates file \*.fop containing coordinates of all "
                "fragments that are part of the respective pairwise alignments.",
            ),
            _Switch(
                ["-fsm", "fsm"],
                r"Creates file \*.fsm containing coordinates of all "
                "fragments that are part of the final alignment",
            ),
            _Switch(
                ["-iw", "iw"],
                "Overlap weights switched off (by default, overlap "
                "weights are used if up to 35 sequences are aligned). "
                "This option speeds up the alignment but may lead "
                "to reduced alignment quality.",
            ),
            _Switch(
                ["-lgs", "lgs"],
                "'long genomic sequences' - combines the following "
                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                "-fop, -ff, -cs, -ds, -pst ",
            ),
            _Switch(
                ["-lgs_t", "lgs_t"],
                "Like '-lgs' but with all segment pairs assessed "
                "at the peptide level (rather than 'mixed alignments' "
                "as with the '-lgs' option). Therefore faster than "
                "-lgs but not very sensitive for non-coding regions.",
            ),
            _Option(
                ["-lmax", "lmax"],
                "Maximum fragment length = x (default: x = 40 or "
                "x = 120 for 'translated' fragments). Shorter x "
                "speeds up the program but may affect alignment quality.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["-lo", "lo"],
                r"(Long Output) Additional file \*.log with information "
                "about fragments selected for pairwise alignment and "
                "about consistency in multi-alignment procedure.",
            ),
            _Switch(
                ["-ma", "ma"],
                "'mixed alignments' consisting of P-fragments and "
                "N-fragments if nucleic acid sequences are aligned.",
            ),
            _Switch(
                ["-mask", "mask"],
                "Residues not belonging to selected fragments are "
                r"replaced by '\*' characters in output alignment "
                "(rather than being printed in lower-case characters)",
            ),
            _Switch(
                ["-mat", "mat"],
                r"Creates file \*mat with substitution counts derived "
                "from the fragments that have been selected for alignment.",
            ),
            # '-mat_thr' carries the threshold t named in its help text, so
            # it must be an option with a value rather than a bare switch.
            _Option(
                ["-mat_thr", "mat_thr"],
                "Like '-mat' but only fragments with weight score "
                "> t are considered",
                checker_function=lambda x: isinstance(x, (int, float)),
                equate=False,
            ),
            _Switch(
                ["-max_link", "max_link"],
                "'maximum linkage' clustering used to construct "
                "sequence tree (instead of UPGMA).",
            ),
            _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"], "'motif' option.", equate=False),
            _Switch(["-msf", "msf"], "Separate output file in MSF format."),
            _Switch(
                ["-n", "n"],
                "Input sequences are nucleic acid sequences. "
                "No translation of fragments.",
            ),
            _Switch(
                ["-nt", "nt"],
                "Input sequences are nucleic acid sequences and "
                "'nucleic acid segments' are translated to 'peptide "
                "segments'.",
            ),
            _Switch(
                ["-nta", "nta"],
                "'no textual alignment' - textual alignment suppressed. "
                "This option makes sense if other output files are of "
                "interest -- e.g. the fragment files created with -ff, "
                "-fop, -fsm or -lo.",
            ),
            _Switch(
                ["-o", "o"],
                "Fast version, resulting alignments may be slightly different.",
            ),
            _Switch(
                ["-ow", "ow"],
                "Overlap weights enforced (By default, overlap weights "
                "are used only if up to 35 sequences are aligned since "
                "calculating overlap weights is time consuming).",
            ),
            _Switch(
                ["-pst", "pst"],
                r"'print status'. Creates and updates a file \*.sta with "
                "information about the current status of the program "
                "run. This option is recommended if large data sets "
                "are aligned since it allows the user to estimate the "
                "remaining running time.",
            ),
            # '-smin' takes the minimum similarity value (cf. "-smin 8" in
            # the '-lgs' help text above); as a switch the value was lost.
            _Option(
                ["-smin", "smin"],
                "Minimum similarity value for first residue pair "
                "(or codon pair) in fragments. Speeds up protein "
                "alignment or alignment of translated DNA fragments "
                "at the expense of sensitivity.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Option(
                ["-stars", "stars"],
                r"Maximum number of '\*' characters indicating degree "
                "of local similarity among sequences. By default, no "
                "stars are used but numbers between 0 and 9, instead.",
                checker_function=lambda x: x in range(0, 10),
                equate=False,
            ),
            _Switch(["-stdo", "stdo"], "Results written to standard output."),
            _Switch(
                ["-ta", "ta"],
                "Standard textual alignment printed (overrides "
                "suppression of textual alignments in special "
                "options, e.g. -lgs)",
            ),
            _Option(
                ["-thr", "thr"],
                "Threshold T = x.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["-xfr", "xfr"],
                "'exclude fragments' - list of fragments can be "
                "specified that are NOT considered for pairwise alignment",
            ),
            _Argument(
                ["input"],
                "Input file name. Must be FASTA format",
                filename=True,
                is_required=True,
            ),
        ]
        super().__init__(cmd, **kwargs)
if __name__ == "__main__":
    # Invoked as a script: hand over to Biopython's doctest runner helper.
    from Bio._utils import run_doctest as _run_doctest

    _run_doctest()
|