File size: 8,844 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# Copyright (C) 2011, 2018 by Brandon Invergo ([email protected])
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Classes for the support of CODEML.

Maximum likelihood analysis using codon substitution models.
"""


import os.path
from ._paml import Paml
from . import _parse_codeml


class CodemlError(EnvironmentError):
    """CODEML failed. Run with verbose=True to view CODEML's error message."""


class Codeml(Paml):
    """An interface to CODEML, part of the PAML package."""

    def __init__(self, alignment=None, tree=None, working_dir=None, out_file=None):
        """Initialize the codeml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file. Other options found in the CODEML control
        have typical settings by default to run site class models 0, 1 and
        2 on a nucleotide alignment.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise FileNotFoundError("The specified tree file does not exist.")
        self.tree = tree
        self.ctl_file = "codeml.ctl"
        self._options = {
            "noisy": None,
            "verbose": None,
            "runmode": None,
            "seqtype": None,
            "CodonFreq": None,
            "ndata": None,
            "clock": None,
            "aaDist": None,
            "aaRatefile": None,
            "model": None,
            "NSsites": None,
            "icode": None,
            "Mgene": None,
            "fix_kappa": None,
            "kappa": None,
            "fix_omega": None,
            "omega": None,
            "fix_alpha": None,
            "alpha": None,
            "Malpha": None,
            "ncatG": None,
            "getSE": None,
            "RateAncestor": None,
            "Small_Diff": None,
            "cleandata": None,
            "fix_blength": None,
            "method": None,
            "rho": None,
            "fix_rho": None,
        }

    def write_ctl_file(self):
        """Dynamically build a CODEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the codeml class.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        with open(self.ctl_file, "w") as ctl_handle:
            ctl_handle.write(f"seqfile = {self._rel_alignment}\n")
            ctl_handle.write(f"outfile = {self._rel_out_file}\n")
            ctl_handle.write(f"treefile = {self._rel_tree}\n")
            for option in self._options.items():
                if option[1] is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                if option[0] == "NSsites":
                    # NSsites is stored in Python as a list but in the
                    # control file it is specified as a series of numbers
                    # separated by spaces.
                    NSsites = " ".join(str(site) for site in option[1])
                    ctl_handle.write(f"{option[0]} = {NSsites}\n")
                else:
                    ctl_handle.write(f"{option[0]} = {option[1]}\n")

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Codeml instance."""
        temp_options = {}
        if not os.path.isfile(ctl_file):
            raise FileNotFoundError(f"File not found: {ctl_file!r}")
        else:
            with open(ctl_file) as ctl_handle:
                for line in ctl_handle:
                    line = line.strip()
                    uncommented = line.split("*", 1)[0]
                    if uncommented != "":
                        if "=" not in uncommented:
                            raise AttributeError(
                                f"Malformed line in control file:\n{line!r}"
                            )
                        (option, value) = uncommented.split("=", 1)
                        option = option.strip()
                        value = value.strip()
                        if option == "seqfile":
                            self.alignment = value
                        elif option == "treefile":
                            self.tree = value
                        elif option == "outfile":
                            self.out_file = value
                        elif option == "NSsites":
                            site_classes = value.split(" ")
                            for n in range(len(site_classes)):
                                try:
                                    site_classes[n] = int(site_classes[n])
                                except ValueError:
                                    raise TypeError(
                                        f"Invalid site class: {site_classes[n]}"
                                    ) from None
                            temp_options["NSsites"] = site_classes
                        elif option not in self._options:
                            raise KeyError(f"Invalid option: {option}")
                        else:
                            if "." in value:
                                try:
                                    converted_value = float(value)
                                except ValueError:
                                    converted_value = value
                            else:
                                try:
                                    converted_value = int(value)
                                except ValueError:
                                    converted_value = value
                            temp_options[option] = converted_value
        for option in self._options:
            if option in temp_options:
                self._options[option] = temp_options[option]
            else:
                self._options[option] = None

    def print_options(self):
        """Print out all of the options and their current settings."""
        for option in self._options.items():
            if option[0] == "NSsites" and option[1] is not None:
                # NSsites is stored in Python as a list but in the
                # control file it is specified as a series of numbers
                # separated by spaces.
                NSsites = " ".join(str(site) for site in option[1])
                print(f"{option[0]} = {NSsites}")
            else:
                print(f"{option[0]} = {option[1]}")

    def _set_rel_paths(self):
        """Make all file/directory paths relative to the PWD (PRIVATE).

        CODEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = os.path.relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="codeml", parse=True):
        """Run codeml using the current configuration.

        If parse is True then read and return the results, otherwise
        return None.

        The arguments may be passed as either absolute or relative
        paths, despite the fact that CODEML requires relative paths.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise FileNotFoundError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None


def read(results_file):
    """Parse a CODEML results file."""
    results = {}
    if not os.path.exists(results_file):
        raise FileNotFoundError("Results file does not exist.")
    with open(results_file) as handle:
        lines = handle.readlines()
    if not lines:
        raise ValueError(
            "Empty results file.  Did CODEML exit successfully?  "
            "Run 'Codeml.run()' with 'verbose=True'."
        )
    (results, multi_models, multi_genes) = _parse_codeml.parse_basics(lines, results)
    results = _parse_codeml.parse_nssites(lines, results, multi_models, multi_genes)
    results = _parse_codeml.parse_pairwise(lines, results)
    results = _parse_codeml.parse_distances(lines, results)
    if not results:
        raise ValueError("Invalid results file")
    return results