File size: 5,731 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Copyright (C) 2011, 2018 by Brandon Invergo ([email protected])
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Classes for the support of yn00.

Yang and Nielsen (2000): estimating synonymous and nonsynonymous substitution
rates in pairwise comparison of protein-coding DNA sequences.
"""

import os.path
from ._paml import Paml
from . import _parse_yn00


class Yn00Error(EnvironmentError):
    """Signal that the yn00 program failed.

    Re-run with verbose=True to see the error message printed by yn00.
    """


class Yn00(Paml):
    """An interface to yn00, part of the PAML package.

    The yn00 options are stored in ``self._options``. An option whose value
    is None is left out of the control file written by write_ctl_file.
    """

    def __init__(self, alignment=None, working_dir=None, out_file=None):
        """Initialize the Yn00 instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment, the working directory and
        the final output file.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        self.ctl_file = "yn00.ctl"
        # Every supported option starts out unset (None).
        self._options = {
            "verbose": None,
            "icode": None,
            "weighting": None,
            "commonf3x4": None,
            "ndata": None,
        }

    def write_ctl_file(self):
        """Dynamically build a yn00 control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the yn00 class.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        with open(self.ctl_file, "w") as ctl_handle:
            ctl_handle.write(f"seqfile = {self._rel_alignment}\n")
            ctl_handle.write(f"outfile = {self._rel_out_file}\n")
            for name, value in self._options.items():
                if value is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                ctl_handle.write(f"{name} = {value}\n")

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the yn00 instance.

        Each non-empty line has the form ``option = value``; everything
        after the first ``*`` on a line is treated as a comment. The
        ``seqfile`` and ``outfile`` entries set the alignment and out_file
        attributes. Every other entry must be a known option; its value is
        converted to int or float when possible and kept as a string
        otherwise. Known options that do not appear in the file are reset
        to None.

        Raises:
         - FileNotFoundError - ctl_file is not an existing file.
         - AttributeError - a non-comment line contains no ``=``.
         - KeyError - a line names an option that yn00 does not support.

        """
        temp_options = {}
        if not os.path.isfile(ctl_file):
            raise FileNotFoundError(f"File not found: {ctl_file!r}")
        with open(ctl_file) as ctl_handle:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        f"Malformed line in control file:\n{line!r}"
                    )
                # Split on the first "=" only, so a value that itself
                # contains "=" (e.g. a file path) stays intact instead of
                # breaking the two-way unpacking.
                option, value = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "outfile":
                    self.out_file = value
                elif option not in self._options:
                    raise KeyError(f"Invalid option: {option}")
                else:
                    # Values that look like decimals or negative-exponent
                    # notation are tried as float, everything else as int;
                    # anything that fails to convert is kept as a string.
                    if "." in value or "e-" in value:
                        convert = float
                    else:
                        convert = int
                    try:
                        converted_value = convert(value)
                    except ValueError:
                        converted_value = value
                    temp_options[option] = converted_value
        # Only commit the options once the whole file parsed cleanly;
        # options missing from the file are reset to None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def run(self, ctl_file=None, verbose=False, command="yn00", parse=True):
        """Run yn00 using the current configuration.

        If parse is True then read and return the result, otherwise
        return None.
        """
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None


def read(results_file):
    """Parse a yn00 results file.

    The file is scanned for the section headers "(A) Nei-Gojobori (1986)
    method", "(B) Yang & Nielsen (2000) method" and "(C) LWL85, LPB93 &
    LWLm methods". The lines between consecutive headers are handed to the
    matching ``_parse_yn00`` helper, and the dictionary those helpers build
    up is returned.

    Raises:
     - FileNotFoundError - results_file does not exist.
     - ValueError - the file is empty, a section header appears before the
       section it depends on, or nothing could be parsed.

    """
    results = {}
    if not os.path.exists(results_file):
        raise FileNotFoundError("Results file does not exist.")
    with open(results_file) as handle:
        lines = handle.readlines()
    if not lines:
        raise ValueError(
            "Empty results file.  Did YN00 exit successfully?  "
            "Run 'Yn00.run()' with 'verbose=True'."
        )
    # Each section can only be parsed once the previous header was seen.
    # Start these as None so a truncated or out-of-order file is reported
    # as an invalid results file rather than as an UnboundLocalError.
    ng86_start = None
    yn00_start = None
    sequences = None
    for line_num, line in enumerate(lines):
        if "(A) Nei-Gojobori (1986) method" in line:
            ng86_start = line_num + 1
        elif "(B) Yang & Nielsen (2000) method" in line:
            if ng86_start is None:
                raise ValueError(
                    "Invalid results file: section (B) found before section (A)."
                )
            (results, sequences) = _parse_yn00.parse_ng86(
                lines[ng86_start:line_num], results
            )
            yn00_start = line_num + 1
        elif "(C) LWL85, LPB93 & LWLm methods" in line:
            if yn00_start is None:
                raise ValueError(
                    "Invalid results file: section (C) found before section (B)."
                )
            results = _parse_yn00.parse_yn00(
                lines[yn00_start:line_num], results, sequences
            )
            # Everything after the (C) header belongs to the other methods.
            results = _parse_yn00.parse_others(
                lines[line_num + 1 :], results, sequences
            )
    if not results:
        raise ValueError("Invalid results file.")
    return results