Spaces:
No application file
No application file
# Copyright (C) 2011, 2018 by Brandon Invergo ([email protected])
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Classes for the support of CODEML. | |
Maximum likelihood analysis using codon substitution models. | |
""" | |
import os.path | |
from ._paml import Paml | |
from . import _parse_codeml | |
class CodemlError(OSError):
    """Signal that the CODEML program failed.

    Run with verbose=True to view CODEML's error message.
    """
class Codeml(Paml):
    """An interface to CODEML, part of the PAML package."""

    def __init__(self, alignment=None, tree=None, working_dir=None, out_file=None):
        """Initialize the codeml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file. The alignment, working directory and output
        file are forwarded to ``Paml.__init__``.

        Every CODEML option starts out as ``None``; options left at ``None``
        are omitted when the control file is written.

        Raises:
            FileNotFoundError: if ``tree`` is given but does not exist.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None and not os.path.exists(tree):
            raise FileNotFoundError("The specified tree file does not exist.")
        self.tree = tree
        self.ctl_file = "codeml.ctl"
        # Insertion order is the order in which options are written to the
        # control file and printed by print_options().
        self._options = {
            "noisy": None,
            "verbose": None,
            "runmode": None,
            "seqtype": None,
            "CodonFreq": None,
            "ndata": None,
            "clock": None,
            "aaDist": None,
            "aaRatefile": None,
            "model": None,
            "NSsites": None,
            "icode": None,
            "Mgene": None,
            "fix_kappa": None,
            "kappa": None,
            "fix_omega": None,
            "omega": None,
            "fix_alpha": None,
            "alpha": None,
            "Malpha": None,
            "ncatG": None,
            "getSE": None,
            "RateAncestor": None,
            "Small_Diff": None,
            "cleandata": None,
            "fix_blength": None,
            "method": None,
            "rho": None,
            "fix_rho": None,
        }

    def write_ctl_file(self):
        """Dynamically build a CODEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the codeml class. The ``seqfile``, ``outfile``
        and ``treefile`` lines come first, followed by every option whose
        value is not ``None``.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        # NOTE(review): _rel_tree is only assigned by _set_rel_paths() when
        # self.tree is not None; _rel_alignment and _rel_out_file are
        # presumably assigned by Paml._set_rel_paths() - confirm callers
        # always set these files before writing a control file.
        with open(self.ctl_file, "w") as ctl_handle:
            ctl_handle.write(f"seqfile = {self._rel_alignment}\n")
            ctl_handle.write(f"outfile = {self._rel_out_file}\n")
            ctl_handle.write(f"treefile = {self._rel_tree}\n")
            for option, value in self._options.items():
                if value is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                if option == "NSsites":
                    # NSsites is stored in Python as a list but in the
                    # control file it is specified as a series of numbers
                    # separated by spaces.
                    value = " ".join(str(site) for site in value)
                ctl_handle.write(f"{option} = {value}\n")

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Codeml instance.

        Text after a ``*`` on a line is treated as a comment. ``seqfile``,
        ``treefile`` and ``outfile`` are stored on the instance as
        ``alignment``, ``tree`` and ``out_file``. ``NSsites`` becomes a list
        of ints. Any other recognised option is converted to ``float`` when
        its value contains a ``.``, otherwise to ``int``, and is kept as a
        string when that conversion fails. Options that do not appear in the
        file are reset to ``None``.

        Raises:
            FileNotFoundError: if ``ctl_file`` is not an existing file.
            AttributeError: if a non-comment line contains no ``=``.
            TypeError: if an ``NSsites`` entry is not an integer.
            KeyError: if the file names an option CODEML does not know.
        """
        if not os.path.isfile(ctl_file):
            raise FileNotFoundError(f"File not found: {ctl_file!r}")
        # Collect into a scratch dict so self._options is only touched once
        # the whole file has parsed without error.
        temp_options = {}
        with open(ctl_file) as ctl_handle:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        f"Malformed line in control file:\n{line!r}"
                    )
                option, value = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option == "NSsites":
                    # Split on any run of whitespace so that entries
                    # separated by several spaces or tabs parse correctly.
                    site_tokens = value.split()
                    if not site_tokens:
                        raise TypeError(f"Invalid site class: {value}")
                    site_classes = []
                    for token in site_tokens:
                        try:
                            site_classes.append(int(token))
                        except ValueError:
                            raise TypeError(
                                f"Invalid site class: {token}"
                            ) from None
                    temp_options["NSsites"] = site_classes
                elif option not in self._options:
                    raise KeyError(f"Invalid option: {option}")
                else:
                    convert = float if "." in value else int
                    try:
                        temp_options[option] = convert(value)
                    except ValueError:
                        # Not numeric (e.g. a file name): keep the raw text.
                        temp_options[option] = value
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def print_options(self):
        """Print out all of the options and their current settings."""
        for option, value in self._options.items():
            if option == "NSsites" and value is not None:
                # NSsites is stored in Python as a list but in the
                # control file it is specified as a series of numbers
                # separated by spaces.
                value = " ".join(str(site) for site in value)
            print(f"{option} = {value}")

    def _set_rel_paths(self):
        """Make all file/directory paths relative to the PWD (PRIVATE).

        CODEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = os.path.relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="codeml", parse=True):
        """Run codeml using the current configuration.

        If parse is True then read and return the results, otherwise
        return None.

        The arguments may be passed as either absolute or relative
        paths, despite the fact that CODEML requires relative paths.

        Raises:
            ValueError: if no tree file has been specified.
            FileNotFoundError: if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise FileNotFoundError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None
def read(results_file):
    """Parse a CODEML results file.

    The file's lines are passed through the ``_parse_codeml`` parsers
    (basics, NSsites, pairwise, distances) in that order, and the
    accumulated results are returned.

    Raises:
        FileNotFoundError: if ``results_file`` does not exist.
        ValueError: if the file is empty, or if the parsers produce no
            results.
    """
    if not os.path.exists(results_file):
        raise FileNotFoundError("Results file does not exist.")
    with open(results_file) as handle:
        lines = handle.readlines()
    if not lines:
        raise ValueError(
            "Empty results file. Did CODEML exit successfully? "
            "Run 'Codeml.run()' with 'verbose=True'."
        )
    parsed, multi_models, multi_genes = _parse_codeml.parse_basics(lines, {})
    parsed = _parse_codeml.parse_nssites(lines, parsed, multi_models, multi_genes)
    # The remaining parsers share one signature, so run them in sequence.
    for parse_step in (_parse_codeml.parse_pairwise, _parse_codeml.parse_distances):
        parsed = parse_step(lines, parsed)
    if not parsed:
        raise ValueError("Invalid results file")
    return parsed