# -*- coding: utf-8 -*-
import os
import errno
import shutil
from . import logger
from os.path import join, abspath
from subprocess import Popen, PIPE
from glob import glob
from pkg_resources import resource_filename
import json
import tarfile
from .postProcessing import prepare_output
from .analysis import aggregation_analysis as analyze
from collections import OrderedDict
_name = "CABS"
def run_cabs(config, pdb_input="input.pdb"):
"""Takes an Aggrescan3D Job config dictionary and should never change it"""
real_work_dir = config["work_dir"]
os.chdir(real_work_dir)
real_cabs_dir = os.path.join(real_work_dir, "CABS_sim")
real_models_dir = os.path.join(real_cabs_dir, "output_pdbs")
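    # CABS runs in its own subdirectory; per-model results are collected in the
    # models and stats subdirectories of the work directory.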
try:
_makedir(real_cabs_dir)
_makedir("models")
_makedir("stats")
shutil.copyfile(pdb_input, os.path.join(real_cabs_dir,
pdb_input))
except OSError:
raise logger.CabsError("Failed to prepare CABS directory at: %s" % real_cabs_dir)
os.chdir(real_cabs_dir)
logger.info(module_name=_name,
msg="Running CABS flex simulation")
cabs_cmd = _prepare_command(pdb_input=pdb_input, cabs_dir=config["cabs_dir"],
cabs_config=config["cabs_config"], n_models=config["n_models"])
logger.debug(module_name=_name,
msg="CABS ran with: %s" % " ".join(cabs_cmd))
out, err = Popen(cabs_cmd, stdout=PIPE, stderr=PIPE).communicate()
if err:
with open(join(real_work_dir, "CABSerror"), 'w') as f:
f.write(err)
_cleanup_files(work_dir=real_work_dir, cabs_dir=real_cabs_dir, clean=False)
raise logger.CabsError("Please see CABSerror file within your work directory for more details",
err_file="CABSerror")
try:
_check_output(models_dir=real_models_dir, n_models=config["n_models"])
except logger.CabsError:
shutil.move(join(real_cabs_dir, "CABS.log"), join(real_work_dir, "CABS.log"))
_cleanup_files(work_dir=real_work_dir, cabs_dir=real_cabs_dir, clean=False)
raise logger.CabsError("Please see CABS.log file within your work directory for more details",
err_file="CABS.log")
shutil.copyfile(pdb_input, join("output_pdbs", pdb_input))
os.chdir("output_pdbs")
models = glob("model*.pdb")
top = ""
max_avg = -100
averages = {}
all_models_data = []
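    # Run the Aggrescan3D analysis on every CABS model, keeping track of the
    # model with the highest average score (the most aggregation-prone one).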
for model in models:
model_path = abspath(model)
analyze(config=config, target=model_path, working_dir=real_models_dir, agg_work_dir=real_work_dir)
data, stats = prepare_output(work_dir=real_models_dir, final=False,
model_name=model.split(".")[0], scores_to_pdb=True, get_data=True)
all_models_data.append(data["All"])
current_avg = stats["All"]["avg_value"]
averages[model] = current_avg
        if current_avg > max_avg:
            max_avg = current_avg
            top = model
shutil.move("A3D.csv", join(real_work_dir, "stats", model.split(".")[0] + ".csv"))
shutil.move(model, join(real_work_dir, "models", model))
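    # Analyze the original input structure as well so it can be ranked
    # alongside the CABS models.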
analyze(config=config, target=pdb_input, working_dir=real_models_dir, agg_work_dir=real_work_dir)
stats = prepare_output(work_dir=real_models_dir, final=False,
model_name=pdb_input.split(".")[0], scores_to_pdb=True, get_data=False)
current_avg = stats["All"]["avg_value"]
averages[pdb_input] = current_avg
if current_avg > max_avg:
top = pdb_input
shutil.copyfile(pdb_input, "CABS_average.pdb") # Create a "fake" average model from input
with open("A3D.csv", 'r') as f:
a3d_file_backup = f.readlines()
shutil.move("A3D.csv", join(real_work_dir, "stats", pdb_input.split(".")[0] + ".csv"))
shutil.move(pdb_input, join(real_work_dir, "models", pdb_input)) # Move the actual input out
_create_avg_A3D(all_models_data, a3d_file_backup, real_models_dir) # Create a "fake" A3D file that has averages of all CABS models for each residue
stats = prepare_output(work_dir='', final=False,
model_name="CABS_average", scores_to_pdb=True, get_data=False)
shutil.move("CABS_average.pdb", join(real_work_dir, "models", "CABS_average.pdb"))
shutil.move("A3D.csv", join(real_work_dir, "stats", "CABS_average" + ".csv"))
averages["CABS_average.pdb"] = stats["All"]["avg_value"]
with open('averages', 'w') as avg:
json.dump(_sort_dict(my_dict=averages), avg)
os.chdir(real_work_dir)
_cleanup_files(work_dir=real_work_dir, cabs_dir=real_cabs_dir, input_pdb=pdb_input, top=top, clean=True)
superimpose(first_model="input.pdb", second_model="folded.pdb")


def _create_avg_A3D(data, a3d_content, work_dir):
    """Write an A3D.csv whose per-residue scores are averaged over all CABS models.

    Assumes data and a3d_content list the residues in the same order, which holds
    because data was read from an A3D.csv file.
    """
new_data = [0 for i in range(len(data[0]))]
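    # Sum the per-residue scores over all models, then divide by the number
    # of models to obtain the per-residue averages.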
for model in data:
for res_index in range(len(model)):
new_data[res_index] += model[res_index][1]
new_data = [i/len(data) for i in new_data]
with open(join(work_dir, "A3D.csv"), "w") as f:
        counter = 0
        first = True
        for line in a3d_content:
            if first:
                # The first line is the header; copy it unchanged
                f.write(line)
                first = False
            else:
                # Replace the score column with the averaged per-residue value
                parsed = line.split(",")
                parsed[-1] = "%.4f\r\n" % new_data[counter]
                f.write(",".join(parsed))
                counter += 1


def superimpose(first_model, second_model):
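    """Superimpose two models with the PyMOL script bundled in the aggrescan package
    and save the resulting image as CABSflex_supe.png."""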
try:
pymol_cmd = ["pymol", "-cq", resource_filename("aggrescan", join("data", "superimpose.pml")), "--", first_model, second_model]
out, err = Popen(pymol_cmd, stdout=PIPE, stderr=PIPE).communicate()
if err:
logger.warning(module_name="Pymol",
msg="Pymol reports an error: %s" % err)
shutil.move("superimposed.png", "CABSflex_supe.png")
except OSError:
        logger.warning(module_name=_name,
                       msg="Pymol failed to launch (most likely not present on the system). "
                           "Couldn't create a superimposed picture of CABS input and output.")
    except (shutil.Error, IOError):
        logger.critical(module_name="Pymol",
                        msg="Pymol failed to create a superimposed image of the input and "
                            "the most aggregation-prone CABS model")


def _prepare_command(pdb_input="input.pdb", cabs_dir=".", cabs_config='', n_models=12):
"""Prepare CABS settings according to user input"""
cabs_cmd = []
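    # If a path to a local CABS copy was given, run it through the Python
    # interpreter; otherwise fall back to the installed CABSflex command.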
if cabs_dir:
cabs_cmd.extend(["python", cabs_dir, "flex"])
else:
cabs_cmd.append("CABSflex")
if cabs_config:
cabs_cmd.extend(["-c", cabs_config])
else:
cabs_cmd.extend(["--image-file-format", "png", "-v", "4"])
cabs_cmd.extend(["--input", pdb_input, "--clustering-medoids", str(n_models), "--aa-rebuild", "--log"])
return cabs_cmd


def _cleanup_files(work_dir="", cabs_dir="", input_pdb="", top="", clean=True):
    """If clean is True, save selected result files into work_dir before cleanup;
    otherwise just remove everything that was created."""
if clean:
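        # Keep the RMSF plot and data, the most aggregation-prone model
        # (saved as folded.pdb) and the averages summary in the work directory.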
shutil.move(join(cabs_dir, "plots", "RMSF_seq.png"), join(work_dir, "CABSflex_rmsf.png"))
shutil.move(join(cabs_dir, "plots", "RMSF.csv"), join(work_dir, "CABSflex_rmsf.csv"))
shutil.copyfile(join(work_dir, "models", top.strip()), join(work_dir, "folded.pdb"))
shutil.copyfile(join(cabs_dir, "output_pdbs", "averages"), "averages")
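    # At higher log levels also archive all models and their statistics
    # before the temporary directories are removed.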
if logger.get_log_level() >= 2 and clean:
logger.log_file(module_name="CABS",
msg="Saving top CABS models as %s" % "models.tar.gz")
with tarfile.open(join(work_dir, "models.tar.gz"), "w:gz") as tar:
tar.add(join(work_dir, "models"), arcname=os.path.sep)
logger.log_file(module_name="CABS",
msg="Saving Aggrescan3D statistics for all CABS models as %s" % "stats.tar.gz")
with tarfile.open(join(work_dir, "stats.tar.gz"), "w:gz") as tar:
tar.add(join(work_dir, "stats"), arcname=os.path.sep)
shutil.rmtree(join(work_dir, "stats"), ignore_errors=True)
shutil.rmtree(join(work_dir, "models"), ignore_errors=True)
_del_cabs_dir(cabs_dir=cabs_dir)


def _del_cabs_dir(cabs_dir="CABS_sim"):
shutil.rmtree(cabs_dir, ignore_errors=True)


def _check_output(models_dir, n_models):
"""Check if all the required files were created"""
_file_list = ["CABS.log"]
_file_list.extend([join(models_dir, "model_%s.pdb" % str(i)) for i in range(n_models)])
_file_list.append(join("plots", "RMSF_seq*"))
_file_list.append(join("plots", "RMSF*"))
for filename in _file_list:
if not glob(filename):
logger.critical(module_name="CABS",
msg="File %s which CABS should have generated was not found." % filename)
raise logger.CabsError


def _sort_dict(my_dict):
    """Return an OrderedDict of a dictionary with numeric values, sorted by value in descending order."""
new_dict = OrderedDict()
for key, value in sorted(iter(my_dict.items()), key=lambda k_v: (k_v[1], k_v[0]), reverse=True):
new_dict[key] = value
return new_dict


def _makedir(path):
    """Create path, ignoring the error if it already exists."""
try:
os.makedirs(path)
except OSError as e:
if e.errno == errno.EEXIST and os.path.isdir(path):
pass
else:
raise