# -*- coding: utf-8 -*- import os import errno import shutil from . import logger from os.path import join, abspath from subprocess import Popen, PIPE from glob import glob from pkg_resources import resource_filename import json import tarfile from .postProcessing import prepare_output from .analysis import aggregation_analysis as analyze from collections import OrderedDict _name = "CABS" def run_cabs(config, pdb_input="input.pdb"): """Takes an Aggrescan3D Job config dictionary and should never change it""" real_work_dir = config["work_dir"] os.chdir(real_work_dir) real_cabs_dir = os.path.join(real_work_dir, "CABS_sim") real_models_dir = os.path.join(real_cabs_dir, "output_pdbs") try: _makedir(real_cabs_dir) _makedir("models") _makedir("stats") shutil.copyfile(pdb_input, os.path.join(real_cabs_dir, pdb_input)) except OSError: raise logger.CabsError("Failed to prepare CABS directory at: %s" % real_cabs_dir) os.chdir(real_cabs_dir) logger.info(module_name=_name, msg="Running CABS flex simulation") cabs_cmd = _prepare_command(pdb_input=pdb_input, cabs_dir=config["cabs_dir"], cabs_config=config["cabs_config"], n_models=config["n_models"]) logger.debug(module_name=_name, msg="CABS ran with: %s" % " ".join(cabs_cmd)) out, err = Popen(cabs_cmd, stdout=PIPE, stderr=PIPE).communicate() if err: with open(join(real_work_dir, "CABSerror"), 'w') as f: f.write(err) _cleanup_files(work_dir=real_work_dir, cabs_dir=real_cabs_dir, clean=False) raise logger.CabsError("Please see CABSerror file within your work directory for more details", err_file="CABSerror") try: _check_output(models_dir=real_models_dir, n_models=config["n_models"]) except logger.CabsError: shutil.move(join(real_cabs_dir, "CABS.log"), join(real_work_dir, "CABS.log")) _cleanup_files(work_dir=real_work_dir, cabs_dir=real_cabs_dir, clean=False) raise logger.CabsError("Please see CABS.log file within your work directory for more details", err_file="CABS.log") shutil.copyfile(pdb_input, join("output_pdbs", pdb_input)) os.chdir("output_pdbs") models = glob("model*.pdb") top = "" max_avg = -100 averages = {} all_models_data = [] for model in models: model_path = abspath(model) analyze(config=config, target=model_path, working_dir=real_models_dir, agg_work_dir=real_work_dir) data, stats = prepare_output(work_dir=real_models_dir, final=False, model_name=model.split(".")[0], scores_to_pdb=True, get_data=True) all_models_data.append(data["All"]) current_avg = stats["All"]["avg_value"] averages[model] = current_avg if current_avg > max_avg: max_avg = stats["All"]["avg_value"] top = model shutil.move("A3D.csv", join(real_work_dir, "stats", model.split(".")[0] + ".csv")) shutil.move(model, join(real_work_dir, "models", model)) analyze(config=config, target=pdb_input, working_dir=real_models_dir, agg_work_dir=real_work_dir) stats = prepare_output(work_dir=real_models_dir, final=False, model_name=pdb_input.split(".")[0], scores_to_pdb=True, get_data=False) current_avg = stats["All"]["avg_value"] averages[pdb_input] = current_avg if current_avg > max_avg: top = pdb_input shutil.copyfile(pdb_input, "CABS_average.pdb") # Create a "fake" average model from input with open("A3D.csv", 'r') as f: a3d_file_backup = f.readlines() shutil.move("A3D.csv", join(real_work_dir, "stats", pdb_input.split(".")[0] + ".csv")) shutil.move(pdb_input, join(real_work_dir, "models", pdb_input)) # Move the actual input out _create_avg_A3D(all_models_data, a3d_file_backup, real_models_dir) # Create a "fake" A3D file that has averages of all CABS models for each residue stats = prepare_output(work_dir='', final=False, model_name="CABS_average", scores_to_pdb=True, get_data=False) shutil.move("CABS_average.pdb", join(real_work_dir, "models", "CABS_average.pdb")) shutil.move("A3D.csv", join(real_work_dir, "stats", "CABS_average" + ".csv")) averages["CABS_average.pdb"] = stats["All"]["avg_value"] with open('averages', 'w') as avg: json.dump(_sort_dict(my_dict=averages), avg) os.chdir(real_work_dir) _cleanup_files(work_dir=real_work_dir, cabs_dir=real_cabs_dir, input_pdb=pdb_input, top=top, clean=True) superimpose(first_model="input.pdb", second_model="folded.pdb") def _create_avg_A3D(data, a3d_content, work_dir): """Assuming the order is the same in data and a3d_content this should be the case as data is read from an A3D file""" new_data = [0 for i in range(len(data[0]))] for model in data: for res_index in range(len(model)): new_data[res_index] += model[res_index][1] new_data = [i/len(data) for i in new_data] with open(join(work_dir, "A3D.csv"), "w") as f: counter = 0 first = True suma = 0 for line in a3d_content: if first is True: f.write(line) first = False else: parsed = line.split(",") parsed[-1] = "%.4f\r\n" % new_data[counter] newline = ",".join(parsed) suma += new_data[counter] f.write(newline) counter += 1 def superimpose(first_model, second_model): try: pymol_cmd = ["pymol", "-cq", resource_filename("aggrescan", join("data", "superimpose.pml")), "--", first_model, second_model] out, err = Popen(pymol_cmd, stdout=PIPE, stderr=PIPE).communicate() if err: logger.warning(module_name="Pymol", msg="Pymol reports an error: %s" % err) shutil.move("superimposed.png", "CABSflex_supe.png") except OSError: logger.warning(module_name=_name, msg="Pymol failed to launch (most likely not present on the system)." "Couldn't create a superimposed picture of CABS input and output ") except (shutil.Error, IOError): logger.critical(module_name="Pymol", msg="Pymol failed to create a superimposed image for input and " "most aggregation prone CABS model") def _prepare_command(pdb_input="input.pdb", cabs_dir=".", cabs_config='', n_models=12): """Prepare CABS settings according to user input""" cabs_cmd = [] if cabs_dir: cabs_cmd.extend(["python", cabs_dir, "flex"]) else: cabs_cmd.append("CABSflex") if cabs_config: cabs_cmd.extend(["-c", cabs_config]) else: cabs_cmd.extend(["--image-file-format", "png", "-v", "4"]) cabs_cmd.extend(["--input", pdb_input, "--clustering-medoids", str(n_models), "--aa-rebuild", "--log"]) return cabs_cmd def _cleanup_files(work_dir="", cabs_dir="", input_pdb="", top="", clean=True): """If clean some files will be saved, else only remove all created files""" if clean: shutil.move(join(cabs_dir, "plots", "RMSF_seq.png"), join(work_dir, "CABSflex_rmsf.png")) shutil.move(join(cabs_dir, "plots", "RMSF.csv"), join(work_dir, "CABSflex_rmsf.csv")) shutil.copyfile(join(work_dir, "models", top.strip()), join(work_dir, "folded.pdb")) shutil.copyfile(join(cabs_dir, "output_pdbs", "averages"), "averages") if logger.get_log_level() >= 2 and clean: logger.log_file(module_name="CABS", msg="Saving top CABS models as %s" % "models.tar.gz") with tarfile.open(join(work_dir, "models.tar.gz"), "w:gz") as tar: tar.add(join(work_dir, "models"), arcname=os.path.sep) logger.log_file(module_name="CABS", msg="Saving Aggrescan3D statistics for all CABS models as %s" % "stats.tar.gz") with tarfile.open(join(work_dir, "stats.tar.gz"), "w:gz") as tar: tar.add(join(work_dir, "stats"), arcname=os.path.sep) shutil.rmtree(join(work_dir, "stats"), ignore_errors=True) shutil.rmtree(join(work_dir, "models"), ignore_errors=True) _del_cabs_dir(cabs_dir=cabs_dir) def _del_cabs_dir(cabs_dir="CABS_sim"): shutil.rmtree(cabs_dir, ignore_errors=True) def _check_output(models_dir, n_models): """Check if all the required files were created""" _file_list = ["CABS.log"] _file_list.extend([join(models_dir, "model_%s.pdb" % str(i)) for i in range(n_models)]) _file_list.append(join("plots", "RMSF_seq*")) _file_list.append(join("plots", "RMSF*")) for filename in _file_list: if not glob(filename): logger.critical(module_name="CABS", msg="File %s which CABS should have generated was not found." % filename) raise logger.CabsError def _sort_dict(my_dict): """Return a reverse-sorted by value, OrderedDict of a regular dictionary with number values""" new_dict = OrderedDict() for key, value in sorted(iter(my_dict.items()), key=lambda k_v: (k_v[1], k_v[0]), reverse=True): new_dict[key] = value return new_dict def _makedir(path): """Ignore error if path exists""" try: os.makedirs(path) except OSError as e: if e.errno == errno.EEXIST and os.path.isdir(path): pass else: raise