jfaustin's picture
secretion-scores (#4)
a3f3d91 verified
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import shutil
import json
from pkg_resources import resource_filename
from subprocess import Popen, PIPE
from os.path import exists, isdir, join, isfile
from . import logger
from . import pdb
from .postProcessing import prepare_output
from .analysis import aggregation_analysis as analyze
from .dynamic_module import run_cabs
from .foldx_module import FoldxWrap as fold
from glob import glob
from .optparser import save_config_file
from .auto_mutation import run_auto_mutation
# Public API of this module: only the Job class is exported by a star-import.
__all__ = ["Job"]
# Tag passed as `module_name` to the logger calls made from this module.
_name = "runJob"
'''
Note to future self:
The Job attributes are kinda global variables here; they are set from the config in Job.__init__.
The fate of the pdb files during the simulation is as follows:
original_input.pdb is kept during all the simulation and then renamed as input.pdb
input.pdb is the foldx result at each simulation stage, and is the "current" file
folded.pdb is a product of calculations usually soon renamed to input.pdb
output.pdb is the final product of the simulation
Whole thing is a mess mostly due to how foldx works (which has now improved but this program is still behind on it)
'''
class Job:
    """Run a single aggrescan3d job described by a config mapping.

    Every key of ``config`` is copied onto the instance as an attribute in
    ``__init__``; the methods below read (among others) ``work_dir``,
    ``tmp_dir``, ``overwrite``, ``foldx``, ``subprocess``, ``ph``, ``protein``,
    ``chain``, ``mutate``, ``dynamic``, ``movie``, ``auto_mutation`` and
    ``distance`` from there.
    """

    # avconv command lines keyed by the supported movie formats.
    _AVCONV_COMMANDS = {
        "mp4": 'avconv -r 8 -i mov%05d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline '
               '-preset slower -crf 18 -vf "scale=trunc(in_w/2)*2:trunc(in_h/2)*2" clip.mp4',
        "webm": 'avconv -r 8 -i mov%05d.png -c:v libvpx -c:a libvorbis -pix_fmt yuv420p '
                '-b:v 2M -crf 5 -vf "scale=trunc(in_w/2)*2:trunc(in_h/2)*2" clip.webm',
    }

    def __init__(self, config):
        """Copy ``config`` onto the instance and prepare the directories.

        :param config: mapping of option name -> value; each item becomes an
            attribute of the job and the mapping itself is kept as ``self.config``.
        :raises logger.AggrescanError: if ``work_dir`` exists but is not a
            directory, if ``overwrite`` is set and ``work_dir`` already holds an
            ``output.pdb``, or if ``work_dir`` cannot be created.
        """
        for argName, argValue in list(config.items()):
            setattr(self, argName, argValue)
            logger.debug(module_name=_name, msg="Setting %s to %s" % (argName, argValue))
        self.config = config

        if exists(self.work_dir):
            if not isdir(self.work_dir):
                raise logger.AggrescanError('Selected working directory: %s already exists and is not a directory. '
                                            'Quitting.' % self.work_dir,
                                            module_name=_name,
                                            work_dir_error=True)
            if self.overwrite and isfile(join(self.work_dir, "output.pdb")):
                raise logger.AggrescanError("The --overwrite options was seen. "
                                            "\nStopping the program to avoid overwriting files "
                                            "(workdir exists and contains output.pdb).",
                                            module_name=_name,
                                            work_dir_error=True)
            else:
                logger.warning(module_name=_name,
                               msg='Working directory already exists (possibly overwriting previous results -ow '
                                   'to prevent this behavior)')
        else:
            try:
                os.makedirs(self.work_dir)
            except OSError:
                raise logger.AggrescanError("Could not create working directory at %s" % self.work_dir,
                                            module_name=_name,
                                            work_dir_error=True)

        try:
            os.mkdir(self.tmp_dir)
        except OSError as err:
            # Still best effort (e.g. the directory is already there), but leave
            # a trace instead of swallowing the error silently.
            logger.debug(module_name=_name,
                         msg="Temporary directory %s was not created: %s" % (self.tmp_dir, err))

        if self.foldx:
            self.foldx_handler = fold(foldx_dir=self.foldx, work_dir=self.work_dir,
                                      skip_minimization=self.subprocess, ph=self.ph)
        save_config_file(config=config, work_dir=self.work_dir)

    def run_job(self):
        """Run the whole pipeline for the configured protein.

        Steps, in order: build and validate the pdb object, optionally build a
        mutant and minimize with FoldX, optionally run the dynamic (CABS) stage,
        run the aggregation analysis, optionally render a movie, post-process,
        move the results to ``work_dir`` and optionally start auto mutation.

        :raises logger.AggrescanError: if mutations were requested without FoldX.
        """
        logger.info(module_name=_name, msg='Starting aggrescan3d job on: %s with %s chain(s) selected' %
                                           (self.protein, self.chain or "all"))
        logger.info(module_name=_name, msg="Creating pdb object from: %s" % self.protein)
        pdbObj = pdb.Pdb(self.protein, output=join(self.work_dir, 'input.pdb'), chain=self.chain)
        pdbObj.validate()
        pdbObj.savePdbFile(path=join(self.work_dir, "original_input.pdb"))
        pdbObj.savePdbFile(path=join(self.tmp_dir, "input.pdb"))

        if self.mutate:
            if self.foldx:
                mutation = self.find_mutations(pdb_obj=pdbObj)
                self.foldx_handler.build_mutant(working_dir=self.tmp_dir, mutation_list=mutation)
            else:
                raise logger.AggrescanError("FoldX required for mutation analysis. To run aggrescan on a mutant without"
                                            " FoldX provide a mutant pdb file and run Aggrescan3D on it.",
                                            module_name=_name)

        if self.foldx:
            self.foldx_handler.minimize_energy(working_dir=self.tmp_dir)
        else:
            logger.info(module_name=_name,
                        msg="FoldX not utilized. Treating input pdb file as it was already optimized.")
            pdbObj.savePdbFile(path=join(self.tmp_dir, "folded.pdb"))

        if self.dynamic:
            # The dynamic stage is run from work_dir on "input.pdb"; afterwards
            # folded.pdb is moved from work_dir back into tmp_dir for the analysis.
            os.chdir(self.work_dir)
            shutil.move(join(self.tmp_dir, "folded.pdb"), "input.pdb")
            run_cabs(config=self.config)
            shutil.move(join(self.work_dir, "folded.pdb"), join(self.tmp_dir, "folded.pdb"))

        analyze(config=self.config, target="folded.pdb", working_dir=self.tmp_dir, agg_work_dir=self.work_dir)
        if self.movie:
            self.create_movie()
        self.post_processing(work_dir=self.tmp_dir, final=True, model_name="folded")
        self.cleanup()

        if self.auto_mutation:
            self.check_auto_mut(pdb_obj=pdbObj)  # This will also modify the auto_mutation excluded part slightly
            run_auto_mutation(work_dir=self.work_dir, options=self.auto_mutation,
                              foldx_loc=self.foldx, distance=self.distance, ph=self.ph)

    def post_processing(self, work_dir="", final=True, model_name="folded"):  # TODO remove this function?
        """Forward the arguments unchanged to ``prepare_output``."""
        prepare_output(work_dir=work_dir, final=final, model_name=model_name)

    @staticmethod
    def _residue_present(residues, chain, idx, resname=None):
        """Tell whether ``residues`` holds an entry matching chain, index and (optionally) name."""
        return any(
            res['chain'] == chain and res['residx'] == idx and (resname is None or res['resname'] == resname)
            for res in residues
        )

    @staticmethod
    def _to_text(raw):
        """Return subprocess output as text; ``bytes`` are decoded as UTF-8 with replacement."""
        if isinstance(raw, bytes):
            return raw.decode("utf-8", "replace")
        return raw

    def find_mutations(self, pdb_obj=None):
        """Translate ``self.mutate`` into a mutation string.

        Each row of ``self.mutate`` is read through its ``idx``, ``oldres``,
        ``newres`` and ``chain`` keys and checked against the chain/residue
        table obtained from ``pdb_obj.getChainIdxResname()`` (a JSON string).
        Rows that cannot be matched are logged and skipped.

        :param pdb_obj: object providing ``getChainIdxResname()``.
        :return: comma separated ``<oldres><chain><idx><newres>`` entries
            terminated by ``;``.
        :raises logger.AggrescanError: if none of the rows could be matched.
        """
        chain_map = json.loads(pdb_obj.getChainIdxResname())
        to_mutate = []
        for row in self.mutate:
            idx = str(row['idx'])
            old_res = str(row['oldres'])
            new_res = str(row['newres'])
            chain = str(row['chain'])
            found = False
            try:
                found = self._residue_present(chain_map[chain], chain, idx, resname=old_res)
            except KeyError:
                logger.warning(module_name=_name, msg="Mutation %s likely tried to mutate a chain "
                                                      "that doesn't exist." % json.dumps(row))
                logger.info(module_name=_name, msg="Available chains: %s" % list(chain_map.keys()))
            if found:
                to_mutate.append(old_res + chain + idx + new_res)
            else:
                logger.warning(module_name=_name, msg="Could not find the requested mutation: %s" % json.dumps(row))

        if not to_mutate:
            raise logger.AggrescanError("Mutations table provided but its parsing failed. "
                                        "Most likely all the provided mutations were incorrect "
                                        "(referring to non existing residues, numbering errors, etc.)",
                                        module_name=_name)
        mutation = ",".join(to_mutate).strip() + ";"
        logger.debug(module_name=_name, msg="Mutation list: %s" % mutation)
        return mutation

    def check_auto_mut(self, pdb_obj=None):
        """Validate the residues excluded from auto mutation and normalise them.

        Nothing happens unless ``self.auto_mutation`` has more than two items.
        Otherwise its third item (rows with ``idx`` and ``chain`` keys) is
        checked against ``pdb_obj.getChainIdxResname()``; missing residues are
        logged. Finally ``self.auto_mutation`` is turned into a list whose third
        item holds ``"<idx><chain>"`` strings.

        :param pdb_obj: object providing ``getChainIdxResname()``.
        """
        # The option value itself is already debug-logged by __init__, so the
        # former bare print() of it is gone.
        if len(self.auto_mutation) <= 2:
            return

        chain_map = json.loads(pdb_obj.getChainIdxResname())
        excluded = self.auto_mutation[2]
        counted = 0
        for row in excluded:
            idx = str(row['idx'])
            chain = str(row['chain'])
            found = False
            try:
                found = self._residue_present(chain_map[chain], chain, idx)
            except KeyError:
                logger.warning(module_name="Auto_mut", msg="Attempted to exclude a residue that probably"
                                                           "doesn't exist. (%s)" % json.dumps(row))
                logger.info(module_name="Auto_mut", msg="Available chains: %s" % list(chain_map.keys()))
            if found:
                counted += 1
            else:
                logger.warning(module_name="Auto_mut", msg="Couldn't find the residue number %s in chain %s to exclude from "
                                                           "auto mutation" % (oi_fmt(idx), chain) if False else
                                                           "Couldn't find the residue number %s in chain %s to exclude from "
                                                           "auto mutation" % (idx, chain))
        if counted == 0:
            logger.critical(msg="Residues to exclude from automated mutations provided but none "
                                "could be found in the pdb file.",
                            module_name="Auto_mut")
        # Parse it into "mutation syntax" for easier comparisons later on.
        # str() keeps this consistent with the matching above, which also
        # accepts non-string indices.
        self.auto_mutation = list(self.auto_mutation)
        self.auto_mutation[2] = [str(row['idx']) + str(row['chain']) for row in excluded]

    def cleanup(self):
        """Move output from the temporary directory to work directory before the former is deleted"""
        os.chdir(self.tmp_dir)
        shutil.move(join(self.tmp_dir, "A3D.csv"), join(self.work_dir, "A3D.csv"))
        shutil.move(join(self.tmp_dir, "output.pdb"), join(self.work_dir, "output.pdb"))
        shutil.move(join(self.tmp_dir, "folded_stats"), join(self.work_dir, "A3D_summary.json"))
        shutil.move(join(self.work_dir, "original_input.pdb"), join(self.work_dir, "input.pdb"))
        # Pictures are looked up in the current directory, i.e. tmp_dir (see chdir above).
        for ext in (".svg", ".png"):
            for picture in glob("*%s" % ext):
                shutil.move(picture, join(self.work_dir, picture))

    def create_movie(self):
        """
        First uses paintit.py to create movie frame png, then run avconv to create a movie
        This is a legacy function while it should work it's no longer really relevant or updated/tested

        Failures of either external tool are logged, never raised. An unknown
        ``self.movie`` format falls back to webm. Files in ``tmp_dir`` with
        ".png" in their name are removed once the avconv stage is over.
        """
        os.chdir(self.tmp_dir)
        pymCmd = "pymol -qc %s -- s input.pdb input_output ." % resource_filename("aggrescan", join("data", "paintit.py"))
        logger.debug(module_name="pyMol", msg="Pymol ran with: %s" % pymCmd)
        try:
            _, err = Popen(pymCmd, stdout=PIPE, stderr=PIPE, shell=True).communicate()
        except OSError as e:
            logger.debug(module_name="pyMol", msg="Exception caught: %s" % e)
            logger.critical(module_name="pyMol", msg="OSError while launching pymol. Perhaps it's not installed?")
            return
        # communicate() hands back bytes on Python 3; decode before str operations.
        err = self._to_text(err)
        if err:
            logger.critical(module_name="pyMol",
                            msg="Pymol encountered an error: %s Movie creation failed." % err.strip("\n"))
            return

        self.movie = self.movie.strip()
        if self.movie in self._AVCONV_COMMANDS:
            logger.info(module_name="Movie", msg="Creting movie with %s format" % self.movie)
        else:
            logger.warning(module_name="Movie", msg="Wrong movie format specified: %s Using webm instead." % self.movie)
            self.movie = "webm"
        av_cmd = self._AVCONV_COMMANDS[self.movie]
        clip_name = "clip.%s" % self.movie

        try:
            logger.debug(module_name="Avconv", msg="Avconv ran with: %s" % av_cmd)
            try:
                _, err = Popen(av_cmd, stdout=PIPE, stderr=PIPE, shell=True).communicate()
            except OSError as e:
                logger.critical(module_name="Avconv", msg="OSError while launching avconv. Perhaps it's not installed?")
                logger.debug(module_name="Avconv", msg="Exception caught: %s" % e)
                return
            err = self._to_text(err)
            if err:
                logger.debug(module_name="Avconv",
                             msg="Avconv output: %s" % err.strip("\n"))
            # Separate handler: IOError is an alias of OSError on Python 3, so a
            # shared try block would report a failed move as a launch failure.
            try:
                shutil.move(clip_name, join(self.work_dir, clip_name))
            except (IOError, OSError) as e2:
                logger.critical(module_name="Avconv", msg="Couldn't move the clip to working directory. "
                                                          "Movie creation likely failed")
                logger.debug(module_name="Avconv", msg="Exception caught: %s" % e2)
        finally:
            # Drop the movie frames whatever happened above.
            for frame in os.listdir(self.tmp_dir):
                if ".png" in frame:
                    os.remove(join(self.tmp_dir, frame))

    def get_tempdir(self):
        """Return the temporary directory used by this job (``self.tmp_dir``)."""
        return self.tmp_dir