protpardelle / ProteinMPNN /helper_scripts /make_pssm_input_dict.py
Simon Duerr
add proteinmpnn
00aa807
raw
history blame
1.72 kB
import argparse
def main(args):
    """Build a per-structure, per-chain PSSM dictionary and write it as JSON.

    Each non-blank line of ``args.jsonl_input_path`` is parsed as one JSON
    object. For every such entry the archive
    ``<args.PSSM_input_path>/<entry name>.npz`` is loaded, and for every key
    of the entry that starts with ``seq_chain`` the last character of that
    key is used as the chain id. The arrays ``<chain>_coef``,
    ``<chain>_bias`` and ``<chain>_odds`` are read from the archive and
    stored as plain lists. The combined dictionary, keyed by entry name, is
    written to ``args.output_path`` as a single JSON line.

    Args:
        args: Object exposing the attributes ``jsonl_input_path``,
            ``PSSM_input_path`` and ``output_path`` (e.g. the namespace
            returned by ``argparse``).

    Raises:
        FileNotFoundError: If the input file or an expected ``.npz`` archive
            does not exist.
        KeyError: If an entry has no ``name`` key or an archive lacks one of
            the expected per-chain arrays.
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    import json
    import numpy as np

    my_dict = {}
    with open(args.jsonl_input_path, 'r') as json_file:
        # Stream the file line by line instead of materialising it first.
        for json_str in json_file:
            # Blank lines (typically a trailing one) are not JSON documents;
            # skip them instead of letting json.loads raise.
            if not json_str.strip():
                continue
            result = json.loads(json_str)
            # Chain id is the last character of every 'seq_chain*' key.
            all_chain_list = [item[-1:] for item in result if item.startswith('seq_chain')]
            path_to_PSSM = args.PSSM_input_path + "/" + result['name'] + ".npz"
            print(path_to_PSSM)
            # The loaded archive keeps a file handle open; the context manager
            # closes it once the arrays have been copied out as lists.
            with np.load(path_to_PSSM) as pssm_input:
                pssm_dict = {}
                for chain in all_chain_list:
                    pssm_dict[chain] = {
                        # [L] per position coefficient to trust PSSM;
                        # 0.0 - do not use it; 1.0 - just use PSSM only
                        'pssm_coef': pssm_input[chain + '_coef'].tolist(),
                        # [L,21] probability (sums up to 1.0 over alphabet of
                        # size 21) from PSSM
                        'pssm_bias': pssm_input[chain + '_bias'].tolist(),
                        # [L,21] log_odds ratios coming from PSSM;
                        # optional/not needed
                        'pssm_log_odds': pssm_input[chain + '_odds'].tolist(),
                    }
            my_dict[result['name']] = pssm_dict

    # Write the combined dictionary as a single JSON line.
    with open(args.output_path, 'w') as f:
        f.write(json.dumps(my_dict) + '\n')
if __name__ == "__main__":
    # Script entry point: declare the three string-valued path options,
    # parse the command line and pass the resulting namespace to main().
    cli_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    option_specs = (
        ("--PSSM_input_path", "Path to PSSMs saved as npz files."),
        ("--jsonl_input_path", "Path where to load .jsonl dictionary of parsed pdbs."),
        ("--output_path", "Path where to save .jsonl dictionary with PSSM bias."),
    )
    for option_flag, option_help in option_specs:
        cli_parser.add_argument(option_flag, type=str, help=option_help)
    main(cli_parser.parse_args())