{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "pssm-overview",
   "metadata": {},
   "source": [
    "# Build example PSSM inputs for ProteinMPNN\n",
    "\n",
    "Reads parsed PDB entries from a JSON-lines file and, for every chain, writes\n",
    "`{chain}_coef`, `{chain}_odds` and `{chain}_bias` arrays into one `.npz` file per structure.\n",
    "\n",
    "The bias written here is **random placeholder data** pushed through a softmax;\n",
    "substitute your own PSSM bias where indicated."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72f2cbf5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ff4120b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The relative paths configured below are resolved against this directory.\n",
    "Path.cwd()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pssm-config",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0600b01",
   "metadata": {},
   "outputs": [],
   "source": [
    "input_path = \"./parsed_pdbs.jsonl\"\n",
    "output_path = \"../../inputs/PSSM_inputs/\"\n",
    "mpnn_alphabet = 'ACDEFGHIKLMNPQRSTVWYX'\n",
    "\n",
    "SEED = 0  # fixes the random placeholder bias so re-runs write identical files\n",
    "BIAS_TEMPERATURE = 0.5  # softmax temperature applied to the raw bias\n",
    "CHAIN_KEY_PREFIX = 'seq_chain_'  # keys with this prefix hold one chain's sequence\n",
    "\n",
    "# Structure, chain and residue index shown in the sanity-check plot at the end.\n",
    "EXAMPLE_NAME = '3HTN'\n",
    "EXAMPLE_CHAIN = 'A'\n",
    "EXAMPLE_POSITION = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71e81c08",
   "metadata": {},
   "outputs": [],
   "source": [
    "def softmax(x, T):\n",
    "    \"\"\"Temperature-scaled softmax over the last axis.\n",
    "\n",
    "    Args:\n",
    "        x: array-like of logits; normalised along the last axis.\n",
    "        T: temperature; the logits are divided by it before exponentiation.\n",
    "\n",
    "    Returns:\n",
    "        np.ndarray with the shape of ``x`` whose last axis sums to 1.\n",
    "    \"\"\"\n",
    "    scaled = np.asarray(x) / T\n",
    "    # Softmax is shift-invariant, so subtracting the row maximum leaves the\n",
    "    # result unchanged while keeping np.exp from overflowing on large logits.\n",
    "    scaled = scaled - np.max(scaled, axis=-1, keepdims=True)\n",
    "    weights = np.exp(scaled)\n",
    "    return weights / np.sum(weights, axis=-1, keepdims=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pssm-generate",
   "metadata": {},
   "source": [
    "## Generate one `.npz` file per structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac59ccb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row mask, shape (1, alphabet size): 1 in the 'X' column and 0 elsewhere.\n",
    "n_tokens = len(mpnn_alphabet)\n",
    "X_mask = np.zeros((1, n_tokens))\n",
    "X_mask[0, mpnn_alphabet.index('X')] = 1.0\n",
    "\n",
    "rng = np.random.default_rng(SEED)\n",
    "output_dir = Path(output_path)\n",
    "output_dir.mkdir(parents=True, exist_ok=True)  # np.savez does not create directories\n",
    "\n",
    "with open(input_path, 'r') as json_file:\n",
    "    for json_str in json_file:\n",
    "        if not json_str.strip():\n",
    "            continue  # tolerate blank lines, e.g. a trailing newline\n",
    "        result = json.loads(json_str)\n",
    "        # Strip the whole prefix so multi-character chain IDs are kept intact.\n",
    "        all_chain_list = [\n",
    "            key[len(CHAIN_KEY_PREFIX):]\n",
    "            for key in result\n",
    "            if key.startswith(CHAIN_KEY_PREFIX)\n",
    "        ]\n",
    "        output_dict = {}\n",
    "        for chain in all_chain_list:\n",
    "            chain_length = len(result[CHAIN_KEY_PREFIX + chain])\n",
    "            print(chain, chain_length)\n",
    "            output_dict[chain + '_coef'] = np.ones(chain_length, dtype=np.float32)\n",
    "            output_dict[chain + '_odds'] = np.ones((chain_length, n_tokens))\n",
    "            random_bias = rng.normal(size=(chain_length, n_tokens))  # input your own PSSM bias\n",
    "            # The large negative offset drives the 'X' column to probability 0.\n",
    "            output_dict[chain + '_bias'] = softmax(random_bias - X_mask*1e8, BIAS_TEMPERATURE)  # making into probability distribution\n",
    "        # One archive per structure; np.savez appends '.npz' to the name.\n",
    "        np.savez(output_dir / result['name'], **output_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pssm-check",
   "metadata": {},
   "source": [
    "## Sanity check: reload one file and plot a single position"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea3a4ed6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NpzFile keeps the archive open, so copy the needed array out and close it.\n",
    "example_file = Path(output_path) / f'{EXAMPLE_NAME}.npz'\n",
    "with np.load(example_file) as pssm_npz:\n",
    "    example_bias = pssm_npz[f'{EXAMPLE_CHAIN}_bias']\n",
    "\n",
    "# Every row must be a probability distribution with no mass on 'X'.\n",
    "assert np.allclose(example_bias.sum(axis=-1), 1.0)\n",
    "assert np.all(example_bias[:, mpnn_alphabet.index('X')] == 0)\n",
    "example_bias.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63dcd23e",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.plot(example_bias[EXAMPLE_POSITION, :])\n",
    "ax.set_xticks(np.arange(len(mpnn_alphabet)))\n",
    "ax.set_xticklabels(list(mpnn_alphabet))\n",
    "ax.grid(True)\n",
    "ax.set_xlabel('Amino acid')\n",
    "ax.set_ylabel('PSSM probability')\n",
    "ax.set_title(f'{EXAMPLE_NAME} chain {EXAMPLE_CHAIN}, residue index {EXAMPLE_POSITION}');"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}