jinysun committed on
Commit
4ddb141
·
1 Parent(s): bed9190

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +37 -19
  2. run.py +1 -4
  3. screen.py +121 -0
  4. test.ckpt +3 -0
app.py CHANGED
@@ -4,25 +4,27 @@ import rdkit
4
  import streamlit_ketcher
5
  from streamlit_ketcher import st_ketcher
6
  import run
 
7
 
8
  # Page setup
9
  st.set_page_config(page_title="DeepDAP", page_icon="๐Ÿ”‹", layout="wide")
10
  st.title("๐Ÿ”‹DeepDAP")
11
-
12
  # Connect to the Google Sheet
13
 
14
  url1= r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0/gviz/tq?tqx=out:csv&sheet=dap"
15
  df1 = pd.read_csv(url1, dtype=str, encoding='utf-8')
16
  col1, col2 = st.columns(2)
17
  with col1:
18
- text_search = st.text_input("๐Ÿ”Search papers or molecules", value="")
 
19
  m1 = df1["Donor_Name"].str.contains(text_search)
20
  m2 = df1["reference"].str.contains(text_search)
21
  m3 = df1["Acceptor_Name"].str.contains(text_search)
22
  df_search = df1[m1 | m2|m3]
23
  with col2:
24
- st.link_button("๐Ÿ“DATABASE", r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0")
25
- st.caption('๐ŸŽ‰If you want to update the database, click the button.')
26
  if text_search:
27
  st.write(df_search)
28
  st.download_button( "โฌ‡๏ธDownload edited files as .csv", df_search.to_csv(), "df_search.csv", use_container_width=True)
@@ -31,24 +33,40 @@ edited_df = st.data_editor(df1, num_rows="dynamic")
31
  st.download_button(
32
  "โฌ‡๏ธ Download edited files as .csv", edited_df.to_csv(), "edited_df.csv", use_container_width=True
33
  )
34
-
35
- option = st.selectbox(
36
- "๐Ÿ‘‡Select the type of active layer...",
37
- ("Donor", "Acceptor"), placeholder="Choose the type of active layer...",index = None
38
  )
39
- if option == 'Acceptor':
40
-
41
- molecule = st.text_input("๐Ÿ‘จโ€๐Ÿ”ฌAcceptor Molecule" )
42
  acceptor= st_ketcher(molecule )
43
- st.markdown(f"๐Ÿ†New SMILES of edited acceptor molecules: {acceptor}")
44
- donor= st.text_input("๐Ÿ“‹ Donor Molecule")
45
- if option =='Donor':
46
- do= st.text_input("๐Ÿ‘จโ€๐Ÿ”ฌDonor Molecule" )
 
 
47
  donor = st_ketcher(do)
48
- st.markdown(f"๐Ÿ†New SMILES of edited donor molecules: {donor}")
49
- acceptor = st.text_input("๐Ÿ“‹ Acceptor Molecule")
 
50
  try:
51
  pce = run.smiles_aas_test( str(acceptor ), str(donor) )
52
- st.markdown(f"โšกPCE: ``{pce}``")
53
  except:
54
- st.markdown(f"โšกPCE: None ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import streamlit_ketcher
5
  from streamlit_ketcher import st_ketcher
6
  import run
7
+ import screen
8
 
9
  # Page setup
10
  st.set_page_config(page_title="DeepDAP", page_icon="๐Ÿ”‹", layout="wide")
11
  st.title("๐Ÿ”‹DeepDAP")
12
+ st.subheader('',divider='rainbow')
13
  # Connect to the Google Sheet
14
 
15
  url1= r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0/gviz/tq?tqx=out:csv&sheet=dap"
16
  df1 = pd.read_csv(url1, dtype=str, encoding='utf-8')
17
  col1, col2 = st.columns(2)
18
  with col1:
19
+ st.header("๐Ÿ”**Search papers or molecules**")
20
+ text_search = st.text_input(label="_", value="",label_visibility="hidden" )
21
  m1 = df1["Donor_Name"].str.contains(text_search)
22
  m2 = df1["reference"].str.contains(text_search)
23
  m3 = df1["Acceptor_Name"].str.contains(text_search)
24
  df_search = df1[m1 | m2|m3]
25
  with col2:
26
+ st.link_button(":black[๐Ÿ“**DATABASE**]", r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0")
27
+ st.caption(':black[๐Ÿ‘†If you want to update the origin database, click the button.]')
28
  if text_search:
29
  st.write(df_search)
30
  st.download_button( "โฌ‡๏ธDownload edited files as .csv", df_search.to_csv(), "df_search.csv", use_container_width=True)
 
33
  st.download_button(
34
  "โฌ‡๏ธ Download edited files as .csv", edited_df.to_csv(), "edited_df.csv", use_container_width=True
35
  )
36
+ st.subheader("๐Ÿ‘‡ :red[***Select the type of active layer...***]")
37
+ option = st.radio(
38
+ "๐Ÿ‘‡ :red[**Select the type of active layer...**]",
39
+ [":black[**Donor**]", ":black[**Acceptor**]"], label_visibility="hidden"
40
  )
41
+ if option ==":black[**Acceptor**]":
42
+ st.subheader("๐Ÿ‘จโ€๐Ÿ”ฌ**Input the SMILES of Acceptor Molecule**")
43
+ molecule = st.text_input("๐Ÿ‘จโ€๐Ÿ”ฌ**Input the SMILES of Acceptor Molecule**", label_visibility="hidden" )
44
  acceptor= st_ketcher(molecule )
45
+ st.subheader(f"๐Ÿ†**New SMILES of edited acceptor molecules**: {acceptor}")
46
+ st.subheader(":black[**๐ŸงกInput the SMILES of Donor Molecule**]")
47
+ donor= st.text_input(":black[**๐ŸงกInput the SMILES of Donor Molecule**]", label_visibility="hidden")
48
+ if option ==":black[**Donor**]":
49
+ st.subheader("๐Ÿ‘จโ€๐Ÿ”ฌ**Input the SMILES of Donor Molecule**" )
50
+ do= st.text_input("๐Ÿ‘จโ€๐Ÿ”ฌ**Input the SMILES of Donor Molecule**" , label_visibility="hidden")
51
  donor = st_ketcher(do)
52
+ st.subheader(f"๐Ÿ†**New SMILES of edited donor molecules**: {donor}")
53
+ st.subheader(":black[**๐ŸงกInput the SMILES of Acceptor Molecule**]")
54
+ acceptor = st.text_input(":black[**๐ŸงกInput the SMILES of Acceptor Molecule**]", label_visibility="hidden")
55
  try:
56
  pce = run.smiles_aas_test( str(acceptor ), str(donor) )
57
+ st.subheader(f"โšก**PCE**: ``{pce}``")
58
  except:
59
+ st.subheader(f"โšก**PCE**: None ")
60
+ st.subheader(":black[**๐ŸงกBatch screening for high-performance D/A pairs**]")
61
+ uploaded_files = st.file_uploader("Choose a CSV file")
62
+ st.write( "๐ŸŽˆupload a csv file containing ['donor' ] and ['acceptor']")
63
+ if st.button("๐Ÿ“‘PREDICT"):
64
+ if uploaded_files is not None:
65
+ text = st.markdown(":red[Predictions are being made... Please wait...]")
66
+ st.progress(100, text=None)
67
+ x = screen.smiles_aas_test(uploaded_files )
68
+ x = pd.DataFrame(x)
69
+
70
+ st.download_button( "โฌ‡๏ธDownload the predicted files as .csv", x.to_csv(), "predict results.csv", use_container_width=True)
71
+ else:
72
+ st.markdown(":red[Please upload the file first!]")
run.py CHANGED
@@ -4,8 +4,6 @@ import pandas as pd
4
  import torch
5
  from torch.nn import functional as F
6
  from transformers import AutoTokenizer
7
- import sys
8
- sys.path.insert(0, "jinysun/DeepDAP")
9
 
10
  from util.utils import *
11
 
@@ -97,8 +95,7 @@ def smiles_aas_test(smile_acc,smile_don):
97
  return {'Error_message': e}
98
 
99
 
100
- if __name__ == "__main__":
101
- a = smiles_aas_test(smile_acc,smile_don)
102
 
103
 
104
 
 
4
  import torch
5
  from torch.nn import functional as F
6
  from transformers import AutoTokenizer
 
 
7
 
8
  from util.utils import *
9
 
 
95
  return {'Error_message': e}
96
 
97
 
98
+
 
99
 
100
 
101
 
screen.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+
4
+ import torch
5
+ from torch.nn import functional as F
6
+ from transformers import AutoTokenizer
7
+
8
+ from util.utils import *
9
+
10
+ from tqdm import tqdm
11
+ from train import markerModel
12
+
13
# --- Module-level setup: CUDA environment, tokenizers and the prediction model ---

# Enumerate CUDA devices in PCI bus order and restrict visibility via env vars.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# NOTE(review): the value carries a trailing space ('0 '); kept unchanged — confirm it is intended.
os.environ["CUDA_VISIBLE_DEVICES"] = '0 '

device_count = torch.cuda.device_count()
if torch.cuda.is_available():
    device_biomarker = torch.device('cuda')
else:
    device_biomarker = torch.device("cpu")

# Tokenized inputs are moved to this (CPU) device inside marker_prediction.
device = torch.device('cpu')
a_model_name = 'DeepChem/ChemBERTa-10M-MLM'
d_model_name = 'DeepChem/ChemBERTa-10M-MTR'

# One tokenizer per input stream.
tokenizer = AutoTokenizer.from_pretrained(a_model_name)
d_tokenizer = AutoTokenizer.from_pretrained(d_model_name)

# Hyper-parameters are read from the prediction config file and wrapped in DictX
# so they can be accessed as attributes.
config = DictX(load_hparams('config/predict.json'))

model = markerModel(
    config.d_model_name,
    config.p_model_name,
    config.lr,
    config.dropout,
    config.layer_features,
    config.loss_fn,
    config.layer_limit,
    config.pretrained['chem'],
    config.pretrained['prot'],
)
# The instance above is immediately replaced by the one restored from the checkpoint.
model = markerModel.load_from_checkpoint(config.load_checkpoint, strict=False)
model.eval()
model.freeze()

# Wrap in DataParallel only when a CUDA device was detected.
if device_biomarker.type == 'cuda':
    model = torch.nn.DataParallel(model)
40
+
41
def get_marker(drug_inputs, prot_inputs):
    """Run the module-level ``model`` on one tokenized batch.

    Args:
        drug_inputs: first model input; ``marker_prediction`` passes a dict
            holding ``'input_ids'`` and ``'attention_mask'`` tensors.
        prot_inputs: second model input, built the same way.

    Returns:
        list: the squeezed model output converted with ``tolist()``. The
        result is always a list, including when the batch holds one row.
    """
    output_preds = model(drug_inputs, prot_inputs)

    predict = torch.squeeze(output_preds)
    # squeeze() without a dim also drops a batch dimension of size 1, leaving a
    # 0-d tensor whose tolist() is a bare float; the caller indexes the result
    # per row, so restore a length-1 dimension in that case.
    if predict.dim() == 0:
        predict = predict.reshape(1)

    return predict.tolist()
51
+
52
+
53
def marker_prediction(smiles, aas):
    """Tokenize one batch of paired inputs, run the model and label the results.

    Args:
        smiles: sequence of strings fed to ``tokenizer``; reported under the
            ``'acceptor'`` key of each result.
        aas: sequence of strings; each is split into space-separated
            characters before being fed to ``d_tokenizer`` and is reported
            under the ``'donor'`` key.

    Returns:
        list[dict]: one ``{'acceptor', 'donor', 'predict'}`` dict per entry of
        ``aas``. On any exception the error is printed and
        ``{'Error_message': e}`` is returned instead.
    """
    try:
        # Characters of each entry are separated by single spaces before tokenizing.
        spaced_aas = [' '.join(list(sequence)) for sequence in aas]

        def _encode(tok, texts):
            # Pad/truncate to 510 tokens and keep only the two tensors the model needs.
            encoded = tok(texts, padding='max_length', max_length=510, truncation=True, return_tensors="pt")
            return {
                'input_ids': encoded['input_ids'].to(device),
                'attention_mask': encoded['attention_mask'].to(device),
            }

        a_inputs = _encode(tokenizer, smiles)
        d_inputs = _encode(d_tokenizer, spaced_aas)

        output_predict = get_marker(a_inputs, d_inputs)

        output_list = []
        for idx in range(len(aas)):
            output_list.append(
                {'acceptor': smiles[idx], 'donor': aas[idx], 'predict': output_predict[idx]}
            )
        return output_list

    except Exception as e:
        print(e)
        return {'Error_message': e}
80
+
81
+
82
def smiles_aas_test(file, batch_size=80):
    """Predict a value for every row of a CSV file, processing it in batches.

    Args:
        file: path or file-like object accepted by ``pandas.read_csv``.
        batch_size: number of rows sent to ``marker_prediction`` per call
            (default 80, the previously hard-coded value).

    Returns:
        pandas.DataFrame: the concatenated per-row results of
        ``marker_prediction``. If a batch fails, the ``{'Error_message': e}``
        dict produced by ``marker_prediction`` is returned unchanged; any
        other exception is printed and returned in the same dict form.
    """
    try:
        smiles_aas = pd.read_csv(file)

        # Rows are read by position: column 2 is passed as the first argument of
        # marker_prediction and column 1 as the second.
        # NOTE(review): this presumes a leading index column followed by two
        # SMILES columns — confirm the order against the expected upload format.
        pairs = [(row[2], row[1]) for row in smiles_aas.values]

        results = []
        for start in tqdm(range(0, len(pairs), batch_size)):
            first_col, second_col = zip(*pairs[start:start + batch_size])
            output_pred = marker_prediction(list(first_col), list(second_col))
            # marker_prediction signals failure with a dict; hand it back directly
            # instead of letting list concatenation / DataFrame construction raise
            # a second error that hides the real one.
            if isinstance(output_pred, dict):
                return output_pred
            results.extend(output_pred)

        return pd.DataFrame(results)

    except Exception as e:
        print(e)
        return {'Error_message': e}
121
+
test.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1081233b2f0b3c77752a98b3c9e4ae065cb21aae4e3e5d31f8d673a1c2069ded
3
+ size 81596523