Spaces:
Configuration error
Configuration error
shaktidharreddy08
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from PIL import Image
|
4 |
+
import subprocess
|
5 |
+
import os
|
6 |
+
import base64
|
7 |
+
import pickle
|
8 |
+
|
9 |
+
# Molecular descriptor calculator
|
10 |
+
def desc_calc():
    """Compute PubChem fingerprints for the molecules in the working directory.

    Runs the PaDEL-Descriptor jar over ``./`` and writes the result to
    ``descriptors_output.csv``. The ``molecule.smi`` input file is deleted
    afterwards, whether or not the calculation succeeded.

    Raises:
        RuntimeError: If the Java process exits with a non-zero status, so
            that a stale or missing ``descriptors_output.csv`` is never
            mistaken for fresh output.
    """
    command = [
        "java",
        "-Xms2G",
        "-Xmx2G",
        "-Djava.awt.headless=true",
        "-jar", "./PaDEL-Descriptor/PaDEL-Descriptor.jar",
        "-removesalt",
        "-standardizenitro",
        "-fingerprints",
        "-descriptortypes", "./PaDEL-Descriptor/PubchemFingerprinter.xml",
        "-dir", "./",
        "-file", "descriptors_output.csv",
    ]
    try:
        # Run the argument list directly, without a shell. Combining a list
        # with shell=True on POSIX executes only the first item ("java") and
        # hands the remaining items to the shell instead of to Java.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    finally:
        # The .smi file is a temporary hand-off to PaDEL; always clean it up.
        os.remove('molecule.smi')

    if result.returncode != 0:
        details = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            f"PaDEL-Descriptor exited with status {result.returncode}: {details}"
        )
|
16 |
+
|
17 |
+
# File download
|
18 |
+
def filedownload(df):
    """Build an HTML download link that embeds ``df`` as a CSV file.

    Args:
        df: Table to export; it is serialised with ``to_csv(index=False)``.

    Returns:
        An ``<a>`` tag whose ``href`` is a base64 data URI holding the CSV
        text and whose ``download`` attribute is ``prediction.csv``.
    """
    csv_bytes = df.to_csv(index=False).encode()
    # base64 works on bytes; decode back to str so it can be embedded in HTML.
    payload = base64.b64encode(csv_bytes).decode()
    link_template = '<a href="data:file/csv;base64,{}" download="prediction.csv">Download Predictions</a>'
    return link_template.format(payload)
|
23 |
+
|
24 |
+
# Model building
|
25 |
+
def build_model(input_data):
    """Predict pIC50 values for the query compounds and render the results.

    Loads the saved regression model from ``acetylcholinesterase_model.pkl``,
    applies it to ``input_data``, then writes a table of molecule names and
    predictions plus a CSV download link to the Streamlit page.

    Args:
        input_data: Descriptor table passed unchanged to the model's
            ``predict`` method.

    Note:
        Molecule names are taken from column ``1`` of the module-level
        ``load_data`` frame, so the script must assign ``load_data`` before
        calling this function.
    """
    # Unpickling executes code stored in the file, so this path must only
    # ever point at a trusted model file. The context manager makes sure
    # the handle is closed once the model has been loaded.
    with open('acetylcholinesterase_model.pkl', 'rb') as model_file:
        load_model = pickle.load(model_file)

    # Apply model to make predictions
    prediction = load_model.predict(input_data)
    st.header('**Prediction output**')
    prediction_output = pd.Series(prediction, name='pIC50')
    molecule_name = pd.Series(load_data[1], name='molecule_name')
    df = pd.concat([molecule_name, prediction_output], axis=1)
    st.write(df)
    # The link is raw HTML, so Streamlit has to be told not to escape it.
    st.markdown(filedownload(df), unsafe_allow_html=True)
|
36 |
+
|
37 |
+
# Logo image
|
38 |
+
# Show the logo stretched to the column width.
logo = Image.open('logo.png')
st.image(logo, use_column_width=True)

# Page title, short description and credits, rendered as markdown.
page_intro = """
# Better Than a Placebo - Optum Hackathon 2022

This app allows you to predict the bioactivity towards inhibting the `Acetylcholinesterase` enzyme. `Acetylcholinesterase` is a drug target for Alzheimer's disease.

**Credits**
- App built in `Python` + `Streamlit` by Better Than a Placebo team - Optum Hackathon 2022
- Descriptor calculated using [PaDEL-Descriptor](http://www.yapcwsoft.com/dd/padeldescriptor/) [[Read the Paper]](https://doi.org/10.1002/jcc.21707).
---
"""
st.markdown(page_intro)
|
53 |
+
|
54 |
+
# Sidebar
|
55 |
+
# Sidebar: file upload widget and a link to an example input file.
with st.sidebar.header('1. Upload your CSV data'):
    uploaded_file = st.sidebar.file_uploader("Upload your input file", type=['txt'])
    st.sidebar.markdown("""
[Example input file](https://raw.githubusercontent.com/dataprofessor/bioactivity-prediction-app/main/example_acetylcholinesterase.txt)
""")

# Evaluate the button on its own line so it is rendered on every run,
# regardless of whether a file has been uploaded yet.
predict_clicked = st.sidebar.button('Predict')

# Only run the pipeline when there is a file to read: the uploader yields
# None until the user picks one, and pd.read_table(None) would crash.
if predict_clicked and uploaded_file is not None:
    # The upload is read as space-separated with no header row, then written
    # out tab-separated as molecule.smi for the descriptor calculation.
    # load_data stays a module-level name because build_model() reads it.
    load_data = pd.read_table(uploaded_file, sep=' ', header=None)
    load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)

    st.header('**Original input data**')
    st.write(load_data)

    with st.spinner("Calculating descriptors..."):
        desc_calc()

    # Read in calculated descriptors and display the dataframe
    st.header('**Calculated molecular descriptors**')
    desc = pd.read_csv('descriptors_output.csv')
    st.write(desc)
    st.write(desc.shape)

    # Keep only the descriptor columns named in the header of
    # descriptor_list.csv, in that order.
    st.header('**Subset of descriptors from previously built models**')
    Xlist = list(pd.read_csv('descriptor_list.csv').columns)
    desc_subset = desc[Xlist]
    st.write(desc_subset)
    st.write(desc_subset.shape)

    # Apply trained model to make prediction on query compounds
    build_model(desc_subset)
else:
    st.info('Upload input data in the sidebar to start!')
|