updated app format
Browse files
- app.py +6 -6
- chemdata.py +2 -55
app.py
CHANGED
@@ -25,7 +25,7 @@ from rdkit.Chem.Draw import rdMolDraw2D
|
|
25 |
import pandas as pd
|
26 |
from st_keyup import st_keyup
|
27 |
|
28 |
-
st.set_page_config(layout="
|
29 |
|
30 |
|
31 |
def local_css(file_name):
|
@@ -60,7 +60,7 @@ formatted_text = (
|
|
60 |
"<h1 style='text-align: center;'>"
|
61 |
"<span style='color: red;'>Pharmacokinetics</span>"
|
62 |
"<span style='color: black;'> of </span>"
|
63 |
-
"<span style='color: blue;'>🤫confidential
|
64 |
"<span style='color: black;'> molecules</span>"
|
65 |
"</h1>"
|
66 |
)
|
@@ -82,9 +82,9 @@ The server on which the prediction is computed will never see the molecule in cl
|
|
82 |
Why is this **magic**? Because this is equivalent to computing the prediction on the molecule in clear text, but without sharing the molecule with the server.
|
83 |
Even if organization "B" - or in fact any other party - would try to steal the data, they would only see the encrypted molecular data.
|
84 |
**Only the party that has the private key (organization "A") can decrypt the prediction**. This is possible using a method called "Fully Homomorphic Encryption" (FHE).
|
85 |
-
This special encryption scheme allows to perform computations on encrypted data.
|
86 |
|
87 |
-
We use the open-source library <a href="https://
|
88 |
|
89 |
The code used for the FHE prediction is available in the open-source library
|
90 |
\n
|
@@ -103,7 +103,7 @@ st.divider()
|
|
103 |
|
104 |
st.markdown(
|
105 |
"<p style='text-align: center; color: grey;'>"
|
106 |
-
+ img_to_html("scheme2.png", width="
|
107 |
+ "</p>",
|
108 |
unsafe_allow_html=True,
|
109 |
)
|
@@ -652,7 +652,7 @@ if __name__ == "__main__":
|
|
652 |
st.markdown(
|
653 |
"""
|
654 |
<div style="width: 100%; text-align: center; padding: 10px;">
|
655 |
-
The app was built with <a href="https://
|
656 |
an open-source library by <a href="https://www.zama.ai/" target="_blank">Zama</a>.
|
657 |
</div>
|
658 |
""",
|
|
|
25 |
import pandas as pd
|
26 |
from st_keyup import st_keyup
|
27 |
|
28 |
+
st.set_page_config(layout="wide", page_title="VaultChem")
|
29 |
|
30 |
|
31 |
def local_css(file_name):
|
|
|
60 |
"<h1 style='text-align: center;'>"
|
61 |
"<span style='color: red;'>Pharmacokinetics</span>"
|
62 |
"<span style='color: black;'> of </span>"
|
63 |
+
"<span style='color: blue;'>🤫confidential</span>"
|
64 |
"<span style='color: black;'> molecules</span>"
|
65 |
"</h1>"
|
66 |
)
|
|
|
82 |
Why is this **magic**? Because this is equivalent to computing the prediction on the molecule in clear text, but without sharing the molecule with the server.
|
83 |
Even if organization "B" - or in fact any other party - would try to steal the data, they would only see the encrypted molecular data.
|
84 |
**Only the party that has the private key (organization "A") can decrypt the prediction**. This is possible using a method called "Fully Homomorphic Encryption" (FHE).
|
85 |
+
This special encryption scheme allows to perform computations on encrypted data. To learn more about FHE, click [here](https://fhe.org/resources/).
|
86 |
|
87 |
+
We use the open-source library <a href="https://github.com/zama-ai/concrete-ml" target="_blank">Concrete-ML</a> to develop safe and robust encryption technology.
|
88 |
|
89 |
The code used for the FHE prediction is available in the open-source library
|
90 |
\n
|
|
|
103 |
|
104 |
st.markdown(
|
105 |
"<p style='text-align: center; color: grey;'>"
|
106 |
+
+ img_to_html("scheme2.png", width="65%")
|
107 |
+ "</p>",
|
108 |
unsafe_allow_html=True,
|
109 |
)
|
|
|
652 |
st.markdown(
|
653 |
"""
|
654 |
<div style="width: 100%; text-align: center; padding: 10px;">
|
655 |
+
The app was built with <a href="https://github.com/zama-ai/concrete-ml" target="_blank">Concrete-ML</a>,
|
656 |
an open-source library by <a href="https://www.zama.ai/" target="_blank">Zama</a>.
|
657 |
</div>
|
658 |
""",
|
chemdata.py
CHANGED
@@ -153,60 +153,6 @@ def compute_descriptors_from_smiles_list(SMILES):
|
|
153 |
return np.array(X)
|
154 |
|
155 |
|
156 |
-
class ProcessToxChemData:
|
157 |
-
def __init__(self, bits=256):
|
158 |
-
self.bits = int(bits)
|
159 |
-
if not os.path.exists("data"):
|
160 |
-
os.makedirs("data")
|
161 |
-
self.save_file = "data/" + "save_file_Tox" + str(self.bits) + ".pkl"
|
162 |
-
|
163 |
-
if os.path.exists(self.save_file):
|
164 |
-
with open(self.save_file, "rb") as file:
|
165 |
-
self.adjusted_valid_entries_per_task = pickle.load(file)
|
166 |
-
else:
|
167 |
-
url = "https://github.com/deepchem/deepchem/blob/master/datasets/tox21.csv.gz?raw=true"
|
168 |
-
response = requests.get(url)
|
169 |
-
content = gzip.decompress(response.content)
|
170 |
-
self.df = pd.read_csv(BytesIO(content))
|
171 |
-
self.process()
|
172 |
-
self.save_adjusted_data()
|
173 |
-
|
174 |
-
def process(self):
|
175 |
-
self.adjusted_valid_entries_per_task = {}
|
176 |
-
|
177 |
-
# Iterating through each task column and extracting valid entries
|
178 |
-
for task in self.df.columns[
|
179 |
-
:-2
|
180 |
-
]: # Excluding mol_id and smiles from the iteration
|
181 |
-
valid_entries = self.df.dropna(subset=[task])[["mol_id", "smiles", task]]
|
182 |
-
|
183 |
-
valid_entries["fps"] = valid_entries["smiles"].apply(
|
184 |
-
lambda x: generate_fingerprint(x, radius=2, bits=self.bits)
|
185 |
-
)
|
186 |
-
valid_entries = valid_entries.dropna(subset=["fps"])
|
187 |
-
valid_entries["descriptors"] = valid_entries["smiles"].apply(
|
188 |
-
lambda x: compute_descriptors_from_smiles_list([x])[0]
|
189 |
-
)
|
190 |
-
valid_entries = valid_entries.dropna(subset=["descriptors"])
|
191 |
-
# Shuffle the rows
|
192 |
-
valid_entries = valid_entries.sample(frac=1, random_state=42).reset_index(
|
193 |
-
drop=True
|
194 |
-
)
|
195 |
-
self.adjusted_valid_entries_per_task[task] = valid_entries
|
196 |
-
self.adjusted_valid_entries_per_task[
|
197 |
-
task
|
198 |
-
] = self.adjusted_valid_entries_per_task[task].rename(columns={task: "y"})
|
199 |
-
|
200 |
-
def save_adjusted_data(self):
|
201 |
-
with open(self.save_file, "wb") as file:
|
202 |
-
pickle.dump(self.adjusted_valid_entries_per_task, file)
|
203 |
-
|
204 |
-
def get_X_y(self, task):
|
205 |
-
X = np.float_(np.stack(self.adjusted_valid_entries_per_task[task].fps.values))
|
206 |
-
y = self.adjusted_valid_entries_per_task[task].y.values.astype(int)
|
207 |
-
return X, y
|
208 |
-
|
209 |
-
|
210 |
class ProcessADMEChemData:
|
211 |
def __init__(self, bits=512, radius=2):
|
212 |
self.bits = int(bits)
|
@@ -291,7 +237,8 @@ def load_ADME_data(task, bits=256, radius=2):
|
|
291 |
"""
|
292 |
data = ProcessADMEChemData(bits=bits, radius=radius)
|
293 |
X, y = data.get_X_y(task)
|
294 |
-
|
|
|
295 |
|
296 |
|
297 |
class ProcessGenericChemData:
|
|
|
153 |
return np.array(X)
|
154 |
|
155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
class ProcessADMEChemData:
|
157 |
def __init__(self, bits=512, radius=2):
|
158 |
self.bits = int(bits)
|
|
|
237 |
"""
|
238 |
data = ProcessADMEChemData(bits=bits, radius=radius)
|
239 |
X, y = data.get_X_y(task)
|
240 |
+
SMILES = data.adjusted_valid_entries_per_task[task]["smiles"].values
|
241 |
+
return train_test_split(SMILES,X, y, test_size=0.2, random_state=42)
|
242 |
|
243 |
|
244 |
class ProcessGenericChemData:
|