Spaces:
Running
Running
Eachan Johnson
committed on
Commit
·
028bbd0
1
Parent(s):
c35d034
Major refactor
Browse files- .gitignore +3 -2
- app.py +480 -364
- example-data/examples.json +122 -0
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
-
|
2 |
-
/cache/
|
|
|
|
1 |
+
/cache/duvida/models--*
|
2 |
+
/cache/downloads/
|
3 |
+
*.log
|
app.py
CHANGED
@@ -1,11 +1,15 @@
|
|
1 |
"""Gradio demo for schemist."""
|
2 |
|
3 |
from typing import Iterable, List, Optional, Union
|
|
|
4 |
from functools import partial
|
5 |
from io import TextIOWrapper
|
|
|
6 |
import json
|
7 |
import os
|
8 |
-
|
|
|
|
|
9 |
|
10 |
from carabiner import cast, print_err
|
11 |
from carabiner.pd import read_table
|
@@ -22,18 +26,20 @@ from schemist.converting import (
|
|
22 |
)
|
23 |
from schemist.tables import converter
|
24 |
import torch
|
|
|
25 |
|
|
|
26 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
27 |
|
28 |
CACHE = "./cache"
|
29 |
-
MAX_ROWS =
|
30 |
-
BATCH_SIZE=32
|
31 |
HEADER_FILE = os.path.join("sources", "header.md")
|
32 |
with open("repos.json", "r") as f:
|
33 |
MODEL_REPOS = json.load(f)
|
34 |
|
35 |
MODELBOXES = {
|
36 |
-
key: AutoModelBox.from_pretrained(val, cache_dir=CACHE)
|
37 |
for key, val in MODEL_REPOS.items()
|
38 |
}
|
39 |
[mb.to(DEVICE) for mb in MODELBOXES.values()]
|
@@ -45,24 +51,46 @@ EXTRA_METRICS = {
|
|
45 |
"Information sensitivity (approx.)": lambda modelbox, candidates: modelbox.information_sensitivity(candidates=candidates, batch_size=BATCH_SIZE, optimality_approximation=True, approximator="squared_jacobian", cache=CACHE).map(lambda x: {"information sensitivity": torch.log10(x["information sensitivity"])}),
|
46 |
}
|
47 |
|
|
|
|
|
|
|
48 |
def get_dropdown_options(df, _type = str):
|
49 |
if _type == str:
|
50 |
cols = list(df.select_dtypes(exclude=[np.number]))
|
51 |
else:
|
52 |
cols = list(df.select_dtypes([np.number]))
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
|
56 |
-
def load_input_data(file: Union[TextIOWrapper, str]) -> pd.DataFrame:
|
57 |
file = file if isinstance(file, str) else file.name
|
58 |
print_err(f"Loading {file}")
|
59 |
-
df = read_table(file)
|
60 |
print_err(df.head())
|
61 |
-
|
|
|
|
|
|
|
62 |
|
63 |
|
64 |
def _clean_split_input(strings: str) -> List[str]:
|
65 |
-
return [
|
|
|
|
|
|
|
|
|
66 |
|
67 |
|
68 |
def _convert_input(
|
@@ -82,7 +110,7 @@ def _convert_input(
|
|
82 |
def convert_one(
|
83 |
strings: str,
|
84 |
input_representation: str = 'smiles',
|
85 |
-
output_representation: Union[Iterable[str], str] = 'smiles'
|
86 |
):
|
87 |
output_representation = cast(output_representation, to=list)
|
88 |
for rep in output_representation:
|
@@ -168,7 +196,9 @@ def predict_one(
|
|
168 |
strings: str,
|
169 |
input_representation: str = 'smiles',
|
170 |
predict: Union[Iterable[str], str] = 'smiles',
|
171 |
-
extra_metrics: Optional[Union[Iterable[str], str]] = None
|
|
|
|
|
172 |
):
|
173 |
prediction_df = convert_one(
|
174 |
strings=strings,
|
@@ -180,15 +210,26 @@ def predict_one(
|
|
180 |
predict=predict,
|
181 |
extra_metrics=extra_metrics,
|
182 |
)
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
def convert_file(
|
194 |
df: pd.DataFrame,
|
@@ -230,10 +271,12 @@ def predict_file(
|
|
230 |
input_representation: str = 'smiles',
|
231 |
predict: str = 'smiles',
|
232 |
predict2: Optional[str] = None,
|
233 |
-
extra_metrics: Optional[Union[Iterable[str], str]] = None
|
|
|
|
|
234 |
):
|
235 |
predict = cast(predict, to=list)
|
236 |
-
if predict2 is not None:
|
237 |
predict += cast(predict2, to=list)
|
238 |
if extra_metrics is None:
|
239 |
extra_metrics = []
|
@@ -266,25 +309,49 @@ def predict_file(
|
|
266 |
col for col in prediction_df
|
267 |
if col not in main_cols
|
268 |
]
|
269 |
-
|
270 |
['id', 'inchikey']
|
271 |
+ [column]
|
272 |
+ prediction_cols + other_cols
|
273 |
+ ['smiles', "mwt", "clogp"]
|
274 |
]
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
def draw_one(
|
277 |
-
|
278 |
-
|
|
|
279 |
):
|
280 |
-
|
|
|
|
|
|
|
|
|
281 |
gr.Info(message, duration=10)
|
282 |
-
_ids =
|
283 |
-
|
284 |
-
input_representation,
|
285 |
-
["inchikey", "id", "pubchem_name"],
|
286 |
-
)
|
287 |
-
mols = cast(_x2mol(_clean_split_input(strings), input_representation), to=list)
|
288 |
if isinstance(mols, Mol):
|
289 |
mols = [mols]
|
290 |
return Draw.MolsToGridImage(
|
@@ -294,6 +361,7 @@ def draw_one(
|
|
294 |
legends=["\n".join(items) for items in zip(*_ids.values())],
|
295 |
)
|
296 |
|
|
|
297 |
def log10_if_all_positive(df, col):
|
298 |
if np.all(df[col] > 0.):
|
299 |
df[col] = np.log10(df[col])
|
@@ -355,386 +423,434 @@ def download_table(
|
|
355 |
df: pd.DataFrame
|
356 |
) -> str:
|
357 |
df_hash = nm.hash(pd.util.hash_pandas_object(df).values)
|
358 |
-
filename = f"predicted-{df_hash}.csv"
|
|
|
|
|
359 |
df.to_csv(filename, index=False)
|
360 |
return gr.DownloadButton(value=filename, visible=True)
|
361 |
|
362 |
|
363 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
|
365 |
-
|
366 |
-
header_md = f.read()
|
367 |
-
gr.Markdown(header_md)
|
368 |
|
369 |
-
|
370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
371 |
label="Input string format",
|
372 |
choices=list(_FROM_FUNCTIONS),
|
373 |
value="smiles",
|
374 |
interactive=True,
|
375 |
-
)
|
376 |
-
|
377 |
-
label="Input",
|
378 |
-
placeholder="Paste your molecule here, one per line",
|
379 |
-
lines=2,
|
380 |
-
interactive=True,
|
381 |
-
submit_btn=True,
|
382 |
-
)
|
383 |
-
output_species_single = gr.CheckboxGroup(
|
384 |
label="Species for prediction",
|
385 |
choices=list(MODEL_REPOS),
|
386 |
value=list(MODEL_REPOS)[:1],
|
387 |
interactive=True,
|
388 |
-
)
|
389 |
-
|
390 |
label="Extra metrics (Doubscore & Information Sensitivity can increase calculation time to a couple of minutes!)",
|
391 |
choices=list(EXTRA_METRICS),
|
392 |
value=list(EXTRA_METRICS)[:2],
|
393 |
interactive=True,
|
394 |
-
)
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
"CC1=C2C=CC=C(C2=C(C3=C1C[C@H]4[C@@H](C(=O)C(=C([C@]4(C3=O)O)O)C(=O)N)N(C)C)O)O",
|
416 |
-
]),
|
417 |
-
"Staphylococcus aureus",
|
418 |
-
list(EXTRA_METRICS)[:2],
|
419 |
-
], # doxorubicin, ampicillin, amoxicillin, meropenem, tetracycline, anhydrotetracycline
|
420 |
-
[
|
421 |
-
'\n'.join([
|
422 |
-
"C1=C(SC(=N1)SC2=NN=C(S2)N)[N+](=O)[O-]",
|
423 |
-
"C1CN(CCC12C3=CC=CC=C3NC(=O)O2)CCC4=CC=C(C=C4)C(F)(F)F",
|
424 |
-
"COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
425 |
-
"CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N",
|
426 |
-
"C1[C@@H]([C@H]([C@@H]([C@H]([C@@H]1NC(=O)[C@H](CCN)O)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O)N)O)O)O[C@@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CN)O)O)O)N",
|
427 |
-
"C1=CN=CC=C1C(=O)NN",
|
428 |
-
]),
|
429 |
-
["Escherichia coli", "Acinetobacter baumannii"],
|
430 |
-
list(EXTRA_METRICS)[:2],
|
431 |
-
], # Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid
|
432 |
-
[
|
433 |
-
'\n'.join([
|
434 |
-
"CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
|
435 |
-
"C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O",
|
436 |
-
"CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
|
437 |
-
"C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O",
|
438 |
-
"CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC",
|
439 |
-
"C[C@H]1/C=C/C=C(\C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)[C@](O4)(O/C=C/[C@@H]([C@H]([C@H]([C@@H]([C@@H]([C@@H]([C@H]1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)/C=N/N5CCN(CC5)C)/C",
|
440 |
-
]),
|
441 |
-
"Acinetobacter baumannii",
|
442 |
-
list(EXTRA_METRICS)[:2],
|
443 |
-
], # murepavadin, vancomycin, zosurabalpin, plazomicin, Gentamicin, rifampicin
|
444 |
-
[
|
445 |
-
'\n'.join([
|
446 |
-
"CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)CC4)N=C3",
|
447 |
-
"CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@@H](C4)N)N=C3",
|
448 |
-
"CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@H](CC4)[NH3+])N=C3.[Cl-]",
|
449 |
-
"C1=C(C(=O)NC(=O)N1)F",
|
450 |
-
"CCCCCCNC(=O)N1C=C(C(=O)NC1=O)F",
|
451 |
-
"C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O",
|
452 |
-
]),
|
453 |
-
"Escherichia coli",
|
454 |
-
list(EXTRA_METRICS)[:2],
|
455 |
-
], # Debio1452, Debio-1452-NH3, Fabimycin, 5-FU, Carmofur, Etoposide
|
456 |
-
[
|
457 |
-
'\n'.join([
|
458 |
-
"COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
459 |
-
"CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
460 |
-
"C1=CC(=CC=C1CCC2=CNC3=C2C(=O)NC(=N3)N)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
461 |
-
"CC1=C(C2=C(C=C1)N=C(NC2=O)N)SC3=CC=NC=C3",
|
462 |
-
"CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
463 |
-
"CC1=NC2=C(C=C(C=C2)CN(C)C3=CC=C(S3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C(=O)N1",
|
464 |
-
]),
|
465 |
-
"Klebsiella pneumoniae",
|
466 |
-
list(EXTRA_METRICS)[:2],
|
467 |
-
], # Trimethoprim, SCH79797, Pemetrexed, Nolatrexed, Methotrexate, Raltitrexed
|
468 |
-
[
|
469 |
-
'\n'.join([
|
470 |
-
"C[C@H]([C@@H](C(=O)NO)NC(=O)C1=CC=C(C=C1)C#CC2=CC=C(C=C2)CN3CCOCC3)O",
|
471 |
-
"CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
472 |
-
"C1=CC=C(C=C1)CNC2=NC(=NC3=CC=CC=C32)NCC4=CC=CC=C4",
|
473 |
-
"CC(C)(C)C1=CC=C(C=C1)C(=O)NC(=S)NC2=CC=C(C=C2)NC(=O)CCCCN(C)C",
|
474 |
-
"CCC1=C(C(=NC(=N1)N)N)C2=CC=C(C=C2)Cl",
|
475 |
-
"C1=CC(=CC=C1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCC2=CN=C3C(=N2)C(=NC(=N3)N)N",
|
476 |
-
]),
|
477 |
-
"Klebsiella pneumoniae",
|
478 |
-
list(EXTRA_METRICS)[:2],
|
479 |
-
], # CHIR-090, SCH79797, DBeQ, Tenovin-6, Pyrimethamine, Aminopterin
|
480 |
-
|
481 |
-
],
|
482 |
-
example_labels=[
|
483 |
-
"_Y. pestis_ (plague) vs Ciprofloxacin, Ceftriaxone, Cefiderocol, Linezolid, Gepotidacin",
|
484 |
-
"_S. aureus_ vs Doxorubicin, Ampicillin, Amoxicillin, Meropenem, Tetracycline, Anhydrotetracycline",
|
485 |
-
"_E. coli_ and _A. baumannii_ vs Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid",
|
486 |
-
"_A. baumannii_ vs Murepavadin, Vancomycin, Zosurabalpin, Plazomicin, Gentamicin, Rifampicin",
|
487 |
-
"_E. coli_ vs Debio-1452, Debio-1452-NH3, Fabimycin, 5-FU, Carmofur, Etoposide",
|
488 |
-
"_K. pneumoniae_ vs Trimethoprim, Pemetrexed, Nolatrexed, Methotrexate, Raltitrexed",
|
489 |
-
"_K. pneumoniae_ vs CHIR-090, SCH79797, DBeQ, Tenovin-6, Pyrimethamine, Aminopterin"
|
490 |
-
],
|
491 |
-
inputs=[input_line, output_species_single, extra_metric],
|
492 |
-
cache_mode="eager",
|
493 |
-
)
|
494 |
-
download_single = gr.DownloadButton(
|
495 |
-
label="Download predictions",
|
496 |
-
visible=False,
|
497 |
-
)
|
498 |
-
# with gr.Row():
|
499 |
-
output_line = gr.DataFrame(
|
500 |
-
label="Predictions",
|
501 |
-
interactive=False,
|
502 |
-
visible=False,
|
503 |
-
)
|
504 |
-
drawing = gr.Image(label="Chemical structures")
|
505 |
-
|
506 |
-
gr.on(
|
507 |
-
[
|
508 |
-
input_line.submit,
|
509 |
-
],
|
510 |
-
fn=predict_one,
|
511 |
-
inputs=[
|
512 |
-
input_line,
|
513 |
-
input_format_single,
|
514 |
-
output_species_single,
|
515 |
-
extra_metric,
|
516 |
-
],
|
517 |
-
outputs={
|
518 |
-
output_line,
|
519 |
-
}
|
520 |
-
).then(
|
521 |
-
draw_one,
|
522 |
-
inputs=[
|
523 |
-
input_line,
|
524 |
-
input_format_single,
|
525 |
-
],
|
526 |
-
outputs=drawing,
|
527 |
-
).then(
|
528 |
-
download_table,
|
529 |
-
inputs=output_line,
|
530 |
-
outputs=download_single
|
531 |
-
)
|
532 |
|
533 |
-
|
534 |
-
|
535 |
label="Upload a table of chemical compounds here",
|
536 |
file_types=[".xlsx", ".csv", ".tsv", ".txt"],
|
537 |
-
)
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
gr.Dropdown(
|
554 |
label="Species 1 for prediction",
|
555 |
choices=list(MODEL_REPOS),
|
556 |
value=list(MODEL_REPOS)[0],
|
557 |
interactive=True,
|
|
|
558 |
),
|
559 |
gr.Dropdown(
|
560 |
label="Species 2 for prediction",
|
561 |
choices=list(MODEL_REPOS),
|
562 |
value=None,
|
563 |
interactive=True,
|
|
|
564 |
),
|
565 |
-
]
|
566 |
-
|
567 |
label="Extra metrics (Information Sensitivity can increase calculation time)",
|
568 |
choices=list(EXTRA_METRICS),
|
569 |
value=list(EXTRA_METRICS)[:2],
|
570 |
interactive=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
571 |
)
|
572 |
-
|
573 |
-
go_button2 = gr.Button(
|
574 |
-
value="Predict!",
|
575 |
-
)
|
576 |
|
577 |
-
|
578 |
-
|
579 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
580 |
)
|
581 |
-
|
582 |
-
|
583 |
-
|
|
|
584 |
visible=False,
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
616 |
)
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
623 |
)
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
"
|
635 |
-
"Escherichia coli: Doubtscore",
|
636 |
-
list(EXTRA_METRICS)[:3],
|
637 |
],
|
638 |
-
[
|
639 |
-
"
|
640 |
-
"SMILES",
|
641 |
-
"Acinetobacter baumannii",
|
642 |
-
"Mean",
|
643 |
-
"Acinetobacter baumannii: Doubtscore",
|
644 |
-
list(EXTRA_METRICS)[:3],
|
645 |
],
|
646 |
-
|
647 |
-
|
648 |
-
"
|
649 |
-
"
|
650 |
-
"
|
651 |
-
"
|
652 |
-
|
653 |
],
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
inputs=[input_file, input_column, output_species[0], observed_col, color_col, extra_metric_file],
|
661 |
-
cache_mode="eager",
|
662 |
-
)
|
663 |
-
with gr.Row():
|
664 |
-
pred_vs_observed = gr.ScatterPlot(
|
665 |
-
label="Prediction vs observed",
|
666 |
-
x_title="Predicted MIC (µM)",
|
667 |
-
y_title="Observed",
|
668 |
-
visible=False,
|
669 |
-
height=600,
|
670 |
)
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
675 |
)
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
outputs={
|
699 |
-
input_data,
|
700 |
-
}
|
701 |
-
).then(
|
702 |
-
download_table,
|
703 |
-
inputs=input_data,
|
704 |
-
outputs=download
|
705 |
-
).then(
|
706 |
-
lambda: gr.Button(visible=True),
|
707 |
-
outputs=[plot_button]
|
708 |
-
)
|
709 |
-
|
710 |
-
for dropdown in [observed_col, color_col, any_color_col, any_x_col, any_y_col]:
|
711 |
-
go2_click_event.then(
|
712 |
-
partial(get_dropdown_options, _type="number"),
|
713 |
-
inputs=[input_data],
|
714 |
-
outputs=[dropdown],
|
715 |
)
|
716 |
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
733 |
-
],
|
734 |
-
outputs=[plot_any_vs_any],
|
735 |
-
)
|
736 |
-
|
737 |
-
if __name__ == "__main__":
|
738 |
-
demo.queue()
|
739 |
-
demo.launch(share=True)
|
740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""Gradio demo for schemist."""
|
2 |
|
3 |
from typing import Iterable, List, Optional, Union
|
4 |
+
import csv
|
5 |
from functools import partial
|
6 |
from io import TextIOWrapper
|
7 |
+
import itertools
|
8 |
import json
|
9 |
import os
|
10 |
+
import sys
|
11 |
+
|
12 |
+
csv.field_size_limit(sys.maxsize)
|
13 |
|
14 |
from carabiner import cast, print_err
|
15 |
from carabiner.pd import read_table
|
|
|
26 |
)
|
27 |
from schemist.tables import converter
|
28 |
import torch
|
29 |
+
from duvida.stateless.config import config
|
30 |
|
31 |
+
# Name of the Gradio theme passed to ``gr.Blocks``.
THEME = 'd8ahazard/material_design_rd'
# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Root directory for model downloads and generated files.
CACHE = "./cache"
# Upper bound on the number of rows read from an uploaded table.
MAX_ROWS = 500
# Batch size handed to the extra-metric computations.
BATCH_SIZE = 32
HEADER_FILE = os.path.join("sources", "header.md")
# JSON is UTF-8 by specification, so do not rely on the platform default.
with open("repos.json", "r", encoding="utf-8") as f:
    MODEL_REPOS = json.load(f)

# One model box per entry of ``repos.json``, keyed the same way.
MODELBOXES = {
    key: AutoModelBox.from_pretrained(val, cache_dir=os.path.join(CACHE, "duvida"))
    for key, val in MODEL_REPOS.items()
}
# Plain loop rather than a throwaway list: the call is made only for its
# effect on each model box and its return value is discarded.
for mb in MODELBOXES.values():
    mb.to(DEVICE)
|
|
|
51 |
"Information sensitivity (approx.)": lambda modelbox, candidates: modelbox.information_sensitivity(candidates=candidates, batch_size=BATCH_SIZE, optimality_approximation=True, approximator="squared_jacobian", cache=CACHE).map(lambda x: {"information sensitivity": torch.log10(x["information sensitivity"])}),
|
52 |
}
|
53 |
|
54 |
+
with open(os.path.join("example-data", "examples.json"), "r") as f:
|
55 |
+
EXAMPLES = json.load(f)
|
56 |
+
|
57 |
def get_dropdown_options(df, _type=str):
    """Build a dropdown offering the columns of ``df`` of the requested kind.

    Args:
        df: Table whose column labels become the choices; it must provide
            ``select_dtypes``.
        _type: ``str`` (the default) selects the non-numeric columns; any
            other value selects the numeric columns.

    Returns:
        A visible, interactive ``gr.Dropdown`` that accepts custom values.
        Its choices are the selected column labels and its value is the
        first label that is not ``None``, or ``""`` when there is none.
    """
    if _type == str:
        cols = list(df.select_dtypes(exclude=[np.number]))
    else:
        cols = list(df.select_dtypes([np.number]))
    non_none = [col for col in cols if col is not None]
    # Test the filtered list, not ``cols``: indexing ``non_none`` after only
    # checking ``cols`` raises IndexError when every label is ``None``.
    default_value = non_none[0] if non_none else ""
    print_err(f"Dropdown default value is {default_value}")
    return gr.Dropdown(
        choices=cols,
        interactive=True,
        value=default_value,
        visible=True,
        allow_custom_value=True,
    )
|
75 |
|
76 |
|
77 |
+
def load_input_data(file: Union[TextIOWrapper, str], return_pd: bool = False) -> tuple:
    """Read the first ``MAX_ROWS`` rows of a table and build the UI updates.

    Args:
        file: Path of the table, or an object whose ``name`` attribute holds
            that path.
        return_pd: When true, the loaded table is returned alongside the
            component that displays it.

    Returns:
        A 2-tuple. The second element is always the dropdown produced by
        ``get_dropdown_options(df, str)``. The first element is the visible
        ``gr.Dataframe`` showing the table, or ``(df, gr.Dataframe)`` when
        ``return_pd`` is true.
    """
    path = file if isinstance(file, str) else file.name
    print_err(f"Loading {path}")
    df = read_table(path, nrows=MAX_ROWS)
    print_err(df.head())
    # Build both components once; the two return shapes differ only in
    # whether the raw table is included.
    table_component = gr.Dataframe(value=df, visible=True)
    column_dropdown = get_dropdown_options(df, str)
    if return_pd:
        return (df, table_component), column_dropdown
    return table_component, column_dropdown
|
86 |
|
87 |
|
88 |
def _clean_split_input(strings: str) -> List[str]:
|
89 |
+
return [
|
90 |
+
s2.split(":")[-1].strip()
|
91 |
+
for s in strings.split("\n")
|
92 |
+
for s2 in s.split(",")
|
93 |
+
]
|
94 |
|
95 |
|
96 |
def _convert_input(
|
|
|
110 |
def convert_one(
|
111 |
strings: str,
|
112 |
input_representation: str = 'smiles',
|
113 |
+
output_representation: Union[Iterable[str], str] = 'smiles',
|
114 |
):
|
115 |
output_representation = cast(output_representation, to=list)
|
116 |
for rep in output_representation:
|
|
|
196 |
strings: str,
|
197 |
input_representation: str = 'smiles',
|
198 |
predict: Union[Iterable[str], str] = 'smiles',
|
199 |
+
extra_metrics: Optional[Union[Iterable[str], str]] = None,
|
200 |
+
return_pd: bool = False
|
201 |
+
# progress = gr.Progress(track_tqdm=True)
|
202 |
):
|
203 |
prediction_df = convert_one(
|
204 |
strings=strings,
|
|
|
210 |
predict=predict,
|
211 |
extra_metrics=extra_metrics,
|
212 |
)
|
213 |
+
df = prediction_df[
|
214 |
+
['id', 'pubchem_name', 'pubchem_id']
|
215 |
+
+ prediction_cols
|
216 |
+
+ ['smiles', 'inchikey', "mwt", "clogp"]
|
217 |
+
]
|
218 |
+
if return_pd:
|
219 |
+
return (
|
220 |
+
df,
|
221 |
+
gr.DataFrame(
|
222 |
+
df,
|
223 |
+
pinned_columns=3,
|
224 |
+
visible=True,
|
225 |
+
)
|
226 |
+
)
|
227 |
+
else:
|
228 |
+
return gr.DataFrame(
|
229 |
+
df,
|
230 |
+
pinned_columns=3,
|
231 |
+
visible=True,
|
232 |
+
)
|
233 |
|
234 |
def convert_file(
|
235 |
df: pd.DataFrame,
|
|
|
271 |
input_representation: str = 'smiles',
|
272 |
predict: str = 'smiles',
|
273 |
predict2: Optional[str] = None,
|
274 |
+
extra_metrics: Optional[Union[Iterable[str], str]] = None,
|
275 |
+
return_pd: bool = False
|
276 |
+
# progress = gr.Progress(track_tqdm=True)
|
277 |
):
|
278 |
predict = cast(predict, to=list)
|
279 |
+
if predict2 is not None and predict2 in MODELBOXES:
|
280 |
predict += cast(predict2, to=list)
|
281 |
if extra_metrics is None:
|
282 |
extra_metrics = []
|
|
|
309 |
col for col in prediction_df
|
310 |
if col not in main_cols
|
311 |
]
|
312 |
+
prediction_df = prediction_df[
|
313 |
['id', 'inchikey']
|
314 |
+ [column]
|
315 |
+ prediction_cols + other_cols
|
316 |
+ ['smiles', "mwt", "clogp"]
|
317 |
]
|
318 |
|
319 |
+
if return_pd:
|
320 |
+
return (
|
321 |
+
prediction_df,
|
322 |
+
gr.Dataframe(
|
323 |
+
label="Predictions",
|
324 |
+
value=prediction_df,
|
325 |
+
pinned_columns=3,
|
326 |
+
visible=True,
|
327 |
+
wrap=True,
|
328 |
+
column_widths=[75] * prediction_df.shape[1],
|
329 |
+
),
|
330 |
+
)
|
331 |
+
else:
|
332 |
+
return gr.Dataframe(
|
333 |
+
label="Predictions",
|
334 |
+
value=prediction_df,
|
335 |
+
pinned_columns=3,
|
336 |
+
visible=True,
|
337 |
+
wrap=True,
|
338 |
+
column_widths=[125] * prediction_df.shape[1],
|
339 |
+
)
|
340 |
+
|
341 |
+
|
342 |
def draw_one(
|
343 |
+
df,
|
344 |
+
smiles_col: str = "smiles",
|
345 |
+
legends: Optional[Union[str, Iterable[str]]] = None
|
346 |
):
|
347 |
+
if legends is None:
|
348 |
+
legends = ["inchikey", "id", "pubchem_name"]
|
349 |
+
else:
|
350 |
+
legends = []
|
351 |
+
message = f"Drawing {df.shape[0]} molecules..."
|
352 |
gr.Info(message, duration=10)
|
353 |
+
_ids = {col: df[col].tolist() for col in legends}
|
354 |
+
mols = cast(_x2mol(df[smiles_col], "smiles"), to=list)
|
|
|
|
|
|
|
|
|
355 |
if isinstance(mols, Mol):
|
356 |
mols = [mols]
|
357 |
return Draw.MolsToGridImage(
|
|
|
361 |
legends=["\n".join(items) for items in zip(*_ids.values())],
|
362 |
)
|
363 |
|
364 |
+
|
365 |
def log10_if_all_positive(df, col):
|
366 |
if np.all(df[col] > 0.):
|
367 |
df[col] = np.log10(df[col])
|
|
|
423 |
df: pd.DataFrame
|
424 |
) -> str:
|
425 |
df_hash = nm.hash(pd.util.hash_pandas_object(df).values)
|
426 |
+
filename = os.path.join(CACHE, "downloads", f"predicted-{df_hash}.csv")
|
427 |
+
if not os.path.exists(os.path.dirname(filename)):
|
428 |
+
os.makedirs(os.path.dirname(filename))
|
429 |
df.to_csv(filename, index=False)
|
430 |
return gr.DownloadButton(value=filename, visible=True)
|
431 |
|
432 |
|
433 |
+
def _predict_then_draw_then_download(
    strings: str,
    input_representation: str = 'smiles',
    predict: Union[Iterable[str], str] = 'smiles',
    extra_metrics: Optional[Union[Iterable[str], str]] = None,
    smiles_col: str = "smiles",
    legends: Optional[Union[str, Iterable[str]]] = None
):
    """Run prediction, drawing and download preparation in a single call.

    Args:
        strings: Pasted input text, passed to ``predict_one``.
        input_representation: Format of ``strings``, passed to ``predict_one``.
        predict: What to predict, passed to ``predict_one``.
        extra_metrics: Extra metrics to compute, passed to ``predict_one``.
        smiles_col: Accepted but not used; drawing always reads ``"smiles"``.
        legends: Accepted but not used; ``draw_one`` is called without it.

    Returns:
        A 3-tuple of the table component from ``predict_one``, the image from
        ``draw_one`` and the download button from ``download_table``.
    """
    prediction = predict_one(
        strings=strings,
        input_representation=input_representation,
        predict=predict,
        extra_metrics=extra_metrics,
        return_pd=True,
    )
    # With return_pd=True the result is (raw table, display component).
    table, table_component = prediction
    structure_image = draw_one(table, smiles_col="smiles")
    download_button = download_table(table)
    return table_component, structure_image, download_button
|
453 |
+
|
454 |
+
|
455 |
+
def _load_then_predict_then_download_then_reveal_plot(
    file: str,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    predict: str = 'smiles',
    predict2: Optional[str] = "",
    extra_metrics: Optional[Union[Iterable[str], str]] = None
):
    """Load a table, predict on it and prepare the download button.

    Args:
        file: Table to load, passed to ``load_input_data``.
        column: Name of the input column, passed to ``predict_file``.
        input_representation: Format of the input column.
        predict: First prediction target.
        predict2: Optional second prediction target; the empty string is
            treated as "none".
        extra_metrics: Extra metrics to compute, passed to ``predict_file``.

    Returns:
        A 2-tuple of the predictions table component and the download button
        from ``download_table``.
    """
    # Only the raw table is needed from the loader; its display component and
    # column dropdown are discarded.
    (loaded, _), _ = load_input_data(file, return_pd=True)
    second_target = predict2 if predict2 != "" else None
    predictions, predictions_component = predict_file(
        loaded,
        column=column,
        input_representation=input_representation,
        predict=predict,
        predict2=second_target,
        extra_metrics=extra_metrics,
        return_pd=True,
    )
    print_err(predictions.head())
    download_button = download_table(predictions)
    return predictions_component, download_button
|
482 |
+
|
483 |
|
484 |
+
def _initial_setup():
    """Create every Gradio component of the demo without laying them out.

    The Duvida configuration and the selected torch device are logged first.
    The components are only constructed here; the caller is expected to place
    them by calling ``render()`` on each one.

    Returns:
        An 11-tuple ``(line_inputs, output_line, download_single, drawing,
        file_inputs, input_dataframe, download, plot_button,
        left_plot_inputs, right_plot_inputs, plots)``. ``line_inputs``,
        ``file_inputs``, ``left_plot_inputs``, ``right_plot_inputs`` and
        ``plots`` are dicts of components; ``file_inputs["species"]`` is a
        list of two dropdowns.
    """
    print_err(f"Duvida config is {config}")
    print_err(f"Default torch device is {DEVICE}")

    def _empty_dropdown(label):
        """Return a visible dropdown with no choices that accepts custom values."""
        return gr.Dropdown(
            label=label,
            choices=[],
            value=None,
            interactive=True,
            visible=True,
            allow_custom_value=True,
        )

    # Inputs of the "paste one per line" workflow.
    line_inputs = {
        "format": gr.Dropdown(
            label="Input string format",
            choices=list(_FROM_FUNCTIONS),
            value="smiles",
            interactive=True,
        ),
        "species": gr.CheckboxGroup(
            label="Species for prediction",
            choices=list(MODEL_REPOS),
            value=list(MODEL_REPOS)[:1],
            interactive=True,
        ),
        "extras": gr.CheckboxGroup(
            label="Extra metrics (Doubtscore & Information Sensitivity can increase calculation time to a couple of minutes!)",
            choices=list(EXTRA_METRICS),
            value=list(EXTRA_METRICS)[:2],
            interactive=True,
        ),
        "strings": gr.Textbox(
            label="Input",
            placeholder="Paste your molecule here, one per line.",
            lines=2,
            interactive=True,
            submit_btn=True,
        ),
    }
    output_line = gr.DataFrame(
        label="Predictions (scroll left and right)",
        interactive=False,
        max_chars=75,
        pinned_columns=3,
        visible=True,
    )
    download_single = gr.DownloadButton(
        label="Download predictions",
        visible=False,
    )
    drawing = gr.Image(label="Chemical structures")

    # Inputs of the file-upload workflow.
    file_inputs = {
        "file": gr.File(
            label="Upload a table of chemical compounds here",
            file_types=[".xlsx", ".csv", ".tsv", ".txt"],
        ),
        "column": gr.Dropdown(
            label="Input column name",
            choices=[],
            allow_custom_value=True,
            visible=True,
            interactive=True,
        ),
        "format": gr.Dropdown(
            label="Input string format",
            choices=list(_FROM_FUNCTIONS),
            value="smiles",
            interactive=True,
            visible=True,
        ),
        "species": [
            gr.Dropdown(
                label="Species 1 for prediction",
                choices=list(MODEL_REPOS),
                value=list(MODEL_REPOS)[0],
                interactive=True,
                allow_custom_value=True,
            ),
            gr.Dropdown(
                label="Species 2 for prediction",
                choices=list(MODEL_REPOS),
                value=None,
                interactive=True,
                allow_custom_value=True,
            ),
        ],
        "extras": gr.CheckboxGroup(
            label="Extra metrics (Information Sensitivity can increase calculation time)",
            choices=list(EXTRA_METRICS),
            value=list(EXTRA_METRICS)[:2],
            interactive=True,
        ),
    }

    input_dataframe = gr.Dataframe(
        label="Input data",
        max_height=500,
        visible=True,
        interactive=False,
        show_fullscreen_button=True,
        show_search="filter",
        max_chars=45,
    )
    download = gr.DownloadButton(
        label="Download predictions",
        visible=False,
    )
    plot_button = gr.Button(
        value="Plot!",
        visible=False,
    )

    # Column selectors for the two scatter plots; all start without choices.
    left_plot_inputs = {
        "observed": _empty_dropdown("Observed column (y-axis) for left plot"),
        "color": _empty_dropdown("Color for left plot"),
    }
    right_plot_inputs = {
        "x": _empty_dropdown("x-axis for right plot"),
        "y": _empty_dropdown("y-axis for right plot"),
        "color": _empty_dropdown("Color for right plot"),
    }
    plots = {
        "left": gr.ScatterPlot(
            height=500,
            visible=False,
        ),
        "right": gr.ScatterPlot(
            height=500,
            visible=False,
        ),
    }

    return (
        line_inputs,
        output_line,
        download_single,
        drawing,
        file_inputs,
        input_dataframe,
        download,
        plot_button,
        left_plot_inputs,
        right_plot_inputs,
        plots,
    )
|
662 |
+
|
663 |
+
if __name__ == "__main__":
    # All components are obtained from _initial_setup() and are placed into
    # the layout further down through their .render() methods.
    # NOTE(review): the nesting under gr.Tab / gr.Row below was reconstructed
    # from a flattened view of this script -- confirm it against the intended
    # page layout.
    (
        line_inputs,
        output_line,
        download_single,
        drawing,
        file_inputs,
        input_dataframe,
        download,
        plot_button,
        left_plot_inputs,
        right_plot_inputs,
        plots,
    ) = _initial_setup()

    # Component groups that are wired identically in more than one place.
    line_components = (
        line_inputs["strings"],
        line_inputs["format"],
        line_inputs["species"],
        line_inputs["extras"],
    )
    line_outputs = (output_line, drawing, download_single)
    # The file example gallery and the "Predict!" button take the same option
    # components; only their first input differs (file widget vs. table).
    file_option_components = (
        file_inputs["column"],
        file_inputs["format"],
        *file_inputs["species"],
        file_inputs["extras"],
    )

    with gr.Blocks(theme=THEME) as demo:
        with open(HEADER_FILE, 'r') as header_handle:
            header_md = header_handle.read()
        gr.Markdown(header_md)

        # ---- Tab 1: structures pasted as text, one per line ----
        with gr.Tab(label="Paste one per line"):
            line_example_specs = EXAMPLES["line input examples"]
            examples = gr.Examples(
                examples=[
                    [
                        "\n".join(spec["strings"]),
                        "smiles",
                        spec["species"],
                        list(EXTRA_METRICS)[:2],
                    ]
                    for spec in line_example_specs
                ],
                example_labels=[spec["label"] for spec in line_example_specs],
                inputs=list(line_components),
                fn=_predict_then_draw_then_download,
                outputs=list(line_outputs),
                cache_examples=True,
                cache_mode="lazy",
            )

            for component in line_inputs.values():
                component.render()
            output_line.render()
            download_single.render()
            drawing.render()

            # Submitting the text box runs the same callback as the examples.
            line_inputs["strings"].submit(
                fn=_predict_then_draw_then_download,
                inputs=list(line_components),
                outputs=list(line_outputs),
            )

        # ---- Tab 2: structures read from an uploaded table ----
        with gr.Tab(f"Predict on structures from a file (max. {MAX_ROWS} rows, ≤ 2 species)"):
            file_example_specs = EXAMPLES["file examples"]
            file_examples = gr.Examples(
                examples=[
                    [
                        spec["file"],
                        spec["column"],
                        "smiles",
                        spec["species"],
                        # Presumably leaves the second species selector
                        # unset -- TODO confirm.
                        "",
                        list(EXTRA_METRICS)[:2],
                    ]
                    for spec in file_example_specs
                ],
                example_labels=[spec["label"] for spec in file_example_specs],
                fn=_load_then_predict_then_download_then_reveal_plot,
                inputs=[file_inputs["file"], *file_option_components],
                outputs=[input_dataframe, download],
                # NOTE: flagged by the original author -- caching here
                # "appears to cause CSV load error".
                cache_examples=True,
                cache_mode="lazy",
            )

            file_inputs["file"].render()
            with gr.Row():
                for name in ("column", "format"):
                    file_inputs[name].render()
            with gr.Row():
                for species_selector in file_inputs["species"]:
                    species_selector.render()
            file_inputs["extras"].render()

            go_button2 = gr.Button(value="Predict!")

            input_dataframe.render()
            download.render()
            with gr.Row():
                for component in left_plot_inputs.values():
                    component.render()
            with gr.Row():
                for component in right_plot_inputs.values():
                    component.render()
            plot_button.render()

            with gr.Row():
                for component in plots.values():
                    component.render()

            # Uploading a file fills the table and the column selector.
            file_inputs["file"].upload(
                fn=load_input_data,
                inputs=file_inputs["file"],
                outputs=[input_dataframe, file_inputs["column"]],
            )
            # Predictions are written back into the same table component.
            go2_click_event = go_button2.click(
                predict_file,
                inputs=[input_dataframe, *file_option_components],
                outputs=[input_dataframe],
            )

            # Any change to the table refreshes the download and then makes
            # the plot button visible.
            df_change = input_dataframe.change(
                download_table,
                inputs=input_dataframe,
                outputs=download,
            ).then(
                lambda: gr.Button(visible=True),
                outputs=[plot_button],
                js=True,
            )

            # After each table change, repopulate every plot dropdown from
            # the table. (A variant that also hooked this onto
            # file_examples.load_input_event is intentionally left disabled.)
            for plot_dropdown in (
                *left_plot_inputs.values(),
                *right_plot_inputs.values(),
            ):
                df_change.then(
                    partial(get_dropdown_options, _type="number"),
                    inputs=[input_dataframe],
                    outputs=[plot_dropdown],
                )

            # One click draws the left plot, then the right plot.
            plot_button.click(
                plot_pred_vs_observed,
                inputs=[
                    input_dataframe,
                    file_inputs["species"][0],
                    left_plot_inputs["observed"],
                    left_plot_inputs["color"],
                ],
                outputs=[plots["left"]],
            ).then(
                plot_x_vs_y,
                inputs=[
                    input_dataframe,
                    right_plot_inputs["x"],
                    right_plot_inputs["y"],
                    right_plot_inputs["color"],
                ],
                outputs=[plots["right"]],
            )
    demo.queue()
    demo.launch(share=True)
|
example-data/examples.json
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"line input examples": [
|
3 |
+
{
|
4 |
+
"label": "Y. pestis (plague) vs Ciprofloxacin, Ceftriaxone, Cefiderocol, Linezolid, Gepotidacin",
|
5 |
+
"strings": [
|
6 |
+
"Ciprofloxacin: C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O",
|
7 |
+
"Ceftriaxone: CN1C(=NC(=O)C(=O)N1)SCC2=C(N3[C@@H]([C@@H](C3=O)NC(=O)/C(=N\\OC)/C4=CSC(=N4)N)SC2)C(=O)O",
|
8 |
+
"Cefiderocol: CC(C)(C(=O)O)O/N=C(/C1=CSC(=N1)N)\\C(=O)N[C@H]2[C@@H]3N(C2=O)C(=C(CS3)C[N+]4(CCCC4)CCNC(=O)C5=C(C(=C(C=C5)O)O)Cl)C(=O)[O-]",
|
9 |
+
"Linezolid: CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
|
10 |
+
"Gepotidacin: C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6"
|
11 |
+
],
|
12 |
+
"species": [
|
13 |
+
"Yersinia pestis"
|
14 |
+
]
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"label": "S. aureus vs Doxorubicin, Ampicillin, Amoxicillin, Meropenem, Tetracycline, Anhydrotetracycline",
|
18 |
+
"strings": [
|
19 |
+
"Doxorubicin: C[C@H]1[C@H]([C@H](C[C@@H](O1)O[C@H]2C[C@@](CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)CO)O)N)O",
|
20 |
+
"Ampicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
|
21 |
+
"Amoxicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
|
22 |
+
"Meropenem: C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)N(C)C)C(=O)O)[C@@H](C)O",
|
23 |
+
"Tetracycline: C[C@@]1([C@H]2C[C@H]3[C@@H](C(=O)C(=C([C@]3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O",
|
24 |
+
"Anhydrotetracycline: CC1=C2C=CC=C(C2=C(C3=C1C[C@H]4[C@@H](C(=O)C(=C([C@]4(C3=O)O)O)C(=O)N)N(C)C)O)O"
|
25 |
+
],
|
26 |
+
"species": [
|
27 |
+
"Staphylococcus aureus"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"label": "E. coli and A. baumannii vs Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid",
|
32 |
+
"strings": [
|
33 |
+
"Halicin: C1=C(SC(=N1)SC2=NN=C(S2)N)[N+](=O)[O-]",
|
34 |
+
"Abaucin: C1CN(CCC12C3=CC=CC=C3NC(=O)O2)CCC4=CC=C(C=C4)C(F)(F)F",
|
35 |
+
"Trimethoprim: COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
36 |
+
"Sulfamethoxazole: CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N",
|
38 |
+
"Amikacin: C1[C@@H]([C@H]([C@@H]([C@H]([C@@H]1NC(=O)[C@H](CCN)O)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O)N)O)O)O[C@@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CN)O)O)O)N",
|
38 |
+
"Isoniazid: C1=CN=CC=C1C(=O)NN"
|
39 |
+
],
|
40 |
+
"species": [
|
41 |
+
"Escherichia coli",
|
42 |
+
"Acinetobacter baumannii"
|
43 |
+
]
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"label": "A. baumannii vs Murepavadin, Vancomycin, Zosurabalpin, Plazomicin, Gentamicin, Rifampicin",
|
47 |
+
"strings": [
|
48 |
+
"Murepavadin: CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
|
49 |
+
"Vancomycin: C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O",
|
50 |
+
"Zosurabalpin: CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
|
51 |
+
"Plazomicin: C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O",
|
52 |
+
"Gentamicin: CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC",
|
53 |
+
"Rifampicin: C[C@H]1/C=C/C=C(\\C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)[C@](O4)(O/C=C/[C@@H]([C@H]([C@H]([C@@H]([C@@H]([C@@H]([C@H]1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)/C=N/N5CCN(CC5)C)/C"
|
54 |
+
],
|
55 |
+
"species": [
|
56 |
+
"Acinetobacter baumannii"
|
57 |
+
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"label": "E. coli vs Debio-1452, Debio-1452-NH3, Fabimycin, 5-FU, Carmofur, Etoposide",
|
61 |
+
"strings": [
|
62 |
+
"Debio-1452: CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)CC4)N=C3",
|
63 |
+
"Debio-1452-NH3: CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@@H](C4)N)N=C3",
|
64 |
+
"Fabimycin: CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@H](CC4)[NH3+])N=C3.[Cl-]",
|
65 |
+
"5-FU: C1=C(C(=O)NC(=O)N1)F",
|
66 |
+
"Carmofur: CCCCCCNC(=O)N1C=C(C(=O)NC1=O)F",
|
67 |
+
"Etoposide: C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O"
|
68 |
+
],
|
69 |
+
"species": [
|
70 |
+
"Escherichia coli"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"label": "K. pneumoniae vs Trimethoprim, SCH-79797, Pemetrexed, Nolatrexed, Methotrexate, Raltitrexed",
|
75 |
+
"strings": [
|
76 |
+
"Trimethoprim: COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
77 |
+
"SCH-79797: CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
78 |
+
"Pemetrexed: C1=CC(=CC=C1CCC2=CNC3=C2C(=O)NC(=N3)N)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
79 |
+
"Nolatrexed: CC1=C(C2=C(C=C1)N=C(NC2=O)N)SC3=CC=NC=C3",
|
80 |
+
"Methotrexate: CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
81 |
+
"Raltitrexed: CC1=NC2=C(C=C(C=C2)CN(C)C3=CC=C(S3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C(=O)N1"
|
82 |
+
],
|
83 |
+
"species": [
|
84 |
+
"Klebsiella pneumoniae"
|
85 |
+
]
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"label": "K. pneumoniae vs CHIR-090, SCH79797, DBeQ, Tenovin-6, Pyrimethamine, Aminopterin",
|
89 |
+
"strings": [
|
90 |
+
"CHIR-090: C[C@H]([C@@H](C(=O)NO)NC(=O)C1=CC=C(C=C1)C#CC2=CC=C(C=C2)CN3CCOCC3)O",
|
91 |
+
"SCH79797: CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
92 |
+
"DBeQ: C1=CC=C(C=C1)CNC2=NC(=NC3=CC=CC=C32)NCC4=CC=CC=C4",
|
93 |
+
"Tenovin-6: CC(C)(C)C1=CC=C(C=C1)C(=O)NC(=S)NC2=CC=C(C=C2)NC(=O)CCCCN(C)C",
|
94 |
+
"Pyrimethamine: CCC1=C(C(=NC(=N1)N)N)C2=CC=C(C=C2)Cl",
|
95 |
+
"Aminopterin: C1=CC(=CC=C1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCC2=CN=C3C(=N2)C(=NC(=N3)N)N"
|
96 |
+
],
|
97 |
+
"species": [
|
98 |
+
"Klebsiella pneumoniae"
|
99 |
+
]
|
100 |
+
}
|
101 |
+
],
|
102 |
+
"file examples": [
|
103 |
+
{
|
104 |
+
"label": "E. coli training data from Stokes J. et al., Cell (2020)",
|
105 |
+
"file": "example-data/stokes2020-eco.csv",
|
106 |
+
"column": "SMILES",
|
107 |
+
"species": "Escherichia coli"
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"label": "A. baumannii training data from Liu (2023)",
|
111 |
+
"file": "example-data/liu23-abau.csv",
|
112 |
+
"column": "SMILES",
|
113 |
+
"species": "Acinetobacter baumannii"
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"label": "S. aureus training data from Wong (2024)",
|
117 |
+
"file": "example-data/wong24-sau-tox-5000.csv",
|
118 |
+
"column": "SMILES",
|
119 |
+
"species": "Staphylococcus aureus"
|
120 |
+
}
|
121 |
+
]
|
122 |
+
}
|