Spaces:
Running
Running
Eachan Johnson
committed on
Commit
·
9219965
1
Parent(s):
e8cb587
Update examples
Browse files- .gitignore +2 -1
- example-data/examples.json +6 -4
- example-data/liu23-abau-1000.csv +0 -0
- example-data/stokes20-eco-1000.csv +0 -0
- example-data/wong24-sau-tox-1000.csv +0 -0
- scripts/prep-examples.sh +33 -0
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
.gradio/cached_examples/
|
2 |
/cache/duvida/models--*
|
3 |
/cache/downloads/
|
4 |
-
*.log
|
|
|
|
1 |
.gradio/cached_examples/
|
2 |
/cache/duvida/models--*
|
3 |
/cache/downloads/
|
4 |
+
*.log
|
5 |
+
/.schemist
|
example-data/examples.json
CHANGED
@@ -12,12 +12,13 @@
|
|
12 |
]
|
13 |
},
|
14 |
{
|
15 |
-
"label": "E. coli, K. pneumoniae, & S. aureus vs Ampicillin, Linezolid, Amoxicillin, Meropenem",
|
16 |
"strings": [
|
17 |
"Ampicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
|
18 |
"Linezolid: CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
|
19 |
"Amoxicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
|
20 |
-
"Meropenem: C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)N(C)C)C(=O)O)[C@@H](C)O"
|
|
|
21 |
],
|
22 |
"species": [
|
23 |
"Escherichia coli",
|
@@ -26,12 +27,13 @@
|
|
26 |
]
|
27 |
},
|
28 |
{
|
29 |
-
"label": "E. coli, P. aeruginosa, & S. aureus vs Gepotidacin, Murepavadin, Zosurabalpin, Plazomicin",
|
30 |
"strings": [
|
31 |
"Murepavadin: CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
|
32 |
"Gepotidacin: C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6",
|
33 |
"Zosurabalpin: CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
|
34 |
-
"Plazomicin: C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O"
|
|
|
35 |
],
|
36 |
"species": [
|
37 |
"Escherichia coli",
|
|
|
12 |
]
|
13 |
},
|
14 |
{
|
15 |
+
"label": "E. coli, K. pneumoniae, & S. aureus vs Ampicillin, Linezolid, Amoxicillin, Meropenem, Avibactam",
|
16 |
"strings": [
|
17 |
"Ampicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
|
18 |
"Linezolid: CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
|
19 |
"Amoxicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
|
20 |
+
"Meropenem: C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)N(C)C)C(=O)O)[C@@H](C)O",
|
21 |
+
"Avibactam: C1C[C@H](N2C[C@@H]1N(C2=O)OS(=O)(=O)O)C(=O)N"
|
22 |
],
|
23 |
"species": [
|
24 |
"Escherichia coli",
|
|
|
27 |
]
|
28 |
},
|
29 |
{
|
30 |
+
"label": "E. coli, P. aeruginosa, & S. aureus vs Gepotidacin, Murepavadin, Zosurabalpin, Plazomicin, Iboxamycin",
|
31 |
"strings": [
|
32 |
"Murepavadin: CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
|
33 |
"Gepotidacin: C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6",
|
34 |
"Zosurabalpin: CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
|
35 |
+
"Plazomicin: C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O",
|
36 |
+
"Iboxamycin: C[C@@H]([C@H]([C@@H]1[C@@H]([C@@H]([C@H]([C@H](O1)SC)O)O)O)NC(=O)[C@@H]2[C@H]3[C@@H](C[C@@H](CCO3)CC(C)C)CN2)Cl"
|
37 |
],
|
38 |
"species": [
|
39 |
"Escherichia coli",
|
example-data/liu23-abau-1000.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
example-data/stokes20-eco-1000.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
example-data/wong24-sau-tox-1000.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
scripts/prep-examples.sh
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
#
# Usage: prep-examples.sh INPUT OUTPUT FIELDS [NLINES]
#   INPUT  - file fed to the first pandas stage on stdin
#   OUTPUT - file that receives the final pipeline output
#   FIELDS - text spliced inside df[[ ... ]] as the column selection, so it
#            must already be valid Python (e.g. '"id","SMILES"')
#   NLINES - number of rows to sample (default: 1000)

INPUT="$1"
OUTPUT="$2"
FIELDS="$3"
NLINES="${4:-1000}"

# Create a local virtualenv, install the tools the pipeline needs, and
# activate it in THIS shell. Each requirement must be its own argument
# (a single "pandas schemist>=0.0.4" string is not a valid requirement), and
# the activate script has to be sourced: executing it cannot modify the
# current shell's PATH.
# A failure here does not stop the script (set -e is only enabled later); the
# script then carries on with whatever python/schemist are already on PATH.
python -m venv .schemist \
    && .schemist/bin/pip install pandas "schemist>=0.0.4" \
    && source .schemist/bin/activate
|
9 |
+
|
10 |
+
# Some functions for convenience
|
11 |
+
# Write a prefixed diagnostic line to stderr.
# Arguments:
#   $1 - message text
#   $2 - optional prefix; when unset or empty the output of `date` is used
# Outputs: "<prefix> :: <message>" on stderr, nothing on stdout
logger () (
    local message="$1"
    local prefix="${2:-}"
    # Only spawn date when the caller did not supply a prefix.
    if [[ -z "$prefix" ]]; then
        prefix=$(date)
    fi
    printf '%s\n' "$prefix :: $message" >&2
)
|
17 |
+
|
18 |
+
# Run a pandas one-liner as a stream filter: read delimited text from stdin
# with pd.read_csv, apply a caller-supplied expression, write the result to
# stdout with to_csv.
# Arguments:
#   $1 - Python source appended directly after pd.read_csv(...), e.g. '.head()'
#   $2 - input separator (default ",")
#   $3 - text used as to_csv's index= argument (default False)
#   $4 - output separator (default: same as $2)
# NOTE: every argument is spliced into Python source without escaping, so
# this helper must only be called with trusted values.
pandas () (
    local cmd="$1"
    local sep1="${2:-,}"
    local idx="${3:-False}"
    local sep2="${4:-$sep1}"
    local code
    # %s inserts each argument verbatim; only the format string is interpreted.
    printf -v code \
        'import sys; import pandas as pd; df = pd.read_csv(sys.stdin, sep="%s", low_memory=False)%s.to_csv(sys.stdout, index=%s, sep="%s")' \
        "$sep1" "$cmd" "$idx" "$sep2"
    python -c "$code"
)
|
25 |
+
|
26 |
+
# Abort on the first failing command. pipefail extends that to every stage of
# a pipeline; without it only the final stage's exit status is considered, so
# a failing middle stage would go unreported.
set -eo pipefail
# Trace each command to stderr as it is executed.
set -x

# 1. Keep only the requested columns and draw a random sample of rows. The
#    sample size is capped at the number of rows available, because
#    DataFrame.sample raises when asked for more rows than exist.
# 2. Pass the sample through `schemist convert` (options unchanged from the
#    original; presumably -c names the SMILES column and -2 lists the output
#    fields, to be confirmed against the schemist CLI help).
# 3. Sort the result by its "id" column and write it to OUTPUT.
pandas '[['"$FIELDS"']].pipe(lambda d: d.sample(min(len(d), '"$NLINES"')))' \
    < "$INPUT" \
    | schemist convert -c SMILES -2 id smiles inchikey pubchem_id mwt clogp tpsa -f CSV \
    | pandas '.sort_values(["id"])' \
    > "$OUTPUT"
|