Eachan Johnson committed on
Commit
9219965
·
1 Parent(s): e8cb587

Update examples

Browse files
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  .gradio/cached_examples/
2
  /cache/duvida/models--*
3
  /cache/downloads/
4
- *.log
 
 
1
  .gradio/cached_examples/
2
  /cache/duvida/models--*
3
  /cache/downloads/
4
+ *.log
5
+ /.schemist
example-data/examples.json CHANGED
@@ -12,12 +12,13 @@
12
  ]
13
  },
14
  {
15
- "label": "E. coli, K. pneumoniae, & S. aureus vs Ampicillin, Linezolid, Amoxicillin, Meropenem",
16
  "strings": [
17
  "Ampicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
18
  "Linezolid: CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
19
  "Amoxicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
20
- "Meropenem: C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)N(C)C)C(=O)O)[C@@H](C)O"
 
21
  ],
22
  "species": [
23
  "Escherichia coli",
@@ -26,12 +27,13 @@
26
  ]
27
  },
28
  {
29
- "label": "E. coli, P. aeruginosa, & S. aureus vs Gepotidacin, Murepavadin, Zosurabalpin, Plazomicin",
30
  "strings": [
31
  "Murepavadin: CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
32
  "Gepotidacin: C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6",
33
  "Zosurabalpin: CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
34
- "Plazomicin: C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O"
 
35
  ],
36
  "species": [
37
  "Escherichia coli",
 
12
  ]
13
  },
14
  {
15
+ "label": "E. coli, K. pneumoniae, & S. aureus vs Ampicillin, Linezolid, Amoxicillin, Meropenem, Avibactam",
16
  "strings": [
17
  "Ampicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
18
  "Linezolid: CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
19
  "Amoxicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
20
+ "Meropenem: C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)N(C)C)C(=O)O)[C@@H](C)O",
21
+ "Avibactam: C1C[C@H](N2C[C@@H]1N(C2=O)OS(=O)(=O)O)C(=O)N"
22
  ],
23
  "species": [
24
  "Escherichia coli",
 
27
  ]
28
  },
29
  {
30
+ "label": "E. coli, P. aeruginosa, & S. aureus vs Gepotidacin, Murepavadin, Zosurabalpin, Plazomicin, Iboxamycin",
31
  "strings": [
32
  "Murepavadin: CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
33
  "Gepotidacin: C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6",
34
  "Zosurabalpin: CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
35
+ "Plazomicin: C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O",
36
+ "Iboxamycin: C[C@@H]([C@H]([C@@H]1[C@@H]([C@@H]([C@H]([C@H](O1)SC)O)O)O)NC(=O)[C@@H]2[C@H]3[C@@H](C[C@@H](CCO3)CC(C)C)CN2)Cl"
37
  ],
38
  "species": [
39
  "Escherichia coli",
example-data/liu23-abau-1000.csv CHANGED
The diff for this file is too large to render. See raw diff
 
example-data/stokes20-eco-1000.csv CHANGED
The diff for this file is too large to render. See raw diff
 
example-data/wong24-sau-tox-1000.csv CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/prep-examples.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
#
# Draw a random sample of rows from a CSV file, pass it through
# `schemist convert`, and write the result sorted by its "id" column.
#
# Usage: prep-examples.sh INPUT OUTPUT FIELDS [NLINES]
#   INPUT   CSV file read on stdin by the first pandas stage.
#   OUTPUT  File that receives the final CSV.
#   FIELDS  Text spliced verbatim between `df[[` and `]]` in a pandas
#           expression, so it must be a Python list body, e.g. '"SMILES","pIC50"'.
#   NLINES  Number of rows passed to `DataFrame.sample` (default: 1000).

if (( $# < 3 )); then
  printf 'Usage: %s INPUT OUTPUT FIELDS [NLINES]\n' "${0##*/}" >&2
  exit 2
fi

INPUT="$1"
OUTPUT="$2"
FIELDS="$3"
NLINES="${4:-1000}"

# Create a private virtualenv holding the tools the pipeline needs.
#  - Each requirement is its own argument: pip rejects "pandas schemist>=0.0.4"
#    when it arrives as a single quoted word.
#  - The activate script must be *sourced*; executing it cannot modify this
#    shell's PATH (and the file is normally not executable).
#  - Stop immediately if any step fails rather than silently falling back to
#    whatever `python` happens to be on PATH.
# shellcheck disable=SC1091  # activate is generated at runtime by venv
if ! {
  python -m venv .schemist \
    && .schemist/bin/pip install pandas "schemist>=0.0.4" \
    && source .schemist/bin/activate
}; then
  printf '%s: could not prepare the .schemist virtual environment\n' "${0##*/}" >&2
  exit 1
fi
9
+
10
+ # Some functions for convenience
11
#######################################
# Write one prefixed log line to stderr.
# Arguments:
#   $1 - message text
#   $2 - optional prefix; when omitted or empty, the output of `date` is used
# Outputs:
#   "<prefix> :: <message>" on stderr; nothing on stdout
#######################################
logger() {
  local message="$1"
  local prefix="${2:-}"

  # Only spawn `date` when no prefix was supplied. The assignment is kept
  # separate from `local` so its status is not masked, and `|| true` keeps a
  # logging helper from aborting the caller under `set -e`.
  if [[ -z "$prefix" ]]; then
    prefix="$(date)" || true
  fi

  printf '%s :: %s\n' "$prefix" "$message" >&2
}
17
+
18
#######################################
# Run a pandas one-liner as a stream filter: read CSV from stdin into `df`,
# append a caller-supplied expression suffix to the read call, and write the
# result to stdout as CSV.
# Arguments:
#   $1 - Python text appended directly after `pd.read_csv(...)`, e.g.
#        '.head(5)' or '[["a","b"]]'; it is executed as code, so never pass
#        untrusted input here.
#   $2 - input separator (default: ,); spliced into a Python "..." literal
#   $3 - value for to_csv's `index=` (default: False); shell-style
#        true/false are accepted and mapped to Python True/False
#   $4 - output separator (default: same as $2)
# Outputs:
#   CSV on stdout
#######################################
pandas() {
  local cmd="$1"
  local sep_in="${2:-,}"
  local idx="${3:-False}"
  local sep_out="${4:-"$sep_in"}"
  local program

  # Lower-case booleans would otherwise reach Python as undefined names.
  case "$idx" in
    true) idx=True ;;
    false) idx=False ;;
  esac

  program='import sys; import pandas as pd; '
  program+="df = pd.read_csv(sys.stdin, sep=\"${sep_in}\", low_memory=False)"
  program+="${cmd}.to_csv(sys.stdout, index=${idx}, sep=\"${sep_out}\")"

  python -c "$program"
}
25
+
26
# Stop on the first failing command. `pipefail` matters here: without it the
# pipeline's status is that of its last stage only, so a crash in the sampling
# or `schemist` stage could go unnoticed.
set -eo pipefail
# Trace each command to stderr as it runs.
set -x

# 1. Keep only the requested columns and draw NLINES random rows (the sample
#    is unseeded, so every run picks different rows).
# 2. Pass the sample through `schemist convert`.
#    NOTE(review): presumably `-c SMILES` names the input structure column and
#    `-2` lists the representations to add; confirm against schemist's docs.
# 3. Sort by the "id" column and write the result to OUTPUT.
pandas "[[${FIELDS}]].sample(${NLINES})" \
  < "$INPUT" \
  | schemist convert -c SMILES -2 id smiles inchikey pubchem_id mwt clogp tpsa -f CSV \
  | pandas '.sort_values(["id"])' \
  > "$OUTPUT"