Eachan Johnson committed on
Commit
44ee556
·
1 Parent(s): 3d0bd0d

Add Gradio demo

Browse files
schemist/app/README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Chemical string format converter
3
+ emoji: ⚗️
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Chemical string format converter
13
+
14
+ Frontend for [schemist](https://github.com/scbirlab/schemist) to allow trivial interconversion from:
15
+
16
+ - SMILES
17
+ - SELFIES
18
+ - Amino acid sequences
19
+ - HELM
20
+
21
+ to...
22
+
23
+ - Structure image
24
+ - SMILES
25
+ - SELFIES
26
+ - InChI
27
+ - InChIKey
28
+ - Name
29
+ - cLogP
30
+ - TPSA
31
+ - molecular weight
32
+ - charge
33
+
34
+ ... and several others!
schemist/app/app.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio demo for schemist."""
2
+
3
+ from typing import Iterable, List, Union
4
+ from io import TextIOWrapper
5
+ import os
6
+ os.environ["COMMANDLINE_ARGS"] = "--no-gradio-queue"
7
+
8
+ from carabiner import cast, print_err
9
+ from carabiner.pd import read_table
10
+ import gradio as gr
11
+ import nemony as nm
12
+ import numpy as np
13
+ import pandas as pd
14
+ from rdkit.Chem import Draw, Mol
15
+ import schemist as sch
16
+ from schemist.converting import (
17
+ _TO_FUNCTIONS,
18
+ _FROM_FUNCTIONS,
19
+ convert_string_representation,
20
+ _x2mol,
21
+ )
22
+ from schemist.tables import converter
23
+
24
def load_input_data(file: TextIOWrapper) -> "tuple[gr.Dataframe, gr.Dropdown]":
    """Read an uploaded table and build the widgets that depend on it.

    Args:
        file: Handle of the uploaded file; only its ``name`` attribute is
            used, and it is passed to ``read_table``.

    Returns:
        A pair ``(preview, column_picker)``: a visible ``gr.Dataframe``
        holding the loaded table, and an interactive ``gr.Dropdown`` whose
        choices are the names of the table's non-numeric columns.
    """
    table = read_table(file.name)
    # Offer only non-numeric columns as candidates for the input column.
    string_cols = list(table.select_dtypes(exclude=[np.number]))
    preview = gr.Dataframe(value=table, visible=True)
    column_picker = gr.Dropdown(choices=string_cols, interactive=True)
    return preview, column_picker
29
+
30
+ def _clean_split_input(strings: str) -> List[str]:
31
+ return [s2.strip() for s in strings.split("\n") for s2 in s.split(",")]
32
+
33
def _convert_input(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
) -> List[str]:
    """Convert pasted chemical strings and return the results as strings.

    Args:
        strings: Raw pasted text; it is split into entries with
            ``_clean_split_input``.
        input_representation: Name of the format the entries are written in.
        output_representation: Name (or names) of the format to convert to.

    Returns:
        The conversion results, each passed through ``str``, as a list.
    """
    entries = _clean_split_input(strings)
    converted = convert_string_representation(
        strings=entries,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    as_text = map(str, converted)
    return cast(as_text, to=list)
44
+
45
+
46
def convert_one(
    strings: str,
    input_representation: str = 'smiles',
    output_representation: Union[Iterable[str], str] = 'smiles'
):
    """Convert pasted chemical strings and show the result as a table.

    The pasted entries are placed in a one-column DataFrame whose column is
    named after ``input_representation``, then handed to ``convert_file``.

    Args:
        strings: Raw pasted text.
        input_representation: Name of the format the entries are written in.
        output_representation: Name (or names) of the format to convert to.

    Returns:
        A visible ``gr.DataFrame`` holding the converted table.
    """
    entries = _clean_split_input(strings)
    single_column = pd.DataFrame({input_representation: entries})
    converted = convert_file(
        df=single_column,
        column=input_representation,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    return gr.DataFrame(converted, visible=True)
65
+
66
+
67
def convert_file(
    df: pd.DataFrame,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    output_representation: Union[str, Iterable[str]] = 'smiles'
):
    """Convert one column of a table to other chemical representations.

    Progress is reported both on stderr (``print_err``) and as Gradio info
    toasts (``gr.Info``).

    Args:
        df: Table containing the molecules.
        column: Name of the column holding the input strings.
        input_representation: Name of the format used in ``column``.
        output_representation: Name (or names) of the format to convert to.

    Returns:
        The table returned by ``converter``, with the output-representation
        columns moved to the front and the remaining columns after them in
        their existing order.
    """
    message = f"Converting from {input_representation} to {output_representation}..."
    print_err(message)
    gr.Info(message, duration=5)
    print_err(df)
    errors, df = converter(
        df=df,
        column=column,
        input_representation=input_representation,
        output_representation=output_representation,
    )
    # Normalise to a list once: testing membership against a bare string
    # would be a substring test and could wrongly drop columns whose names
    # happen to be contained in the representation name.
    output_columns = cast(output_representation, to=list)
    remaining_columns = [col for col in df if col not in output_columns]
    df = df[output_columns + remaining_columns]
    all_err = sum(errors.values())
    message = (
        f"Converted {df.shape[0]} molecules from "
        f"{input_representation} to {output_representation} "
        f"with {all_err} errors!"
    )
    print_err(message)
    gr.Info(message, duration=5)
    return df
96
+
97
+
98
def draw_one(
    strings: Union[Iterable[str], str],
    input_representation: str = 'smiles'
):
    """Draw the pasted molecules as a grid image.

    Each grid cell is captioned with the molecule's ``inchikey`` conversion
    and, on a second line, its ``id`` conversion.

    Args:
        strings: Raw pasted text containing the molecules.
        input_representation: Name of the format the entries are written in.

    Returns:
        The image produced by ``Draw.MolsToGridImage`` (at most three
        molecules per row, 300x300 cells), or ``None`` when there are no
        molecules to draw.
    """
    inchikeys = _convert_input(strings, input_representation, "inchikey")
    ids = _convert_input(strings, input_representation, "id")
    mols = cast(_x2mol(_clean_split_input(strings), input_representation), to=list)
    if isinstance(mols, Mol):
        mols = [mols]
    if not mols:
        # molsPerRow would be 0 for an empty list; there is nothing to draw.
        return None
    return Draw.MolsToGridImage(
        mols,
        molsPerRow=min(3, len(mols)),
        subImgSize=(300, 300),
        legends=[f"{key}\n{_id}" for key, _id in zip(inchikeys, ids)],
    )
113
+
114
+
115
def download_table(
    df: pd.DataFrame
) -> "gr.DownloadButton":
    """Write a table to CSV and expose it through a download button.

    The file name embeds a hash of the table contents
    (``converted-<hash>.csv``) and the file is written relative to the
    current working directory.

    Args:
        df: Table to export.

    Returns:
        A visible ``gr.DownloadButton`` whose value is the CSV file name.
    """
    row_hashes = pd.util.hash_pandas_object(df).values
    df_hash = nm.hash(row_hashes)
    filename = f"converted-{df_hash}.csv"
    df.to_csv(filename, index=False)
    return gr.DownloadButton(value=filename, visible=True)
122
+
123
+ with gr.Blocks() as demo:
124
+
125
+ gr.Markdown(
126
+ """
127
+ # Chemical string format converter
128
+
129
+ """
130
+ )
131
+ with gr.Tab(label="Paste one per line"):
132
+ input_line = gr.Textbox(
133
+ label="Input",
134
+ placeholder="Paste your molecule here, one per line",
135
+ lines=2,
136
+ interactive=True,
137
+ submit_btn=True,
138
+ )
139
+ input_format_single = gr.Dropdown(
140
+ label="Input string format",
141
+ choices=list(_FROM_FUNCTIONS),
142
+ value="smiles",
143
+ interactive=True,
144
+ )
145
+ output_format_single = gr.CheckboxGroup(
146
+ label="Output format",
147
+ choices=list(_TO_FUNCTIONS),
148
+ value=["id", "pubchem_name"],
149
+ interactive=True,
150
+ )
151
+ download_single = gr.DownloadButton(
152
+ label="Download converted data",
153
+ visible=False,
154
+ )
155
+ with gr.Row():
156
+ output_line = gr.DataFrame(
157
+ label="Converted",
158
+ interactive=False,
159
+ visible=False,
160
+ )
161
+ drawing = gr.Image(label="Chemical structures")
162
+ gr.on(
163
+ [
164
+ # go_button.click,
165
+ input_line.submit,
166
+ ],
167
+ fn=convert_one,
168
+ inputs=[
169
+ input_line,
170
+ input_format_single,
171
+ output_format_single,
172
+ ],
173
+ outputs={
174
+ output_line,
175
+ }
176
+ ).then(
177
+ draw_one,
178
+ inputs=[
179
+ input_line,
180
+ input_format_single,
181
+ ],
182
+ outputs=drawing,
183
+ ).then(
184
+ download_table,
185
+ inputs=output_line,
186
+ outputs=download_single
187
+ )
188
+
189
+ with gr.Tab("Convert a file"):
190
+ input_file = gr.File(
191
+ label="Upload a table of chemical compounds here",
192
+ file_types=[".xlsx", ".csv", ".tsv", ".txt"],
193
+ )
194
+ with gr.Row():
195
+ input_column = gr.Dropdown(
196
+ label="Input column name",
197
+ choices=[],
198
+ )
199
+ input_format = gr.Dropdown(
200
+ label="Input string format",
201
+ choices=list(_FROM_FUNCTIONS),
202
+ value="smiles",
203
+ interactive=True,
204
+ )
205
+ output_format = gr.CheckboxGroup(
206
+ label="Output format",
207
+ choices=list(_TO_FUNCTIONS),
208
+ value=["id", "selfies"],
209
+ interactive=True,
210
+ )
211
+ go_button2 = gr.Button(
212
+ value="Convert molecules!",
213
+ )
214
+
215
+ download = gr.DownloadButton(
216
+ label="Download converted data",
217
+ visible=False,
218
+ )
219
+ input_data = gr.Dataframe(
220
+ label="Input data",
221
+ max_height=100,
222
+ visible=False,
223
+ interactive=False,
224
+ )
225
+
226
+ input_file.upload(
227
+ load_input_data,
228
+ inputs=[input_file],
229
+ outputs=[input_data, input_column]
230
+ )
231
+ go_button2.click(
232
+ convert_file,
233
+ inputs=[
234
+ input_data,
235
+ input_column,
236
+ input_format,
237
+ output_format,
238
+ ],
239
+ outputs={
240
+ input_data,
241
+ }
242
+ ).then(
243
+ download_table,
244
+ inputs=input_data,
245
+ outputs=download
246
+ )
247
+
248
# Start the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # Enable Gradio's request queue before launching.
    demo.queue()
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)
251
+
schemist/app/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ carabiner-tools[mpl,pd]>=0.0.3.post1
2
+ gradio>=5.0
3
+ nemony
4
+ numpy
5
+ pandas
6
+ rdkit
7
+ schemist==0.0.1