Jhsmit committed on
Commit
d1c70f0
·
1 Parent(s): c8ea62d

switch to polars

Browse files

otherwise integer columns with missing data are not read as ints

Files changed (3) hide show
  1. app.py +17 -11
  2. make_link.py +5 -5
  3. viewer.py +17 -45
app.py CHANGED
@@ -4,8 +4,7 @@ from typing import Callable, Optional, cast
4
  from urllib.parse import parse_qsl
5
 
6
  import altair as alt
7
- import numpy as np
8
- import pandas as pd
9
  import reacton.core
10
  import solara
11
  import solara.lab
@@ -91,7 +90,7 @@ def ColorPickerMenuButton(title: str, color: solara.Reactive[str]):
91
  )
92
 
93
 
94
- empty_frame = pd.DataFrame()
95
  R_DEFAULT = ""
96
  V_DEFAULT = ""
97
 
@@ -130,17 +129,17 @@ def MainApp():
130
 
131
  def on_file(file_info: solara.components.file_drop.FileInfo | None):
132
  if not file_info:
133
- data.set(pd.DataFrame())
134
  return
135
 
136
  try:
137
- df = pd.read_csv(file_info["file_obj"])
138
  except Exception as e:
139
  warning_text.set(str(e))
140
  return
141
  if len(df.columns) < 2:
142
  warning_text.set(f"Expected at least 2 columns, got {len(df.columns)}")
143
- data.set(pd.DataFrame())
144
  return
145
 
146
  warning_text.set("")
@@ -168,10 +167,10 @@ def MainApp():
168
  autoscale_y=autoscale_y.value,
169
  )
170
 
171
- if data.value.empty:
172
- data_view = pd.DataFrame({"residue_number": [], "value": []})
173
  else:
174
- data_view = pd.DataFrame(
175
  {
176
  "residue_number": data.value[residue_column.value],
177
  "value": data.value[color_column.value],
@@ -236,7 +235,7 @@ def MainApp():
236
  if warning_text.value:
237
  solara.Warning(warning_text.value)
238
 
239
- if not data.value.empty:
240
  with solara.Row():
241
  solara.Select(
242
  label="Residue Column",
@@ -258,10 +257,17 @@ def MainApp():
258
  ColorPickerMenuButton("Highlight", highlight_color)
259
  ColorPickerMenuButton("Missing data", missing_data_color)
260
 
 
 
 
 
 
 
 
261
  # with solara.Row():
262
  solara.v.Autocomplete(
263
  v_model=cmap_name.value,
264
- on_v_model=cmap_name.set,
265
  items=CMAP_OPTIONS,
266
  )
267
 
 
4
  from urllib.parse import parse_qsl
5
 
6
  import altair as alt
7
+ import polars as pl
 
8
  import reacton.core
9
  import solara
10
  import solara.lab
 
90
  )
91
 
92
 
93
+ empty_frame = pl.DataFrame()
94
  R_DEFAULT = ""
95
  V_DEFAULT = ""
96
 
 
129
 
130
  def on_file(file_info: solara.components.file_drop.FileInfo | None):
131
  if not file_info:
132
+ data.set(pl.DataFrame())
133
  return
134
 
135
  try:
136
+ df = pl.read_csv(file_info["file_obj"])
137
  except Exception as e:
138
  warning_text.set(str(e))
139
  return
140
  if len(df.columns) < 2:
141
  warning_text.set(f"Expected at least 2 columns, got {len(df.columns)}")
142
+ data.set(pl.DataFrame())
143
  return
144
 
145
  warning_text.set("")
 
167
  autoscale_y=autoscale_y.value,
168
  )
169
 
170
+ if data.value.is_empty():
171
+ data_view = pl.DataFrame({"residue_number": [], "value": []})
172
  else:
173
+ data_view = pl.DataFrame(
174
  {
175
  "residue_number": data.value[residue_column.value],
176
  "value": data.value[color_column.value],
 
235
  if warning_text.value:
236
  solara.Warning(warning_text.value)
237
 
238
+ if not data.value.is_empty():
239
  with solara.Row():
240
  solara.Select(
241
  label="Residue Column",
 
257
  ColorPickerMenuButton("Highlight", highlight_color)
258
  ColorPickerMenuButton("Missing data", missing_data_color)
259
 
260
+ def set_cmap_name(name: str):
261
+ try:
262
+ Colormap(name)
263
+ cmap_name.set(name)
264
+ except TypeError:
265
+ pass
266
+
267
  # with solara.Row():
268
  solara.v.Autocomplete(
269
  v_model=cmap_name.value,
270
+ on_v_model=set_cmap_name,
271
  items=CMAP_OPTIONS,
272
  )
273
 
make_link.py CHANGED
@@ -5,7 +5,7 @@ from io import BytesIO
5
  from typing import TYPE_CHECKING
6
  from urllib.parse import urlencode
7
 
8
- import pandas as pd
9
  import zstandard as zstd
10
 
11
  if TYPE_CHECKING:
@@ -20,13 +20,13 @@ def encode_url(
20
  molecule_id: str,
21
  colors: ColorTransform,
22
  axis_properties: AxisProperties,
23
- data: pd.DataFrame,
24
  description: str = "",
25
  ):
26
  encode_dict = dict(title=title, molecule_id=molecule_id)
27
  encode_dict.update({**colors.model_dump(), **axis_properties.model_dump()})
28
 
29
- csv_str = data.to_csv(float_format="%.4f", index=False)
30
  compressed = COMPRESSOR.compress(csv_str.encode())
31
  base64_text = base64.b64encode(compressed).decode("utf8")
32
  encode_dict["data"] = base64_text
@@ -36,12 +36,12 @@ def encode_url(
36
  return urlencode(encode_dict)
37
 
38
 
39
- def decode_data(base64_text) -> pd.DataFrame:
40
  decoded_bytes = base64.b64decode(base64_text)
41
  decompressed = DECOMPRESSOR.decompress(decoded_bytes)
42
 
43
  bio = BytesIO(decompressed)
44
- data = pd.read_csv(bio)
45
  bio.close()
46
 
47
  return data
 
5
  from typing import TYPE_CHECKING
6
  from urllib.parse import urlencode
7
 
8
+ import polars as pl
9
  import zstandard as zstd
10
 
11
  if TYPE_CHECKING:
 
20
  molecule_id: str,
21
  colors: ColorTransform,
22
  axis_properties: AxisProperties,
23
+ data: pl.DataFrame,
24
  description: str = "",
25
  ):
26
  encode_dict = dict(title=title, molecule_id=molecule_id)
27
  encode_dict.update({**colors.model_dump(), **axis_properties.model_dump()})
28
 
29
+ csv_str = data.write_csv(float_precision=4, float_scientific=True)
30
  compressed = COMPRESSOR.compress(csv_str.encode())
31
  base64_text = base64.b64encode(compressed).decode("utf8")
32
  encode_dict["data"] = base64_text
 
36
  return urlencode(encode_dict)
37
 
38
 
39
+ def decode_data(base64_text) -> pl.DataFrame:
40
  decoded_bytes = base64.b64decode(base64_text)
41
  decompressed = DECOMPRESSOR.decompress(decoded_bytes)
42
 
43
  bio = BytesIO(decompressed)
44
+ data = pl.read_csv(bio)
45
  bio.close()
46
 
47
  return data
viewer.py CHANGED
@@ -11,7 +11,7 @@ from urllib.parse import parse_qsl
11
  import altair as alt
12
  import ipywidgets as widgets
13
  import numpy as np
14
- import pandas as pd
15
  import solara
16
  import solara.lab
17
  from cmap import Colormap
@@ -36,7 +36,8 @@ class ColorTransform(BaseModel):
36
  missing_data_color: str = "#8c8c8c"
37
  highlight_color: str = "#e933f8"
38
 
39
- def molstar_colors(self, data: pd.DataFrame) -> dict:
 
40
  if self.norm_type == "categorical":
41
  values = data["value"]
42
  else:
@@ -85,7 +86,7 @@ class AxisProperties(BaseModel):
85
 
86
 
87
  def make_chart(
88
- data: pd.DataFrame, colors: ColorTransform, axis_properties: AxisProperties
89
  ) -> alt.LayerChart:
90
  xmin, xmax = data["residue_number"].min(), data["residue_number"].max()
91
  xpad = (xmax - xmin) * 0.05
@@ -163,7 +164,7 @@ def make_chart(
163
 
164
  line_position = alt.param(name="line_position", value=0.0)
165
  line_opacity = alt.param(name="line_opacity", value=1)
166
- df_line = pd.DataFrame({"x": [1.0]})
167
 
168
  # Create vertical rule with parameter
169
  vline = (
@@ -188,7 +189,7 @@ def make_chart(
188
 
189
  @solara.component
190
  def ScatterChart(
191
- data: pd.DataFrame,
192
  colors: ColorTransform,
193
  axis_properties: AxisProperties,
194
  on_selections,
@@ -223,11 +224,17 @@ def ScatterChart(
223
  solara.use_effect(bind, [data, colors])
224
 
225
 
 
 
 
 
 
 
226
  @solara.component
227
  def ProteinView(
228
  title: str,
229
  molecule_id: str,
230
- data: pd.DataFrame,
231
  colors: ColorTransform,
232
  axis_properties: AxisProperties,
233
  dark_effective: bool,
@@ -242,16 +249,17 @@ def ProteinView(
242
  # residue number to highlight in protein view
243
  highlight_number = solara.use_reactive(None)
244
 
245
- if data.empty:
246
  color_data = {}
247
  else:
248
  color_data = colors.molstar_colors(data)
 
249
  tooltips = {
250
  "data": [
251
  {
252
  "residue_number": resi,
253
  "tooltip": f"{axis_properties.label}: {value:.2g} {axis_properties.unit}"
254
- if not np.isnan(value)
255
  else "No data",
256
  }
257
  for resi, value in zip(data["residue_number"], data["value"])
@@ -317,7 +325,7 @@ def ProteinView(
317
  ).key(f"molstar-{dark_effective}")
318
  if not fullscreen.value:
319
  with solara.Card(style={"height": "550px"}):
320
- if data.empty:
321
  solara.Text("No data")
322
  else:
323
  ScatterChart(
@@ -350,39 +358,3 @@ def RoutedView():
350
  )
351
  except KeyError as err:
352
  solara.Warning(f"Error: {err}")
353
-
354
-
355
- @solara.component
356
- def Page():
357
- dark_effective = solara.lab.use_dark_effective()
358
- dark_previous = solara.use_previous(dark_effective)
359
-
360
- if dark_previous != dark_effective:
361
- if dark_effective:
362
- alt.themes.enable("dark")
363
- else:
364
- alt.themes.enable("default")
365
-
366
- solara.Style(
367
- """
368
- .vega-embed {
369
- overflow: visible;
370
- width: 100% !important;
371
- }"""
372
- )
373
-
374
- settings = json.loads(Path("settings.json").read_text())
375
-
376
- colors = ColorTransform(**settings)
377
- axis_properties = AxisProperties(**settings)
378
-
379
- data = pd.read_csv("color_data.csv")
380
-
381
- ProteinView(
382
- settings["title"],
383
- molecule_id=settings["molecule_id"],
384
- data=data,
385
- colors=colors,
386
- axis_properties=axis_properties,
387
- dark_effective=dark_effective,
388
- )
 
11
  import altair as alt
12
  import ipywidgets as widgets
13
  import numpy as np
14
+ import polars as pl
15
  import solara
16
  import solara.lab
17
  from cmap import Colormap
 
36
  missing_data_color: str = "#8c8c8c"
37
  highlight_color: str = "#e933f8"
38
 
39
+ def molstar_colors(self, data: pl.DataFrame) -> dict:
40
+ data = data.drop_nulls()
41
  if self.norm_type == "categorical":
42
  values = data["value"]
43
  else:
 
86
 
87
 
88
  def make_chart(
89
+ data: pl.DataFrame, colors: ColorTransform, axis_properties: AxisProperties
90
  ) -> alt.LayerChart:
91
  xmin, xmax = data["residue_number"].min(), data["residue_number"].max()
92
  xpad = (xmax - xmin) * 0.05
 
164
 
165
  line_position = alt.param(name="line_position", value=0.0)
166
  line_opacity = alt.param(name="line_opacity", value=1)
167
+ df_line = pl.DataFrame({"x": [1.0]})
168
 
169
  # Create vertical rule with parameter
170
  vline = (
 
189
 
190
  @solara.component
191
  def ScatterChart(
192
+ data: pl.DataFrame,
193
  colors: ColorTransform,
194
  axis_properties: AxisProperties,
195
  on_selections,
 
224
  solara.use_effect(bind, [data, colors])
225
 
226
 
227
+ def is_numeric(val) -> bool:
228
+ if val is not None:
229
+ return not np.isnan(val)
230
+ return False
231
+
232
+
233
  @solara.component
234
  def ProteinView(
235
  title: str,
236
  molecule_id: str,
237
+ data: pl.DataFrame,
238
  colors: ColorTransform,
239
  axis_properties: AxisProperties,
240
  dark_effective: bool,
 
249
  # residue number to highlight in protein view
250
  highlight_number = solara.use_reactive(None)
251
 
252
+ if data.is_empty():
253
  color_data = {}
254
  else:
255
  color_data = colors.molstar_colors(data)
256
+
257
  tooltips = {
258
  "data": [
259
  {
260
  "residue_number": resi,
261
  "tooltip": f"{axis_properties.label}: {value:.2g} {axis_properties.unit}"
262
+ if is_numeric(value)
263
  else "No data",
264
  }
265
  for resi, value in zip(data["residue_number"], data["value"])
 
325
  ).key(f"molstar-{dark_effective}")
326
  if not fullscreen.value:
327
  with solara.Card(style={"height": "550px"}):
328
+ if data.is_empty():
329
  solara.Text("No data")
330
  else:
331
  ScatterChart(
 
358
  )
359
  except KeyError as err:
360
  solara.Warning(f"Error: {err}")