mylessss committed on
Commit
697c77c
·
1 Parent(s): b6f407d
Files changed (3) hide show
  1. README.md +7 -6
  2. app.py +189 -366
  3. requirements.txt +9 -4
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: marimo app template
3
- emoji: 🍃
4
- colorFrom: indigo
5
- colorTo: purple
6
  sdk: docker
7
- pinned: true
8
  license: mit
9
- short_description: Template for deploying a marimo application to HF
10
  ---
11
 
12
  Check out marimo at <https://github.com/marimo-team/marimo>
 
13
  Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
 
1
  ---
2
+ title: motherduck embedding explorer
3
+ emoji: 🦆
4
+ colorFrom: purple
5
+ colorTo: indigo
6
  sdk: docker
7
+ pinned: false
8
  license: mit
9
+ short_description: motherduck embedding explorer
10
  ---
11
 
12
  Check out marimo at <https://github.com/marimo-team/marimo>
13
+ Check out motherduck at <https://motherduck.com>
14
  Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
app.py CHANGED
@@ -1,469 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import marimo
2
 
3
- __generated_with = "0.9.2"
4
- app = marimo.App()
5
 
6
 
7
  @app.cell
8
  def __():
9
  import marimo as mo
10
 
11
- mo.md("# Welcome to marimo! 🌊🍃")
12
  return (mo,)
13
 
14
 
15
- @app.cell
16
  def __(mo):
17
- slider = mo.ui.slider(1, 22)
18
- return (slider,)
19
-
20
-
21
- @app.cell
22
- def __(mo, slider):
23
  mo.md(
24
- f"""
25
- marimo is a **reactive** Python notebook.
26
 
27
- This means that unlike traditional notebooks, marimo notebooks **run
28
- automatically** when you modify them or
29
- interact with UI elements, like this slider: {slider}.
30
 
31
- {"##" + "🍃" * slider.value}
32
  """
33
  )
34
  return
35
 
36
 
37
- @app.cell(hide_code=True)
38
- def __(mo):
39
- mo.accordion(
40
- {
41
- "Tip: disabling automatic execution": mo.md(
42
- rf"""
43
- marimo lets you disable automatic execution: just go into the
44
- notebook settings and set
45
-
46
- "Runtime > On Cell Change" to "lazy".
47
-
48
- When the runtime is lazy, after running a cell, marimo marks its
49
- descendants as stale instead of automatically running them. The
50
- lazy runtime puts you in control over when cells are run, while
51
- still giving guarantees about the notebook state.
52
- """
53
- )
54
- }
55
- )
56
- return
57
-
58
-
59
- @app.cell(hide_code=True)
60
- def __(mo):
61
- mo.md(
62
- """
63
- Tip: This is a tutorial notebook. You can create your own notebooks
64
- by entering `marimo edit` at the command line.
65
- """
66
- ).callout()
67
- return
68
-
69
-
70
  @app.cell(hide_code=True)
71
  def __(mo):
72
  mo.md(
73
  """
74
- ## 1. Reactive execution
75
-
76
- A marimo notebook is made up of small blocks of Python code called
77
- cells.
78
 
79
- marimo reads your cells and models the dependencies among them: whenever
80
- a cell that defines a global variable is run, marimo
81
- **automatically runs** all cells that reference that variable.
82
 
83
- Reactivity keeps your program state and outputs in sync with your code,
84
- making for a dynamic programming environment that prevents bugs before they
85
- happen.
 
86
  """
87
  )
88
  return
89
 
90
 
91
- @app.cell(hide_code=True)
92
- def __(changed, mo):
93
- (
94
- mo.md(
95
- f"""
96
- **✨ Nice!** The value of `changed` is now {changed}.
97
-
98
- When you updated the value of the variable `changed`, marimo
99
- **reacted** by running this cell automatically, because this cell
100
- references the global variable `changed`.
101
-
102
- Reactivity ensures that your notebook state is always
103
- consistent, which is crucial for doing good science; it's also what
104
- enables marimo notebooks to double as tools and apps.
105
- """
106
- )
107
- if changed
108
- else mo.md(
109
- """
110
- **🌊 See it in action.** In the next cell, change the value of the
111
- variable `changed` to `True`, then click the run button.
112
- """
113
- )
114
- )
115
- return
116
-
117
-
118
  @app.cell
119
- def __():
120
- changed = False
121
- return (changed,)
122
-
123
-
124
- @app.cell(hide_code=True)
125
  def __(mo):
126
- mo.accordion(
127
- {
128
- "Tip: execution order": (
129
- """
130
- The order of cells on the page has no bearing on
131
- the order in which cells are executed: marimo knows that a cell
132
- reading a variable must run after the cell that defines it. This
133
- frees you to organize your code in the way that makes the most
134
- sense for you.
135
- """
136
- )
137
- }
138
- )
139
- return
140
-
141
-
142
- @app.cell(hide_code=True)
143
- def __(mo):
144
- mo.md(
145
  """
146
- **Global names must be unique.** To enable reactivity, marimo imposes a
147
- constraint on how names appear in cells: no two cells may define the same
148
- variable.
149
  """
150
  )
151
- return
152
 
153
 
154
- @app.cell(hide_code=True)
155
- def __(mo):
156
- mo.accordion(
157
- {
158
- "Tip: encapsulation": (
159
- """
160
- By encapsulating logic in functions, classes, or Python modules,
161
- you can minimize the number of global variables in your notebook.
162
- """
163
- )
164
- }
165
- )
166
- return
167
-
168
-
169
- @app.cell(hide_code=True)
170
- def __(mo):
171
- mo.accordion(
172
- {
173
- "Tip: private variables": (
174
- """
175
- Variables prefixed with an underscore are "private" to a cell, so
176
- they can be defined by multiple cells.
177
- """
178
- )
179
- }
180
- )
181
- return
182
-
183
-
184
- @app.cell(hide_code=True)
185
  def __(mo):
186
- mo.md(
187
  """
188
- ## 2. UI elements
189
-
190
- Cells can output interactive UI elements. Interacting with a UI
191
- element **automatically triggers notebook execution**: when
192
- you interact with a UI element, its value is sent back to Python, and
193
- every cell that references that element is re-run.
194
-
195
- marimo provides a library of UI elements to choose from under
196
- `marimo.ui`.
197
  """
198
  )
199
  return
200
 
201
 
202
  @app.cell
203
- def __(mo):
204
- mo.md("""**🌊 Some UI elements.** Try interacting with the below elements.""")
205
- return
206
-
 
 
 
 
207
 
208
- @app.cell
209
- def __(mo):
210
- icon = mo.ui.dropdown(["🍃", "🌊", "✨"], value="🍃")
211
- return (icon,)
212
 
213
 
214
  @app.cell
215
- def __(icon, mo):
216
- repetitions = mo.ui.slider(1, 16, label=f"number of {icon.value}: ")
217
- return (repetitions,)
 
 
 
 
 
 
 
 
 
 
 
218
 
 
 
 
 
 
 
 
 
219
 
220
- @app.cell
221
- def __(icon, repetitions):
222
- icon, repetitions
223
- return
 
224
 
 
 
 
225
 
226
- @app.cell
227
- def __(icon, mo, repetitions):
228
- mo.md("# " + icon.value * repetitions.value)
229
- return
230
 
231
 
232
- @app.cell(hide_code=True)
233
  def __(mo):
234
- mo.md(
235
- """
236
- ## 3. marimo is just Python
237
-
238
- marimo cells parse Python (and only Python), and marimo notebooks are
239
- stored as pure Python files — outputs are _not_ included. There's no
240
- magical syntax.
241
-
242
- The Python files generated by marimo are:
243
-
244
- - easily versioned with git, yielding minimal diffs
245
- - legible for both humans and machines
246
- - formattable using your tool of choice,
247
- - usable as Python scripts, with UI elements taking their default
248
- values, and
249
- - importable by other modules (more on that in the future).
250
- """
251
  )
252
- return
 
 
 
 
 
253
 
254
 
255
- @app.cell(hide_code=True)
256
  def __(mo):
257
  mo.md(
258
- """
259
- ## 4. Running notebooks as apps
260
-
261
- marimo notebooks can double as apps. Click the app window icon in the
262
- bottom-right to see this notebook in "app view."
263
 
264
- Serve a notebook as an app with `marimo run` at the command-line.
265
- Of course, you can use marimo just to level-up your
266
- notebooking, without ever making apps.
267
  """
268
  )
269
  return
270
 
271
 
272
- @app.cell(hide_code=True)
273
- def __(mo):
274
- mo.md(
275
- """
276
- ## 5. The `marimo` command-line tool
277
-
278
- **Creating and editing notebooks.** Use
279
-
280
- ```
281
- marimo edit
282
- ```
283
-
284
- in a terminal to start the marimo notebook server. From here
285
- you can create a new notebook or edit existing ones.
286
-
287
-
288
- **Running as apps.** Use
 
 
 
289
 
290
- ```
291
- marimo run notebook.py
292
- ```
293
 
294
- to start a webserver that serves your notebook as an app in read-only mode,
295
- with code cells hidden.
 
 
296
 
297
- **Convert a Jupyter notebook.** Convert a Jupyter notebook to a marimo
298
- notebook using `marimo convert`:
299
 
300
- ```
301
- marimo convert your_notebook.ipynb > your_app.py
302
- ```
303
-
304
- **Tutorials.** marimo comes packaged with tutorials:
 
 
 
 
 
 
 
 
305
 
306
- - `dataflow`: more on marimo's automatic execution
307
- - `ui`: how to use UI elements
308
- - `markdown`: how to write markdown, with interpolated values and
309
- LaTeX
310
- - `plots`: how plotting works in marimo
311
- - `sql`: how to use SQL
312
- - `layout`: layout elements in marimo
313
- - `fileformat`: how marimo's file format works
314
- - `markdown-format`: for using `.md` files in marimo
315
- - `for-jupyter-users`: if you are coming from Jupyter
316
 
317
- Start a tutorial with `marimo tutorial`; for example,
 
 
 
 
 
318
 
319
- ```
320
- marimo tutorial dataflow
321
- ```
322
 
323
- In addition to tutorials, we have examples in our
324
- [our GitHub repo](https://www.github.com/marimo-team/marimo/tree/main/examples).
325
- """
 
 
 
 
 
 
 
 
326
  )
327
- return
 
 
328
 
329
 
330
  @app.cell(hide_code=True)
331
  def __(mo):
332
  mo.md(
333
- """
334
- ## 6. The marimo editor
335
 
336
- Here are some tips to help you get started with the marimo editor.
337
  """
338
  )
339
  return
340
 
341
 
342
  @app.cell
343
- def __(mo, tips):
344
- mo.accordion(tips)
345
  return
346
 
347
 
348
- @app.cell(hide_code=True)
349
- def __(mo):
350
- mo.md("""## Finally, a fun fact""")
351
  return
352
 
353
 
354
- @app.cell(hide_code=True)
355
  def __(mo):
356
- mo.md(
357
- """
358
- The name "marimo" is a reference to a type of algae that, under
359
- the right conditions, clumps together to form a small sphere
360
- called a "marimo moss ball". Made of just strands of algae, these
361
- beloved assemblages are greater than the sum of their parts.
362
- """
363
- )
364
  return
365
 
366
 
367
- @app.cell(hide_code=True)
368
  def __():
369
- tips = {
370
- "Saving": (
371
- """
372
- **Saving**
373
-
374
- - _Name_ your app using the box at the top of the screen, or
375
- with `Ctrl/Cmd+s`. You can also create a named app at the
376
- command line, e.g., `marimo edit app_name.py`.
377
-
378
- - _Save_ by clicking the save icon on the bottom right, or by
379
- inputting `Ctrl/Cmd+s`. By default marimo is configured
380
- to autosave.
381
- """
382
- ),
383
- "Running": (
384
- """
385
- 1. _Run a cell_ by clicking the play ( ▷ ) button on the top
386
- right of a cell, or by inputting `Ctrl/Cmd+Enter`.
387
-
388
- 2. _Run a stale cell_ by clicking the yellow run button on the
389
- right of the cell, or by inputting `Ctrl/Cmd+Enter`. A cell is
390
- stale when its code has been modified but not run.
391
-
392
- 3. _Run all stale cells_ by clicking the play ( ▷ ) button on
393
- the bottom right of the screen, or input `Ctrl/Cmd+Shift+r`.
394
- """
395
- ),
396
- "Console Output": (
397
- """
398
- Console output (e.g., `print()` statements) is shown below a
399
- cell.
400
- """
401
- ),
402
- "Creating, Moving, and Deleting Cells": (
403
- """
404
- 1. _Create_ a new cell above or below a given one by clicking
405
- the plus button to the left of the cell, which appears on
406
- mouse hover.
407
-
408
- 2. _Move_ a cell up or down by dragging on the handle to the
409
- right of the cell, which appears on mouse hover.
410
-
411
- 3. _Delete_ a cell by clicking the trash bin icon. Bring it
412
- back by clicking the undo button on the bottom right of the
413
- screen, or with `Ctrl/Cmd+Shift+z`.
414
- """
415
- ),
416
- "Disabling Automatic Execution": (
417
- """
418
- Via the notebook settings (gear icon) or footer panel, you
419
- can disable automatic execution. This is helpful when
420
- working with expensive notebooks or notebooks that have
421
- side-effects like database transactions.
422
- """
423
- ),
424
- "Disabling Cells": (
425
- """
426
- You can disable a cell via the cell context menu.
427
- marimo will never run a disabled cell or any cells that depend on it.
428
- This can help prevent accidental execution of expensive computations
429
- when editing a notebook.
430
- """
431
- ),
432
- "Code Folding": (
433
- """
434
- You can collapse or fold the code in a cell by clicking the arrow
435
- icons in the line number column to the left, or by using keyboard
436
- shortcuts.
437
-
438
- Use the command palette (`Ctrl/Cmd+k`) or a keyboard shortcut to
439
- quickly fold or unfold all cells.
440
- """
441
- ),
442
- "Code Formatting": (
443
- """
444
- If you have [ruff](https://github.com/astral-sh/ruff) installed,
445
- you can format a cell with the keyboard shortcut `Ctrl/Cmd+b`.
446
- """
447
- ),
448
- "Command Palette": (
449
- """
450
- Use `Ctrl/Cmd+k` to open the command palette.
451
- """
452
- ),
453
- "Keyboard Shortcuts": (
454
- """
455
- Open the notebook menu (top-right) or input `Ctrl/Cmd+Shift+h` to
456
- view a list of all keyboard shortcuts.
457
- """
458
- ),
459
- "Configuration": (
460
- """
461
- Configure the editor by clicking the gears icon near the top-right
462
- of the screen.
463
- """
464
- ),
465
- }
466
- return (tips,)
467
 
468
 
469
  if __name__ == "__main__":
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "altair==5.4.1",
5
+ # "duckdb==1.1.3",
6
+ # "hdbscan==0.8.39",
7
+ # "marimo",
8
+ # "numba==0.60.0",
9
+ # "numpy==2.0.2",
10
+ # "polars==1.17.1",
11
+ # "pyarrow==18.0.0",
12
+ # "scikit-learn==1.5.2",
13
+ # "umap-learn==0.5.7",
14
+ # ]
15
+ # ///
16
+
17
  import marimo
18
 
19
+ __generated_with = "0.9.33"
20
+ app = marimo.App(width="medium")
21
 
22
 
23
  @app.cell
24
  def __():
25
  import marimo as mo
26
 
 
27
  return (mo,)
28
 
29
 
30
+ @app.cell(hide_code=True)
31
  def __(mo):
 
 
 
 
 
 
32
  mo.md(
33
+ r"""
34
+ # Visualizing text embeddings using MotherDuck and marimo
35
 
36
+ > Text embeddings have become a crucial tool in AI/ML applications, allowing us to convert text into numerical vectors that capture semantic meaning. These vectors are often used for semantic search, but in this blog post, we'll explore how to visualize and explore text embeddings interactively using MotherDuck and marimo.
 
 
37
 
38
+ [_Read the full blog here._](https://motherduck.com/blog/MotherDuck-Visualize-Embeddings-Marimo/)
39
  """
40
  )
41
  return
42
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  @app.cell(hide_code=True)
45
  def __(mo):
46
  mo.md(
47
  """
48
+ ## Connecting to MotherDuck and Loading Sample Data
 
 
 
49
 
50
+ This data has already been pre-computed, but you can fork and edit this notebook to run with your own data!
 
 
51
 
52
+ ```sql
53
+ ATTACH IF NOT EXISTS 'md:my_db'
54
+ SELECT * FROM my_db.demo_with_embeddings;
55
+ ```
56
  """
57
  )
58
  return
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  @app.cell
 
 
 
 
 
 
62
  def __(mo):
63
+ _df = mo.sql(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  """
65
+ ATTACH IF NOT EXISTS 'md:my_db'
 
 
66
  """
67
  )
68
+ return (my_db,)
69
 
70
 
71
+ @app.cell
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def __(mo):
73
+ _df = mo.sql(
74
  """
75
+ -- Commented out as we have already run the embeddings for showcasing purposes.
76
+
77
+ -- CREATE OR REPLACE TABLE my_db.demo_embedding_data AS
78
+ -- SELECT DISTINCT ON (url) * -- Remove duplicate URLs
79
+ -- FROM 'hf://datasets/julien040/hacker-news-posts/story.parquet'
80
+ -- WHERE contains(title, 'database') -- Filter for posts about databases
81
+ -- AND score > 5 -- Only include popular posts
82
+ -- LIMIT 50000;
 
83
  """
84
  )
85
  return
86
 
87
 
88
  @app.cell
89
+ def __(demo_with_embeddings, mo, my_db):
90
+ embeddings = mo.sql(
91
+ f"""
92
+ -- Commented out as we have already run the embeddings for showcasing purposes.
93
+ -- CREATE TABLE my_db.demo_with_embeddings AS
94
+ -- SELECT *, embedding(title) as text_embedding
95
+ -- FROM my_db.demo_embedding_data
96
+ -- LIMIT 1500;
97
 
98
+ SELECT title, text_embedding, * EXCLUDE(id, title, text_embedding, comments) FROM my_db.demo_with_embeddings;
99
+ """
100
+ )
101
+ return (embeddings,)
102
 
103
 
104
  @app.cell
105
+ def __(PCA, hdbscan, np, umap):
106
+ def umap_reduce(np_array, metric="cosine"):
107
+ """
108
+ Reduce the dimensionality of the embeddings to 2D using
109
+ UMAP algorithm. UMAP preserves both local and global structure
110
+ of the high-dimensional data.
111
+ """
112
+ reducer = umap.UMAP(
113
+ n_components=2, # Reduce to 2D for visualization
114
+ metric=metric, # Default: cosine similarity for text embeddings
115
+ n_neighbors=80, # Higher values = more global structure
116
+ min_dist=0.1, # Controls how tightly points cluster
117
+ )
118
+ return reducer.fit_transform(np_array)
119
 
120
+ def cluster_points(np_array, min_cluster_size=4, max_cluster_size=50):
121
+ """
122
+ Cluster the embeddings using HDBSCAN algorithm.
123
+ We first reduce dimensionality to 50D with PCA to speed up clustering,
124
+ while still preserving most of the important information.
125
+ """
126
+ pca = PCA(n_components=50)
127
+ np_array = pca.fit_transform(np_array)
128
 
129
+ hdb = hdbscan.HDBSCAN(
130
+ min_samples=3, # Minimum points to form dense region
131
+ min_cluster_size=min_cluster_size, # Minimum size of a cluster
132
+ max_cluster_size=max_cluster_size, # Maximum size of a cluster
133
+ ).fit(np_array)
134
 
135
+ return np.where(
136
+ hdb.labels_ == -1, "outlier", "cluster_" + hdb.labels_.astype(str)
137
+ )
138
 
139
+ return cluster_points, umap_reduce
 
 
 
140
 
141
 
142
+ @app.cell
143
  def __(mo):
144
+ cluster_size_slider = mo.ui.range_slider(
145
+ start=1,
146
+ stop=80,
147
+ value=(4, 50),
148
+ step=1,
149
+ show_value=True,
150
+ debounce=True,
151
+ label="Cluster Size (min, max)",
 
 
 
 
 
 
 
 
 
152
  )
153
+ metric_dropdown = mo.ui.dropdown(
154
+ ["cosine", "euclidean", "manhattan", "mahalanobis"],
155
+ value="cosine",
156
+ label="Distance Metric",
157
+ )
158
+ return cluster_size_slider, metric_dropdown
159
 
160
 
161
+ @app.cell
162
  def __(mo):
163
  mo.md(
164
+ r"""
165
+ ## Processing the Data
 
 
 
166
 
167
+ Now we'll transform our high-dimensional embeddings into something we can visualize, using `umap_reduce` and `cluster_points`. More details on this step [in the blog](https://motherduck.com/blog/MotherDuck-Visualize-Embeddings-Marimo/).
 
 
168
  """
169
  )
170
  return
171
 
172
 
173
+ @app.cell
174
+ def __(
175
+ cluster_points,
176
+ cluster_size_slider,
177
+ embeddings,
178
+ metric_dropdown,
179
+ mo,
180
+ umap_reduce,
181
+ ):
182
+ with mo.status.spinner("Clustering points...") as _s:
183
+ embeddings_array = embeddings["text_embedding"].to_numpy()
184
+ hdb_labels = cluster_points(
185
+ embeddings_array,
186
+ min_cluster_size=cluster_size_slider.value[0],
187
+ max_cluster_size=cluster_size_slider.value[1],
188
+ )
189
+ _s.update("Reducing dimensionality...")
190
+ embeddings_2d = umap_reduce(embeddings_array, metric=metric_dropdown.value)
191
+ mo.show_code()
192
+ return embeddings_2d, embeddings_array, hdb_labels
193
 
 
 
 
194
 
195
+ @app.cell
196
+ def __(cluster_size_slider, metric_dropdown, mo):
197
+ mo.hstack([cluster_size_slider, metric_dropdown])
198
+ return
199
 
 
 
200
 
201
+ @app.cell
202
+ def __(embeddings, embeddings_2d, hdb_labels, pl):
203
+ data = embeddings.lazy() # Lazy evaluation for performance
204
+ data = data.with_columns(
205
+ text_embedding_2d_1=embeddings_2d[:, 0],
206
+ text_embedding_2d_2=embeddings_2d[:, 1],
207
+ cluster=hdb_labels,
208
+ )
209
+ data = data.unique(subset=["url"], maintain_order=True) # Remove duplicate URLs
210
+ data = data.drop(["text_embedding"]) # Drop unused columns
211
+ data = data.filter(pl.col("cluster") != "outlier") # Filter out outliers
212
+ data = data.collect() # Collect the data
213
+ return (data,)
214
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ @app.cell
217
+ def __(data):
218
+ data.select(
219
+ "title", "cluster", "text_embedding_2d_1", "text_embedding_2d_2", "score"
220
+ )
221
+ return
222
 
 
 
 
223
 
224
+ @app.cell
225
+ def __(alt, data, mo):
226
+ chart = (
227
+ alt.Chart(data)
228
+ .mark_point()
229
+ .encode(
230
+ x=alt.X("text_embedding_2d_1").scale(zero=False),
231
+ y=alt.Y("text_embedding_2d_2").scale(zero=False),
232
+ color="cluster",
233
+ tooltip=["title", "score", "cluster"],
234
+ )
235
  )
236
+ chart = mo.ui.altair_chart(chart)
237
+ mo.show_code()
238
+ return (chart,)
239
 
240
 
241
  @app.cell(hide_code=True)
242
  def __(mo):
243
  mo.md(
244
+ r"""
245
+ ## Creating an Interactive Visualization
246
 
247
+ We will plot the 2D representation of the text embeddings, colored by the clusters identified by HDBSCAN. You can select points on the chart to explore the text embeddings further. 👇
248
  """
249
  )
250
  return
251
 
252
 
253
  @app.cell
254
+ def __(chart):
255
+ chart
256
  return
257
 
258
 
259
+ @app.cell
260
+ def __(chart):
261
+ chart.value
262
  return
263
 
264
 
265
+ @app.cell
266
  def __(mo):
267
+ # Empty space for the table
268
+ mo.Html("<div style='height: 400px;'></div>")
 
 
 
 
 
 
269
  return
270
 
271
 
272
+ @app.cell
273
  def __():
274
+ # Data manipulation and database connections
275
+ import polars as pl
276
+ import duckdb
277
+ import numba # <- FYI, this module takes a while to load, be patient
278
+ import pyarrow
279
+
280
+ # Visualization
281
+ import altair as alt
282
+
283
+ # ML tools for dimensionality reduction and clustering
284
+ import umap # For reducing high-dimensional embeddings to 2D
285
+ import hdbscan # For clustering similar embeddings
286
+ import numpy as np
287
+ from sklearn.decomposition import PCA
288
+
289
+ return PCA, alt, duckdb, hdbscan, np, numba, pl, pyarrow, umap
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
 
292
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,5 +1,10 @@
 
 
 
1
  marimo
2
- # Or a specific version
3
- # marimo>=0.9.0
4
-
5
- # Add other dependencies as needed
 
 
 
1
+ altair==5.4.1
2
+ duckdb==1.1.3
3
+ hdbscan==0.8.39
4
  marimo
5
+ numba==0.60.0
6
+ numpy==2.0.2
7
+ polars==1.17.1
8
+ pyarrow==18.0.0
9
+ scikit-learn==1.5.2
10
+ umap-learn==0.5.7