koaning committed on
Commit
36c6cb2
·
1 Parent(s): ceba672
Files changed (3) hide show
  1. README.md +8 -0
  2. app.py +215 -393
  3. spam.csv +0 -0
README.md CHANGED
@@ -11,3 +11,11 @@ short_description: A bulk labelling interface for binary text classification
11
 
12
  Check out marimo at <https://github.com/marimo-team/marimo>
13
  Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
 
 
 
 
 
 
 
 
 
11
 
12
  Check out marimo at <https://github.com/marimo-team/marimo>
13
  Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
14
+
15
+ Make sure you set your [access token](https://huggingface.co/blog/password-git-deprecation) before pushing/pulling.
16
+
17
+ ```bash
18
+ git remote set-url origin https://<user_name>:<token>@huggingface.co/<repo_path>
19
+ git pull origin
20
+ git push origin
21
+ ```
app.py CHANGED
@@ -1,469 +1,291 @@
1
- import marimo
2
-
3
- __generated_with = "0.9.2"
4
- app = marimo.App()
5
-
6
-
7
- @app.cell
8
- def __():
9
- import marimo as mo
10
-
11
- mo.md("# Welcome to marimo! Updated! 🌊🍃")
12
- return (mo,)
 
 
13
 
 
14
 
15
- @app.cell
16
- def __(mo):
17
- slider = mo.ui.slider(1, 22)
18
- return (slider,)
19
 
20
 
21
  @app.cell
22
- def __(mo, slider):
23
- mo.md(
24
- f"""
25
- marimo is a **reactive** Python notebook.
26
-
27
- This means that unlike traditional notebooks, marimo notebooks **run
28
- automatically** when you modify them or
29
- interact with UI elements, like this slider: {slider}.
30
-
31
- {"##" + "🍃" * slider.value}
32
- """
33
- )
34
  return
35
 
36
 
37
- @app.cell(hide_code=True)
38
- def __(mo):
39
- mo.accordion(
40
- {
41
- "Tip: disabling automatic execution": mo.md(
42
- rf"""
43
- marimo lets you disable automatic execution: just go into the
44
- notebook settings and set
45
-
46
- "Runtime > On Cell Change" to "lazy".
47
-
48
- When the runtime is lazy, after running a cell, marimo marks its
49
- descendants as stale instead of automatically running them. The
50
- lazy runtime puts you in control over when cells are run, while
51
- still giving guarantees about the notebook state.
52
- """
53
- )
54
- }
55
- )
56
- return
57
-
58
 
59
- @app.cell(hide_code=True)
60
- def __(mo):
61
- mo.md(
62
- """
63
- Tip: This is a tutorial notebook. You can create your own notebooks
64
- by entering `marimo edit` at the command line.
65
- """
66
- ).callout()
67
- return
68
 
69
 
70
- @app.cell(hide_code=True)
71
- def __(mo):
72
- mo.md(
73
- """
74
- ## 1. Reactive execution
75
 
76
- A marimo notebook is made up of small blocks of Python code called
77
- cells.
78
 
79
- marimo reads your cells and models the dependencies among them: whenever
80
- a cell that defines a global variable is run, marimo
81
- **automatically runs** all cells that reference that variable.
 
 
82
 
83
- Reactivity keeps your program state and outputs in sync with your code,
84
- making for a dynamic programming environment that prevents bugs before they
85
- happen.
86
- """
87
- )
88
- return
89
 
 
 
 
90
 
91
- @app.cell(hide_code=True)
92
- def __(changed, mo):
93
- (
94
- mo.md(
95
- f"""
96
- **✨ Nice!** The value of `changed` is now {changed}.
97
-
98
- When you updated the value of the variable `changed`, marimo
99
- **reacted** by running this cell automatically, because this cell
100
- references the global variable `changed`.
101
-
102
- Reactivity ensures that your notebook state is always
103
- consistent, which is crucial for doing good science; it's also what
104
- enables marimo notebooks to double as tools and apps.
105
- """
106
- )
107
- if changed
108
- else mo.md(
109
- """
110
- **🌊 See it in action.** In the next cell, change the value of the
111
- variable `changed` to `True`, then click the run button.
112
- """
113
- )
114
- )
115
- return
116
 
117
 
118
  @app.cell
119
- def __():
120
- changed = False
121
- return (changed,)
122
-
123
-
124
- @app.cell(hide_code=True)
125
- def __(mo):
126
- mo.accordion(
127
- {
128
- "Tip: execution order": (
129
- """
130
- The order of cells on the page has no bearing on
131
- the order in which cells are executed: marimo knows that a cell
132
- reading a variable must run after the cell that defines it. This
133
- frees you to organize your code in the way that makes the most
134
- sense for you.
135
- """
136
- )
137
- }
138
- )
139
- return
140
 
141
 
142
- @app.cell(hide_code=True)
143
- def __(mo):
144
- mo.md(
145
- """
146
- **Global names must be unique.** To enable reactivity, marimo imposes a
147
- constraint on how names appear in cells: no two cells may define the same
148
- variable.
149
- """
150
- )
151
- return
152
 
 
 
 
153
 
154
- @app.cell(hide_code=True)
155
- def __(mo):
156
- mo.accordion(
157
- {
158
- "Tip: encapsulation": (
159
- """
160
- By encapsulating logic in functions, classes, or Python modules,
161
- you can minimize the number of global variables in your notebook.
162
- """
163
- )
164
- }
165
- )
166
- return
167
 
 
 
 
 
 
 
168
 
169
- @app.cell(hide_code=True)
170
- def __(mo):
171
- mo.accordion(
172
- {
173
- "Tip: private variables": (
174
- """
175
- Variables prefixed with an underscore are "private" to a cell, so
176
- they can be defined by multiple cells.
177
- """
178
- )
179
- }
180
- )
181
- return
182
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- @app.cell(hide_code=True)
185
- def __(mo):
186
- mo.md(
187
- """
188
- ## 2. UI elements
189
 
190
- Cells can output interactive UI elements. Interacting with a UI
191
- element **automatically triggers notebook execution**: when
192
- you interact with a UI element, its value is sent back to Python, and
193
- every cell that references that element is re-run.
194
 
195
- marimo provides a library of UI elements to choose from under
196
- `marimo.ui`.
197
- """
198
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  return
200
 
201
 
202
  @app.cell
203
- def __(mo):
204
- mo.md("""**🌊 Some UI elements.** Try interacting with the below elements.""")
205
  return
206
 
207
 
208
  @app.cell
209
- def __(mo):
210
- icon = mo.ui.dropdown(["🍃", "🌊", "✨"], value="🍃")
211
- return (icon,)
 
 
 
 
212
 
213
 
214
  @app.cell
215
- def __(icon, mo):
216
- repetitions = mo.ui.slider(1, 16, label=f"number of {icon.value}: ")
217
- return (repetitions,)
218
 
219
 
220
  @app.cell
221
- def __(icon, repetitions):
222
- icon, repetitions
223
  return
224
 
225
 
226
  @app.cell
227
- def __(icon, mo, repetitions):
228
- mo.md("# " + icon.value * repetitions.value)
229
- return
230
-
231
-
232
- @app.cell(hide_code=True)
233
- def __(mo):
234
- mo.md(
235
- """
236
- ## 3. marimo is just Python
237
-
238
- marimo cells parse Python (and only Python), and marimo notebooks are
239
- stored as pure Python files — outputs are _not_ included. There's no
240
- magical syntax.
241
-
242
- The Python files generated by marimo are:
243
-
244
- - easily versioned with git, yielding minimal diffs
245
- - legible for both humans and machines
246
- - formattable using your tool of choice,
247
- - usable as Python scripts, with UI elements taking their default
248
- values, and
249
- - importable by other modules (more on that in the future).
250
- """
251
- )
252
- return
253
 
254
 
255
- @app.cell(hide_code=True)
256
- def __(mo):
257
- mo.md(
258
- """
259
- ## 4. Running notebooks as apps
260
-
261
- marimo notebooks can double as apps. Click the app window icon in the
262
- bottom-right to see this notebook in "app view."
263
-
264
- Serve a notebook as an app with `marimo run` at the command-line.
265
- Of course, you can use marimo just to level-up your
266
- notebooking, without ever making apps.
267
- """
268
- )
269
- return
270
-
271
-
272
- @app.cell(hide_code=True)
273
- def __(mo):
274
- mo.md(
275
- """
276
- ## 5. The `marimo` command-line tool
277
-
278
- **Creating and editing notebooks.** Use
279
 
280
- ```
281
- marimo edit
282
- ```
283
 
284
- in a terminal to start the marimo notebook server. From here
285
- you can create a new notebook or edit existing ones.
 
286
 
 
 
287
 
288
- **Running as apps.** Use
 
289
 
290
- ```
291
- marimo run notebook.py
292
- ```
293
 
294
- to start a webserver that serves your notebook as an app in read-only mode,
295
- with code cells hidden.
 
296
 
297
- **Convert a Jupyter notebook.** Convert a Jupyter notebook to a marimo
298
- notebook using `marimo convert`:
299
 
300
- ```
301
- marimo convert your_notebook.ipynb > your_app.py
302
- ```
 
 
 
 
303
 
304
- **Tutorials.** marimo comes packaged with tutorials:
305
 
306
- - `dataflow`: more on marimo's automatic execution
307
- - `ui`: how to use UI elements
308
- - `markdown`: how to write markdown, with interpolated values and
309
- LaTeX
310
- - `plots`: how plotting works in marimo
311
- - `sql`: how to use SQL
312
- - `layout`: layout elements in marimo
313
- - `fileformat`: how marimo's file format works
314
- - `markdown-format`: for using `.md` files in marimo
315
- - `for-jupyter-users`: if you are coming from Jupyter
316
 
317
- Start a tutorial with `marimo tutorial`; for example,
318
 
319
- ```
320
- marimo tutorial dataflow
321
- ```
 
322
 
323
- In addition to tutorials, we have examples in our
324
- [our GitHub repo](https://www.github.com/marimo-team/marimo/tree/main/examples).
325
- """
326
- )
327
- return
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
- @app.cell(hide_code=True)
331
- def __(mo):
332
- mo.md(
333
- """
334
- ## 6. The marimo editor
335
 
336
- Here are some tips to help you get started with the marimo editor.
337
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  )
339
- return
340
 
 
 
 
 
341
 
342
- @app.cell
343
- def __(mo, tips):
344
- mo.accordion(tips)
345
- return
346
 
 
 
 
 
 
 
 
 
347
 
348
- @app.cell(hide_code=True)
349
- def __(mo):
350
- mo.md("""## Finally, a fun fact""")
351
- return
352
 
353
 
354
- @app.cell(hide_code=True)
355
- def __(mo):
356
- mo.md(
357
- """
358
- The name "marimo" is a reference to a type of algae that, under
359
- the right conditions, clumps together to form a small sphere
360
- called a "marimo moss ball". Made of just strands of algae, these
361
- beloved assemblages are greater than the sum of their parts.
362
- """
 
 
 
 
 
 
 
 
363
  )
364
- return
365
-
366
-
367
- @app.cell(hide_code=True)
368
- def __():
369
- tips = {
370
- "Saving": (
371
- """
372
- **Saving**
373
-
374
- - _Name_ your app using the box at the top of the screen, or
375
- with `Ctrl/Cmd+s`. You can also create a named app at the
376
- command line, e.g., `marimo edit app_name.py`.
377
-
378
- - _Save_ by clicking the save icon on the bottom right, or by
379
- inputting `Ctrl/Cmd+s`. By default marimo is configured
380
- to autosave.
381
- """
382
- ),
383
- "Running": (
384
- """
385
- 1. _Run a cell_ by clicking the play ( ▷ ) button on the top
386
- right of a cell, or by inputting `Ctrl/Cmd+Enter`.
387
-
388
- 2. _Run a stale cell_ by clicking the yellow run button on the
389
- right of the cell, or by inputting `Ctrl/Cmd+Enter`. A cell is
390
- stale when its code has been modified but not run.
391
-
392
- 3. _Run all stale cells_ by clicking the play ( ▷ ) button on
393
- the bottom right of the screen, or input `Ctrl/Cmd+Shift+r`.
394
- """
395
- ),
396
- "Console Output": (
397
- """
398
- Console output (e.g., `print()` statements) is shown below a
399
- cell.
400
- """
401
- ),
402
- "Creating, Moving, and Deleting Cells": (
403
- """
404
- 1. _Create_ a new cell above or below a given one by clicking
405
- the plus button to the left of the cell, which appears on
406
- mouse hover.
407
-
408
- 2. _Move_ a cell up or down by dragging on the handle to the
409
- right of the cell, which appears on mouse hover.
410
-
411
- 3. _Delete_ a cell by clicking the trash bin icon. Bring it
412
- back by clicking the undo button on the bottom right of the
413
- screen, or with `Ctrl/Cmd+Shift+z`.
414
- """
415
- ),
416
- "Disabling Automatic Execution": (
417
- """
418
- Via the notebook settings (gear icon) or footer panel, you
419
- can disable automatic execution. This is helpful when
420
- working with expensive notebooks or notebooks that have
421
- side-effects like database transactions.
422
- """
423
- ),
424
- "Disabling Cells": (
425
- """
426
- You can disable a cell via the cell context menu.
427
- marimo will never run a disabled cell or any cells that depend on it.
428
- This can help prevent accidental execution of expensive computations
429
- when editing a notebook.
430
- """
431
- ),
432
- "Code Folding": (
433
- """
434
- You can collapse or fold the code in a cell by clicking the arrow
435
- icons in the line number column to the left, or by using keyboard
436
- shortcuts.
437
-
438
- Use the command palette (`Ctrl/Cmd+k`) or a keyboard shortcut to
439
- quickly fold or unfold all cells.
440
- """
441
- ),
442
- "Code Formatting": (
443
- """
444
- If you have [ruff](https://github.com/astral-sh/ruff) installed,
445
- you can format a cell with the keyboard shortcut `Ctrl/Cmd+b`.
446
- """
447
- ),
448
- "Command Palette": (
449
- """
450
- Use `Ctrl/Cmd+k` to open the command palette.
451
- """
452
- ),
453
- "Keyboard Shortcuts": (
454
- """
455
- Open the notebook menu (top-right) or input `Ctrl/Cmd+Shift+h` to
456
- view a list of all keyboard shortcuts.
457
- """
458
- ),
459
- "Configuration": (
460
- """
461
- Configure the editor by clicking the gears icon near the top-right
462
- of the screen.
463
- """
464
- ),
465
- }
466
- return (tips,)
467
 
468
 
469
  if __name__ == "__main__":
 
1
+ # /// script
2
+ # requires-python = "==3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "polars==1.23.0",
6
+ # "sentence-transformers==3.4.1",
7
+ # "umap-learn==0.5.7",
8
+ # "llvmlite==0.44.0",
9
+ # "altair==5.5.0",
10
+ # "scikit-learn==1.6.1",
11
+ # "numpy==2.1.3",
12
+ # "mohtml==0.1.2",
13
+ # ]
14
+ # ///
15
 
16
+ import marimo
17
 
18
+ __generated_with = "0.11.9"
19
+ app = marimo.App(width="medium")
 
 
20
 
21
 
22
  @app.cell
23
+ def _(mo):
24
+ mo.md("""### Bulk labelling demo""")
 
 
 
 
 
 
 
 
 
 
25
  return
26
 
27
 
28
+ @app.cell
29
+ def _(mo, use_default_switch):
30
+ mo.stop(use_default_switch.value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ uploaded_file = mo.ui.file(kind="area")
33
+ uploaded_file
34
+ return (uploaded_file,)
 
 
 
 
 
 
35
 
36
 
37
+ @app.cell
38
+ def _(mo):
39
+ use_default_switch = mo.ui.switch(False, label="Use default dataset")
40
+ use_default_switch
41
+ return (use_default_switch,)
42
 
 
 
43
 
44
+ @app.cell
45
+ def _(mo):
46
+ pos_label = mo.ui.text("pos", placeholder="positive label name")
47
+ neg_label = mo.ui.text("neg", placeholder="negative label name")
48
+ return neg_label, pos_label
49
 
 
 
 
 
 
 
50
 
51
+ @app.cell
52
+ def _(mo, pl, uploaded_file, use_default_switch):
53
+ mo.stop(not use_default_switch.value and len(uploaded_file.value) == 0 , mo.md("**Submit a dataset or use default one to continue.**"))
54
 
55
+ if use_default_switch.value:
56
+ df = pl.read_csv("spam.csv")
57
+ else:
58
+ df = pl.read_csv(uploaded_file.value[0].contents)
59
+
60
+ texts = df["text"].to_list()
61
+ return df, texts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
 
64
  @app.cell
65
+ def _(SentenceTransformer, mo, texts):
66
+ with mo.status.spinner(subtitle="Creating embeddings ...") as _spinner:
67
+ tfm = SentenceTransformer("all-MiniLM-L6-v2")
68
+ X = tfm.encode(texts)
69
+ return X, tfm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
+ @app.cell
73
+ def _(X, mo):
74
+ with mo.status.spinner(subtitle="Running UMAP ...") as _spinner:
75
+ from umap import UMAP
 
 
 
 
 
 
76
 
77
+ umap_tfm = UMAP()
78
+ X_tfm = umap_tfm.fit_transform(X)
79
+ return UMAP, X_tfm, umap_tfm
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ @app.cell
83
+ def _(add_label, mo, neg_label, pos_label, undo):
84
+ btn_spam = mo.ui.button(label=f"Annotate {neg_label.value}", on_click=lambda d: add_label(neg_label.value))
85
+ btn_ham = mo.ui.button(label=f"Annotate {pos_label.value}", on_click=lambda d: add_label(pos_label.value))
86
+ btn_undo = mo.ui.button(label="Undo", on_click=lambda d: undo())
87
+ return btn_ham, btn_spam, btn_undo
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ @app.cell
91
+ def _(chart, get_label, neg_label, pos_label, set_label):
92
+ def add_label(lab):
93
+ current_labels = get_label()
94
+ if lab == neg_label.value:
95
+ new_ham = list(set(current_labels[pos_label.value]).difference(chart.value["index"]))
96
+ new_spam = list(set(current_labels[neg_label.value]).union(chart.value["index"]))
97
+ if lab == pos_label.value:
98
+ new_ham = list(set(current_labels[pos_label.value]).union(chart.value["index"]))
99
+ new_spam = list(set(current_labels[neg_label.value]).difference(chart.value["index"]))
100
 
101
+ set_label({neg_label.value: new_spam, pos_label.value: new_ham})
102
+ return (add_label,)
 
 
 
103
 
 
 
 
 
104
 
105
+ @app.cell
106
+ def _(
107
+ br,
108
+ btn_ham,
109
+ btn_spam,
110
+ btn_undo,
111
+ chart,
112
+ form,
113
+ json_download,
114
+ mo,
115
+ neg_label,
116
+ pos_label,
117
+ switch,
118
+ ):
119
+ mo.vstack([
120
+ mo.md("Assign label names"),
121
+ mo.hstack([pos_label, neg_label]),
122
+ mo.md("Explore the data"),
123
+ mo.hstack([btn_ham, btn_spam, btn_undo, switch, json_download]),
124
+ br(),
125
+ form if switch.value else "",
126
+ br() if switch.value else "",
127
+ chart
128
+ ])
129
  return
130
 
131
 
132
  @app.cell
133
+ def _(chart):
134
+ chart.value["text"]
135
  return
136
 
137
 
138
  @app.cell
139
+ def _(chart, get_label, neg_label, pos_label, set_label):
140
+ def undo():
141
+ current_labels = get_label()
142
+ new_spam = set(current_labels[neg_label.value]).difference(chart.value["index"])
143
+ new_ham = set(current_labels[pos_label.value]).difference(chart.value["index"])
144
+ set_label({neg_label.value: list(new_spam), pos_label.value: list(new_ham)})
145
+ return (undo,)
146
 
147
 
148
  @app.cell
149
+ def _():
150
+ from mohtml import br
151
+ return (br,)
152
 
153
 
154
  @app.cell
155
+ def _(get_label):
156
+ get_label()
157
  return
158
 
159
 
160
  @app.cell
161
+ def _(mo, neg_label, pos_label):
162
+ get_label, set_label = mo.state({pos_label.value: [], neg_label.value: []})
163
+ return get_label, set_label
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
 
166
+ @app.cell
167
+ def _(mo):
168
+ text_input = mo.ui.text_area(label="Reference sentences")
169
+ form = mo.md("""{text_input}""").batch(text_input=text_input).form()
170
+ return form, text_input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
 
 
 
172
 
173
+ @app.cell
174
+ def _(df_emb, labels, mo):
175
+ from collections import Counter
176
 
177
+ with mo.status.spinner(subtitle="Starting UI ...") as _spinner:
178
+ df_emb
179
 
180
+ Counter(labels)
181
+ return (Counter,)
182
 
 
 
 
183
 
184
+ @app.cell
185
+ def _(df_emb, mo, pl):
186
+ import json
187
 
188
+ data = df_emb.filter(pl.col("label") != "unlabeled").select("text", "label").to_dicts()
 
189
 
190
+ json_download = mo.download(
191
+ data=json.dumps(data).encode("utf-8"),
192
+ filename="data.json",
193
+ mimetype="application/json",
194
+ label="Download JSON",
195
+ )
196
+ return data, json, json_download
197
 
 
198
 
199
+ @app.cell
200
+ def _(df_emb, mo, scatter):
201
+ chart = mo.ui.altair_chart(scatter(df_emb))
202
+ return (chart,)
 
 
 
 
 
 
203
 
 
204
 
205
+ @app.cell
206
+ def _(mo):
207
+ switch = mo.ui.switch(False, label="Use search")
208
+ return (switch,)
209
 
 
 
 
 
 
210
 
211
+ @app.cell
212
+ def _(alt, neg_label, pos_label, switch):
213
+ def scatter(df):
214
+ return (alt.Chart(df)
215
+ .mark_circle()
216
+ .encode(
217
+ x=alt.X("x:Q"),
218
+ y=alt.Y("y:Q"),
219
+ color=alt.Color("sim:Q") if switch.value else alt.Color("label:N", scale=alt.Scale(
220
+ domain=['unlabeled', pos_label.value, neg_label.value],
221
+ range=['steelblue', 'green', 'red']
222
+ ))
223
+ ).properties(width=500, height=500))
224
+ return (scatter,)
225
 
 
 
 
 
 
226
 
227
+ @app.cell
228
+ def _(
229
+ X,
230
+ X_tfm,
231
+ cosine_similarity,
232
+ form,
233
+ get_label,
234
+ neg_label,
235
+ np,
236
+ pl,
237
+ pos_label,
238
+ texts,
239
+ tfm,
240
+ ):
241
+ df_emb = (
242
+ pl.DataFrame({
243
+ "x": X_tfm[:, 0],
244
+ "y": X_tfm[:, 1],
245
+ "index": range(X.shape[0]),
246
+ "text": texts
247
+ }).with_columns(sim=pl.lit(1))
248
  )
 
249
 
250
+ if form.value:
251
+ query = tfm.encode([form.value["text_input"]])
252
+ similarity = cosine_similarity(query, X)[0]
253
+ df_emb = df_emb.with_columns(sim=similarity)
254
 
255
+ spam = set(get_label()[neg_label.value])
256
+ ham = set(get_label()[pos_label.value])
 
 
257
 
258
+ labels = []
259
+ for i in range(df_emb.shape[0]):
260
+ if i in spam:
261
+ labels.append(neg_label.value)
262
+ elif i in ham:
263
+ labels.append(pos_label.value)
264
+ else:
265
+ labels.append("unlabeled")
266
 
267
+ df_emb = df_emb.with_columns(label=np.array(labels))
268
+ return df_emb, ham, i, labels, query, similarity, spam
 
 
269
 
270
 
271
+ @app.cell
272
+ def _():
273
+ import marimo as mo
274
+ import polars as pl
275
+ from sentence_transformers import SentenceTransformer
276
+ import altair as alt
277
+ import numpy as np
278
+ from sklearn.metrics.pairwise import cosine_similarity
279
+ from sklearn.linear_model import LogisticRegression
280
+ return (
281
+ LogisticRegression,
282
+ SentenceTransformer,
283
+ alt,
284
+ cosine_similarity,
285
+ mo,
286
+ np,
287
+ pl,
288
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
 
291
  if __name__ == "__main__":
spam.csv ADDED
The diff for this file is too large to render. See raw diff