koaning committed on
Commit
917f380
·
verified ·
1 Parent(s): 3f1e457

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -50
app.py CHANGED
@@ -3,26 +3,23 @@
3
  # dependencies = [
4
  # "marimo",
5
  # "polars==1.23.0",
6
- # "sentence-transformers==3.4.1",
7
- # "umap-learn==0.5.7",
8
- # "llvmlite==0.44.0",
9
- # "altair==5.5.0",
10
  # "scikit-learn==1.6.1",
11
  # "numpy==2.1.3",
12
  # "mohtml==0.1.2",
13
  # "model2vec==0.4.0",
 
14
  # ]
15
  # ///
16
 
17
  import marimo
18
 
19
- __generated_with = "0.11.9"
20
  app = marimo.App()
21
 
22
 
23
  @app.cell
24
  def _(mo):
25
- mo.md("""### Bulk labelling demo""")
26
  return
27
 
28
 
@@ -42,8 +39,8 @@ def _(mo):
42
 
43
  @app.cell
44
  def _(mo):
45
- pos_label = mo.ui.text("pos", placeholder="positive label name")
46
- neg_label = mo.ui.text("neg", placeholder="negative label name")
47
  return neg_label, pos_label
48
 
49
 
@@ -55,7 +52,7 @@ def _(uploaded_file, use_default_switch):
55
 
56
  @app.cell
57
  def _(mo, pl, should_stop, uploaded_file, use_default_switch):
58
- mo.stop(form is None, "Need a text input to fetch example")
59
 
60
  if use_default_switch.value:
61
  df = pl.read_csv("spam.csv")
@@ -73,6 +70,16 @@ def _(StaticModel, mo):
73
  return (tfm,)
74
 
75
 
 
 
 
 
 
 
 
 
 
 
76
  @app.cell
77
  def _(mo, texts, tfm):
78
  with mo.status.spinner(subtitle="Creating embeddings ...") as _spinner:
@@ -81,7 +88,7 @@ def _(mo, texts, tfm):
81
 
82
 
83
  @app.cell
84
- def _(add_label, get_example, mo, neg_label, pos_label):
85
  btn_spam = mo.ui.button(
86
  label=f"Annotate {neg_label.value}",
87
  on_click=lambda d: add_label(get_example(), neg_label.value),
@@ -92,7 +99,12 @@ def _(add_label, get_example, mo, neg_label, pos_label):
92
  on_click=lambda d: add_label(get_example(), pos_label.value),
93
  keyboard_shortcut="Ctrl-K"
94
  )
95
- return btn_ham, btn_spam
 
 
 
 
 
96
 
97
 
98
  @app.cell
@@ -101,7 +113,11 @@ def _(gen, get_label, set_example, set_label):
101
  current_labels = get_label()
102
  set_label(current_labels + [{"text": text, "label": lab}])
103
  set_example(next(gen))
104
- return (add_label,)
 
 
 
 
105
 
106
 
107
  @app.cell
@@ -110,6 +126,23 @@ def _():
110
  return (br,)
111
 
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  @app.cell
114
  def _(mo):
115
  get_label, set_label = mo.state([])
@@ -122,23 +155,6 @@ def _(gen, mo):
122
  return get_example, set_example
123
 
124
 
125
- @app.cell
126
- def _(div, get_example, p):
127
- div(
128
- p(get_example()),
129
- klass="bg-gray-100 p-4 rounded-lg"
130
- )
131
- return
132
-
133
-
134
- @app.cell
135
- def _(btn_ham, btn_spam, mo):
136
- mo.hstack([
137
- btn_ham, btn_spam
138
- ])
139
- return
140
-
141
-
142
  @app.cell
143
  def _():
144
  from mohtml import tailwind_css, div, p
@@ -147,16 +163,6 @@ def _():
147
  return div, p, tailwind_css
148
 
149
 
150
- @app.cell
151
- def _(mo, should_stop):
152
- mo.stop(should_stop)
153
-
154
- text_input = mo.ui.text_area("Query can go here", label="Reference sentences")
155
- form = mo.md("""{text_input}""").batch(text_input=text_input).form()
156
- form
157
- return form, text_input
158
-
159
-
160
  @app.cell
161
  def _(get_label, mo):
162
  import json
@@ -173,9 +179,8 @@ def _(get_label, mo):
173
 
174
 
175
  @app.cell
176
- def _(X, cosine_similarity, form, mo, pl, texts, tfm):
177
- mo.stop(form is None, "Need a query input to fetch example")
178
- mo.stop(form.value is None, "Need a query input to fetch example")
179
 
180
  df_emb = (
181
  pl.DataFrame({
@@ -188,15 +193,25 @@ def _(X, cosine_similarity, form, mo, pl, texts, tfm):
188
  query = tfm.encode([form.value["text_input"]])
189
  similarity = cosine_similarity(query, X)[0]
190
  df_emb = df_emb.with_columns(sim=similarity).sort(pl.col("sim"), descending=True)
191
- gen = (_["text"] for _ in df_emb.head(100).to_dicts())
192
- return df_emb, gen, query, similarity
 
 
 
 
 
 
 
 
 
 
193
 
194
 
195
  @app.cell
196
  def _(get_label, mo, pl, should_stop):
197
  mo.stop(should_stop)
198
 
199
- pl.DataFrame(get_label())
200
  return
201
 
202
 
@@ -204,12 +219,9 @@ def _(get_label, mo, pl, should_stop):
204
  def _(mo):
205
  with mo.status.spinner(subtitle="Loading libraries ...") as _spinner:
206
  import polars as pl
207
- import altair as alt
208
  import numpy as np
209
  from sklearn.metrics.pairwise import cosine_similarity
210
- from sklearn.linear_model import LogisticRegression
211
- from sklearn.decomposition import PCA
212
- return LogisticRegression, PCA, alt, cosine_similarity, np, pl
213
 
214
 
215
  @app.cell
@@ -231,4 +243,4 @@ def _():
231
 
232
 
233
  if __name__ == "__main__":
234
- app.run()
 
3
  # dependencies = [
4
  # "marimo",
5
  # "polars==1.23.0",
 
 
 
 
6
  # "scikit-learn==1.6.1",
7
  # "numpy==2.1.3",
8
  # "mohtml==0.1.2",
9
  # "model2vec==0.4.0",
10
+ # "altair==5.5.0",
11
  # ]
12
  # ///
13
 
14
  import marimo
15
 
16
+ __generated_with = "0.11.14"
17
  app = marimo.App()
18
 
19
 
20
  @app.cell
21
  def _(mo):
22
+ mo.md("""### Fast labelling demo""")
23
  return
24
 
25
 
 
39
 
40
  @app.cell
41
  def _(mo):
42
+ pos_label = mo.ui.text("pos", placeholder="positive label name", label="positive class name")
43
+ neg_label = mo.ui.text("neg", placeholder="negative label name", label="negative class name")
44
  return neg_label, pos_label
45
 
46
 
 
52
 
53
  @app.cell
54
  def _(mo, pl, should_stop, uploaded_file, use_default_switch):
55
+ mo.stop(should_stop , mo.md("**Submit a dataset or use default one to continue.**"))
56
 
57
  if use_default_switch.value:
58
  df = pl.read_csv("spam.csv")
 
70
  return (tfm,)
71
 
72
 
73
+ @app.cell
74
+ def _(mo, should_stop):
75
+ mo.stop(should_stop)
76
+
77
+ text_input = mo.ui.text_area("you will win a free ringtone!", label="Reference sentences")
78
+ form = mo.md("""{text_input}""").batch(text_input=text_input).form()
79
+ form
80
+ return form, text_input
81
+
82
+
83
  @app.cell
84
  def _(mo, texts, tfm):
85
  with mo.status.spinner(subtitle="Creating embeddings ...") as _spinner:
 
88
 
89
 
90
  @app.cell
91
+ def _(add_label, get_example, mo, neg_label, pos_label, undo):
92
  btn_spam = mo.ui.button(
93
  label=f"Annotate {neg_label.value}",
94
  on_click=lambda d: add_label(get_example(), neg_label.value),
 
99
  on_click=lambda d: add_label(get_example(), pos_label.value),
100
  keyboard_shortcut="Ctrl-K"
101
  )
102
+ btn_undo = mo.ui.button(
103
+ label="Undo",
104
+ on_click=lambda d: undo(),
105
+ keyboard_shortcut="Ctrl-U"
106
+ )
107
+ return btn_ham, btn_spam, btn_undo
108
 
109
 
110
  @app.cell
 
113
  current_labels = get_label()
114
  set_label(current_labels + [{"text": text, "label": lab}])
115
  set_example(next(gen))
116
+
117
+ def undo():
118
+ current_labels = get_label()
119
+ set_label(current_labels[:-2])
120
+ return add_label, undo
121
 
122
 
123
  @app.cell
 
126
  return (br,)
127
 
128
 
129
+ @app.cell
130
+ def _(br, btn_ham, btn_spam, btn_undo, example, mo, neg_label, p, pos_label):
131
+ mo.vstack([
132
+ mo.hstack([
133
+ pos_label, neg_label
134
+ ]),
135
+ br(),
136
+ mo.hstack([
137
+ btn_ham, btn_spam, btn_undo
138
+ ]),
139
+ br(),
140
+ p("Current example:", klass="font-bold"),
141
+ example
142
+ ])
143
+ return
144
+
145
+
146
  @app.cell
147
  def _(mo):
148
  get_label, set_label = mo.state([])
 
155
  return get_example, set_example
156
 
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  @app.cell
159
  def _():
160
  from mohtml import tailwind_css, div, p
 
163
  return div, p, tailwind_css
164
 
165
 
 
 
 
 
 
 
 
 
 
 
166
  @app.cell
167
  def _(get_label, mo):
168
  import json
 
179
 
180
 
181
  @app.cell
182
+ def _(X, cosine_similarity, form, get_label, mo, pl, texts, tfm):
183
+ mo.stop(not form.value.get("text_input", None), "Need a text input to fetch example")
 
184
 
185
  df_emb = (
186
  pl.DataFrame({
 
193
  query = tfm.encode([form.value["text_input"]])
194
  similarity = cosine_similarity(query, X)[0]
195
  df_emb = df_emb.with_columns(sim=similarity).sort(pl.col("sim"), descending=True)
196
+ label_texts = [_["text"] for _ in get_label()]
197
+ gen = (_["text"] for _ in df_emb.head(100).to_dicts() if _["text"] not in label_texts)
198
+ return df_emb, gen, label_texts, query, similarity
199
+
200
+
201
+ @app.cell
202
+ def _(div, get_example, p):
203
+ example = div(
204
+ p(get_example()),
205
+ klass="bg-gray-100 p-4 rounded-lg"
206
+ )
207
+ return (example,)
208
 
209
 
210
  @app.cell
211
  def _(get_label, mo, pl, should_stop):
212
  mo.stop(should_stop)
213
 
214
+ pl.DataFrame(get_label()).reverse()
215
  return
216
 
217
 
 
219
  def _(mo):
220
  with mo.status.spinner(subtitle="Loading libraries ...") as _spinner:
221
  import polars as pl
 
222
  import numpy as np
223
  from sklearn.metrics.pairwise import cosine_similarity
224
+ return cosine_similarity, np, pl
 
 
225
 
226
 
227
  @app.cell
 
243
 
244
 
245
  if __name__ == "__main__":
246
+ app.run()