Spaces:
Running
Running
Pin requirements for reproducibility
Browse files- polars/09_strings.py +36 -9
polars/09_strings.py
CHANGED
@@ -1,6 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import marimo
|
2 |
|
3 |
-
__generated_with = "0.11.
|
4 |
app = marimo.App(width="medium")
|
5 |
|
6 |
|
@@ -573,13 +583,30 @@ def _(expressions_df, pl):
|
|
573 |
|
574 |
@app.cell
|
575 |
def _(mo):
|
576 |
-
mo.md(r"""As a more practical example, we can use the `split` expression with some aggregation to count the number of times a particular word occurs in member names across all namespaces.""")
|
577 |
return
|
578 |
|
579 |
|
580 |
@app.cell(hide_code=True)
|
581 |
-
def _(
|
582 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
583 |
expressions_df.select(pl.col("member").str.split("_"))
|
584 |
.explode("member")
|
585 |
.group_by("member")
|
@@ -587,17 +614,17 @@ def _(alt, expressions_df, pl, random):
|
|
587 |
# Generating random x and y coordinates to distribute the words in the 2D space
|
588 |
.with_columns(
|
589 |
x=pl.col("member").map_elements(
|
590 |
-
lambda e: random.randint(0,
|
591 |
return_dtype=pl.UInt8,
|
592 |
),
|
593 |
y=pl.col("member").map_elements(
|
594 |
-
lambda e: random.randint(0,
|
595 |
return_dtype=pl.UInt8,
|
596 |
),
|
597 |
)
|
598 |
)
|
599 |
|
600 |
-
alt.Chart(
|
601 |
x=alt.X("x:O", axis=None),
|
602 |
y=alt.Y("y:O", axis=None),
|
603 |
text="member:N",
|
@@ -605,7 +632,7 @@ def _(alt, expressions_df, pl, random):
|
|
605 |
size=alt.Size("len:Q", legend=None),
|
606 |
tooltip=["member", "len"],
|
607 |
).configure_view(strokeWidth=0)
|
608 |
-
return
|
609 |
|
610 |
|
611 |
@app.cell
|
@@ -955,7 +982,7 @@ def _():
|
|
955 |
import altair as alt
|
956 |
import random
|
957 |
|
958 |
-
random.seed(
|
959 |
return alt, mo, pl, random
|
960 |
|
961 |
|
|
|
1 |
+
# /// script
|
2 |
+
# requires-python = ">=3.12"
|
3 |
+
# dependencies = [
|
4 |
+
# "altair==5.5.0",
|
5 |
+
# "marimo",
|
6 |
+
# "numpy==2.2.3",
|
7 |
+
# "polars==1.24.0",
|
8 |
+
# ]
|
9 |
+
# ///
|
10 |
+
|
11 |
import marimo
|
12 |
|
13 |
+
__generated_with = "0.11.17"
|
14 |
app = marimo.App(width="medium")
|
15 |
|
16 |
|
|
|
583 |
|
584 |
@app.cell
|
585 |
def _(mo):
|
586 |
+
mo.md(r"""As a more practical example, we can use the `split` expression with some aggregation to count the number of times a particular word occurs in member names across all namespaces. This enables us to create a word cloud of the API members' constituents!""")
|
587 |
return
|
588 |
|
589 |
|
590 |
@app.cell(hide_code=True)
|
591 |
+
def _(mo, wordcloud, wordcloud_height, wordcloud_width):
|
592 |
+
mo.vstack([
|
593 |
+
wordcloud_width,
|
594 |
+
wordcloud_height,
|
595 |
+
wordcloud,
|
596 |
+
])
|
597 |
+
return
|
598 |
+
|
599 |
+
|
600 |
+
@app.cell(hide_code=True)
|
601 |
+
def _(mo):
|
602 |
+
wordcloud_width = mo.ui.slider(0, 64, step=1, value=32, label="Word Cloud Width")
|
603 |
+
wordcloud_height = mo.ui.slider(0, 32, step=1, value=16, label="Word Cloud Height")
|
604 |
+
return wordcloud_height, wordcloud_width
|
605 |
+
|
606 |
+
|
607 |
+
@app.cell(hide_code=True)
|
608 |
+
def _(alt, expressions_df, pl, random, wordcloud_height, wordcloud_width):
|
609 |
+
wordcloud_df = (
|
610 |
expressions_df.select(pl.col("member").str.split("_"))
|
611 |
.explode("member")
|
612 |
.group_by("member")
|
|
|
614 |
# Generating random x and y coordinates to distribute the words in the 2D space
|
615 |
.with_columns(
|
616 |
x=pl.col("member").map_elements(
|
617 |
+
lambda e: random.randint(0, wordcloud_width.value),
|
618 |
return_dtype=pl.UInt8,
|
619 |
),
|
620 |
y=pl.col("member").map_elements(
|
621 |
+
lambda e: random.randint(0, wordcloud_height.value),
|
622 |
return_dtype=pl.UInt8,
|
623 |
),
|
624 |
)
|
625 |
)
|
626 |
|
627 |
+
wordcloud = alt.Chart(wordcloud_df).mark_text(baseline="middle").encode(
|
628 |
x=alt.X("x:O", axis=None),
|
629 |
y=alt.Y("y:O", axis=None),
|
630 |
text="member:N",
|
|
|
632 |
size=alt.Size("len:Q", legend=None),
|
633 |
tooltip=["member", "len"],
|
634 |
).configure_view(strokeWidth=0)
|
635 |
+
return wordcloud, wordcloud_df
|
636 |
|
637 |
|
638 |
@app.cell
|
|
|
982 |
import altair as alt
|
983 |
import random
|
984 |
|
985 |
+
random.seed(42)
|
986 |
return alt, mo, pl, random
|
987 |
|
988 |
|