Spaces:
Running
Running
Jesse Hartman
committed on
Commit
·
5552162
1
Parent(s):
15ebd87
add polars joins
Browse files- polars/basic_polars_joins.py +99 -0
polars/basic_polars_joins.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# /// script
|
2 |
+
# dependencies = [
|
3 |
+
# "marimo",
|
4 |
+
# "polars==1.28.1",
|
5 |
+
# ]
|
6 |
+
# [tool.marimo.runtime]
|
7 |
+
# auto_instantiate = false
|
8 |
+
# ///
|
9 |
+
|
10 |
+
import marimo
|
11 |
+
|
12 |
+
# Version stamp stored with the notebook; presumably the marimo release that
# generated this file (TODO confirm).
__generated_with = "0.13.2"
|
13 |
+
# Application object; every function below registers itself through @app.cell,
# and the __main__ guard at the bottom calls app.run().
app = marimo.App(width="medium")
|
14 |
+
|
15 |
+
|
16 |
+
@app.cell
def _():
    # Import the two libraries the remaining cells receive as `mo` and `pl`.
    import polars as pl
    import marimo as mo

    return mo, pl
|
21 |
+
|
22 |
+
|
23 |
+
@app.cell
def _(pl):
    # Two small frames sharing an "id" column: ids 3 and 4 occur in both,
    # 1 and 2 only in `base`, 5 and 6 only in `other`.
    _base_columns = {
        "id": [1, 2, 3, 4],
        "name": ["Alice", "Bob", "Charlie", "Diana"],
    }
    _other_columns = {
        "id": [3, 4, 5, 6],
        "age": [25, 32, 40, 28],
    }

    base = pl.DataFrame(_base_columns)
    other = pl.DataFrame(_other_columns)
    return base, other
|
31 |
+
|
32 |
+
|
33 |
+
@app.cell
def _(base, mo, other):
    # Render both input frames as labelled tables (download disabled),
    # placed side by side and centred.
    _tables = [
        mo.ui.table(_frame, show_download=False, label=_label)
        for _frame, _label in ((base, "base"), (other, "other"))
    ]
    mo.vstack([mo.hstack(_tables, justify="center")])
    return
|
41 |
+
|
42 |
+
|
43 |
+
@app.cell
def _(mo):
    # Dropdown label -> `how` string. Only the first label differs from its
    # value; the remaining strategies are labelled by their own name.
    join_options: dict = {
        "inner (default)": "inner",
        **{_how: _how for _how in ("left", "right", "full", "semi", "anti")},
    }

    # Wording taken from
    # https://docs.pola.rs/user-guide/transformations/joins/#quick-reference-table
    descriptions: dict = {
        "inner": "Keeps rows that matched both on the left and right.",
        "left": (
            "Keeps all rows from the left plus matching rows from the right. "
            "Non-matching rows from the left have their right columns filled with null."
        ),
        "right": (
            "Keeps all rows from the right plus matching rows from the left. "
            "Non-matching rows from the right have their left columns filled with null."
        ),
        "full": (
            "Keeps all rows from either dataframe, regardless of whether they match or not. "
            "Non-matching rows from one side have the columns from the other side filled with null."
        ),
        "semi": "Keeps rows from the left that have a match on the right.",
        "anti": "Keeps rows from the left that do not have a match on the right.",
        "join_where": (
            "Finds all possible pairings of rows from the left and right "
            "that satisfy the given predicate(s)."
        ),
        "join_asof": (
            "Like a left outer join, but matches on the nearest key "
            "instead of on exact key matches."
        ),
        "cross": "Computes the Cartesian product of the two dataframes.",
    }

    # The selector starts on the "inner (default)" entry.
    dropdown = mo.ui.dropdown(options=join_options, value="inner (default)")
    return descriptions, dropdown
|
71 |
+
|
72 |
+
|
73 |
+
@app.cell
def _(base, descriptions, dropdown, mo, other, pl):
    # Join `base` with `other` on "id", using the dropdown's current value
    # as the `how` argument.
    _how = dropdown.value
    result: pl.DataFrame = base.join(other, how=_how, on="id")
    n_rows, n_columns = result.height, result.width

    # Stack the selector, the description of the chosen join, and the
    # shape of the joined frame.
    _explanation = mo.md(descriptions[_how])
    _shape_summary = mo.md(f"rows: {n_rows} columns: {n_columns}")
    mo.vstack([dropdown, _explanation, _shape_summary])
    return (result,)
|
85 |
+
|
86 |
+
|
87 |
+
@app.cell
def _(mo, result):
    # Display the joined frame; the stack is the cell's final expression.
    _joined_view = mo.vstack([result])
    _joined_view
    return
|
91 |
+
|
92 |
+
|
93 |
+
@app.cell
def _():
    # Empty cell: defines no names and has no expression to display.
    return
|
96 |
+
|
97 |
+
|
98 |
+
# Run the app only when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|