Jesse Hartman committed on
Commit
5552162
·
1 Parent(s): 15ebd87

add polars joins

Browse files
Files changed (1) hide show
  1. polars/basic_polars_joins.py +99 -0
polars/basic_polars_joins.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # dependencies = [
3
+ # "marimo",
4
+ # "polars==1.28.1",
5
+ # ]
6
+ # [tool.marimo.runtime]
7
+ # auto_instantiate = false
8
+ # ///
9
+
10
+ import marimo
11
+
12
+ __generated_with = "0.13.2"
13
+ app = marimo.App(width="medium")
14
+
15
+
16
@app.cell
def _():
    # Import the two libraries used by the notebook and hand them to the
    # other cells through the return tuple.
    import marimo as mo
    import polars as pl

    return (mo, pl)
21
+
22
+
23
@app.cell
def _(pl):
    # Two small frames sharing an "id" column. Ids 3 and 4 appear in both,
    # ids 1 and 2 only in `base`, ids 5 and 6 only in `other`.
    base = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "name": ["Alice", "Bob", "Charlie", "Diana"],
        }
    )
    other = pl.DataFrame(
        {
            "id": [3, 4, 5, 6],
            "age": [25, 32, 40, 28],
        }
    )
    return (base, other)
31
+
32
+
33
@app.cell
def _(base, mo, other):
    # Show both input frames as tables next to each other, centered.
    # Underscore-prefixed names are used for the intermediates so that they
    # are not exported from this cell.
    _base_table = mo.ui.table(base, show_download=False, label="base")
    _other_table = mo.ui.table(other, show_download=False, label="other")
    _side_by_side = mo.hstack([_base_table, _other_table], justify="center")

    # Last expression of the cell: this is what gets rendered.
    mo.vstack([_side_by_side])
    return
41
+
42
+
43
@app.cell
def _(mo):
    # Dropdown label -> value. The join cell passes `dropdown.value` as the
    # `how=` argument of the join and uses it as the key into `descriptions`.
    join_options: dict[str, str] = {
        "inner (default)": "inner",
        "left": "left",
        "right": "right",
        "full": "full",
        "semi": "semi",
        "anti": "anti",
    }

    # These are from https://docs.pola.rs/user-guide/transformations/joins/#quick-reference-table
    # The last three entries (join_where, join_asof, cross) have no matching
    # dropdown option in `join_options`.
    descriptions: dict[str, str] = {
        "inner": "Keeps rows that matched both on the left and right.",
        "left": (
            "Keeps all rows from the left plus matching rows from the right. "
            "Non-matching rows from the left have their right columns filled "
            "with null."
        ),
        "right": (
            "Keeps all rows from the right plus matching rows from the left. "
            "Non-matching rows from the right have their left columns filled "
            "with null."
        ),
        "full": (
            "Keeps all rows from either dataframe, regardless of whether they "
            "match or not. Non-matching rows from one side have the columns "
            "from the other side filled with null."
        ),
        "semi": "Keeps rows from the left that have a match on the right.",
        "anti": "Keeps rows from the left that do not have a match on the right.",
        "join_where": (
            "Finds all possible pairings of rows from the left and right that "
            "satisfy the given predicate(s)."
        ),
        "join_asof": (
            "Like a left outer join, but matches on the nearest key instead of "
            "on exact key matches."
        ),
        "cross": "Computes the Cartesian product of the two dataframes.",
    }

    # The initial selection is given by its label (a key of `join_options`).
    dropdown = mo.ui.dropdown(options=join_options, value="inner (default)")
    return (descriptions, dropdown)
71
+
72
+
73
@app.cell
def _(base, descriptions, dropdown, mo, other, pl):
    # Join the two frames on "id" with the strategy currently selected in the
    # dropdown.
    _how = dropdown.value
    result: pl.DataFrame = base.join(other, how=_how, on="id")
    n_rows, n_columns = result.shape

    _description = mo.md(descriptions[_how])
    _shape_summary = mo.md(f"rows: {n_rows} columns: {n_columns}")

    # Last expression of the cell: the dropdown, the description of the
    # selected join, and the shape of the joined frame, stacked vertically.
    mo.vstack([dropdown, _description, _shape_summary])
    return (result,)
85
+
86
+
87
@app.cell
def _(mo, result):
    # Display the joined frame produced by the join cell.
    mo.vstack(
        [result],
    )
    return
91
+
92
+
93
@app.cell
def _():
    # Empty cell: defines no names and has no output.
    return
96
+
97
+
98
# Run the notebook's cells when the file is executed directly as a script.
if __name__ == "__main__":
    app.run()