cboettig committed on
Commit
d2c79b3
·
1 Parent(s): 7c2025e

pre-process

Browse files
Files changed (1) hide show
  1. preprocess.ipynb +154 -0
preprocess.ipynb ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# boilerplate setup\n",
11
+ "import leafmap.maplibregl as leafmap\n",
12
+ "import ibis\n",
13
+ "from ibis import _\n",
14
+ "\n",
15
+ "conn = ibis.duckdb.connect(\"tmp\")\n",
16
+ "ca_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
17
+ "# or use local copy:\n",
18
+ "ca_parquet = \"/home/rstudio/source.coop/cboettig/ca30x30/ca_areas.parquet\"\n"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 5,
24
+ "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "buffer = -0.00003 \n",
29
+ "\n",
30
+ "tbl = (\n",
31
+ " conn.read_parquet(\"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\")\n",
32
+ " .cast({\"SHAPE\": \"geometry\"})\n",
33
+ " .rename(geom = \"SHAPE\")\n",
34
+ " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
35
+ " .filter(_.reGAP < 3) \n",
36
+ ")\n",
37
+ "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))\n",
38
+ "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
39
+ "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))\n",
40
+ "\n",
41
+ "new2024 = intersects.select(\"OBJECTID\").mutate(established = 2024)"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 7,
47
+ "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
48
+ "metadata": {},
49
+ "outputs": [
50
+ {
51
+ "data": {
52
+ "application/vnd.jupyter.widget-view+json": {
53
+ "model_id": "998c2484ba604297ad438ed5c17dc59d",
54
+ "version_major": 2,
55
+ "version_minor": 0
56
+ },
57
+ "text/plain": [
58
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
59
+ ]
60
+ },
61
+ "metadata": {},
62
+ "output_type": "display_data"
63
+ }
64
+ ],
65
+ "source": [
66
+ "ca = (conn\n",
67
+ " .read_parquet(ca_parquet)\n",
68
+ " .cast({\"SHAPE\": \"geometry\"})\n",
69
+ " .mutate(area = _.SHAPE.area())\n",
70
+ " .filter(_.Release_Year == 2024)\n",
71
+ " .filter(_.reGAP < 3)\n",
72
+ " .left_join(new2024, \"OBJECTID\")\n",
73
+ " .mutate(established=_.established.fill_null(2023))\n",
74
+ " .mutate(geom = _.SHAPE.convert(\"epsg:3310\",\"epsg:4326\"))\n",
75
+ " .rename(name = \"UNIT_NAME\", access_type = \"ACCESS_TYP\", manager = \"MNG_AGNCY\",\n",
76
+ " manager_type = \"MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\")\n",
77
+ " .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
78
+ " _.Easement, _.Acres, _.id, _.type, _.geom)\n",
79
+ " )\n",
80
+ "ca.to_parquet(\"ca2024.parquet\")"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "markdown",
85
+ "id": "ebbb2650-4442-4e54-8467-5e681d6fab9e",
86
+ "metadata": {},
87
+ "source": [
88
+ "Using difference (overlap) instead:"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": null,
94
+ "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# compute some summary tables: total Acres with established == 2024, per manager and manager_type\n",
99
+ "\n",
100
+ "(ca\n",
101
+ " .filter(_.established == 2024)\n",
102
+ " # .filter(_.manager_type == manager_type)  # disabled: manager_type is never defined in this notebook\n",
103
+ " .group_by(_.manager, _.manager_type)\n",
104
+ " .agg(area = _.Acres.sum())\n",
105
+ " .order_by(_.area.desc())\n",
106
+ " .execute()\n",
107
+ ")"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": null,
113
+ "id": "c62854f6-1456-4207-8c69-53af17970102",
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "gdf = ca.filter(_.manager == \"United States National Park Service\", _.established== 2024).execute()\n",
118
+ "established = {'property': 'established',\n",
119
+ " 'type': 'categorical',\n",
120
+ " 'stops': [\n",
121
+ " [2023, \"#26542C80\"], \n",
122
+ " [2024, \"#F3AB3D80\"]]\n",
123
+ " }\n",
124
+ "paint = {\"fill-color\": established}\n",
125
+ "\n",
126
+ "m = leafmap.Map(style=\"positron\")\n",
127
+ "m.add_gdf(gdf,layer_type=\"fill\", name = \"CA 30x30\", paint = paint)\n",
128
+ "m.add_layer_control()\n",
129
+ "m"
130
+ ]
131
+ }
132
+ ],
133
+ "metadata": {
134
+ "kernelspec": {
135
+ "display_name": "Python 3 (ipykernel)",
136
+ "language": "python",
137
+ "name": "python3"
138
+ },
139
+ "language_info": {
140
+ "codemirror_mode": {
141
+ "name": "ipython",
142
+ "version": 3
143
+ },
144
+ "file_extension": ".py",
145
+ "mimetype": "text/x-python",
146
+ "name": "python",
147
+ "nbconvert_exporter": "python",
148
+ "pygments_lexer": "ipython3",
149
+ "version": "3.11.10"
150
+ }
151
+ },
152
+ "nbformat": 4,
153
+ "nbformat_minor": 5
154
+ }