cboettig committed on
Commit
c2b1848
·
1 Parent(s): d2c79b3
Files changed (1) hide show
  1. preprocess.ipynb +249 -17
preprocess.ipynb CHANGED
@@ -1,18 +1,24 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
- "execution_count": 4,
6
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
7
  "metadata": {},
8
  "outputs": [],
9
  "source": [
10
- "# boilerplate setup\n",
11
- "import leafmap.maplibregl as leafmap\n",
12
  "import ibis\n",
13
  "from ibis import _\n",
14
  "\n",
15
- "conn = ibis.duckdb.connect(\"tmp\")\n",
16
  "ca_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
17
  "# or use local copy:\n",
18
  "ca_parquet = \"/home/rstudio/source.coop/cboettig/ca30x30/ca_areas.parquet\"\n"
@@ -20,15 +26,15 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 5,
24
  "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
25
  "metadata": {},
26
  "outputs": [],
27
  "source": [
28
- "buffer = -0.00003 \n",
29
  "\n",
30
  "tbl = (\n",
31
- " conn.read_parquet(\"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\")\n",
32
  " .cast({\"SHAPE\": \"geometry\"})\n",
33
  " .rename(geom = \"SHAPE\")\n",
34
  " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
@@ -43,14 +49,14 @@
43
  },
44
  {
45
  "cell_type": "code",
46
- "execution_count": 7,
47
  "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
48
  "metadata": {},
49
  "outputs": [
50
  {
51
  "data": {
52
  "application/vnd.jupyter.widget-view+json": {
53
- "model_id": "998c2484ba604297ad438ed5c17dc59d",
54
  "version_major": 2,
55
  "version_minor": 0
56
  },
@@ -77,7 +83,54 @@
77
  " .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
78
  " _.Easement, _.Acres, _.id, _.type, _.geom)\n",
79
  " )\n",
80
- "ca.to_parquet(\"ca2024.parquet\")"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ]
82
  },
83
  {
@@ -90,16 +143,178 @@
90
  },
91
  {
92
  "cell_type": "code",
93
- "execution_count": null,
94
  "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
95
  "metadata": {},
96
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  "source": [
98
  "# compute some summary tables:\n",
99
  "\n",
100
- "(ca\n",
101
  " .filter(_.established == 2024)\n",
102
- " .filter(_.manager_type == manager_type)\n",
103
  " .group_by(_.manager, _.manager_type)\n",
104
  " .agg(area = _.Acres.sum())\n",
105
  " .order_by(_.area.desc())\n",
@@ -109,12 +324,29 @@
109
  },
110
  {
111
  "cell_type": "code",
112
- "execution_count": null,
113
  "id": "c62854f6-1456-4207-8c69-53af17970102",
114
  "metadata": {},
115
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  "source": [
117
- "gdf = ca.filter(_.manager == \"United States National Park Service\", _.established== 2024).execute()\n",
 
118
  "established = {'property': 'established',\n",
119
  " 'type': 'categorical',\n",
120
  " 'stops': [\n",
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Preprocessing"
9
+ ]
10
+ },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 1,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
 
 
18
  "import ibis\n",
19
  "from ibis import _\n",
20
  "\n",
21
+ "conn = ibis.duckdb.connect(\"tmp\", extensions=[\"spatial\"])\n",
22
  "ca_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
23
  "# or use local copy:\n",
24
  "ca_parquet = \"/home/rstudio/source.coop/cboettig/ca30x30/ca_areas.parquet\"\n"
 
26
  },
27
  {
28
  "cell_type": "code",
29
+ "execution_count": 2,
30
  "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
31
  "metadata": {},
32
  "outputs": [],
33
  "source": [
34
+ "buffer = -0.00001\n",
35
  "\n",
36
  "tbl = (\n",
37
+ " conn.read_parquet(ca_parquet)\n",
38
  " .cast({\"SHAPE\": \"geometry\"})\n",
39
  " .rename(geom = \"SHAPE\")\n",
40
  " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
 
49
  },
50
  {
51
  "cell_type": "code",
52
+ "execution_count": 3,
53
  "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
54
  "metadata": {},
55
  "outputs": [
56
  {
57
  "data": {
58
  "application/vnd.jupyter.widget-view+json": {
59
+ "model_id": "dd86bb91838d45aa87197fc49a3b2362",
60
  "version_major": 2,
61
  "version_minor": 0
62
  },
 
83
  " .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
84
  " _.Easement, _.Acres, _.id, _.type, _.geom)\n",
85
  " )\n",
86
+ "ca.execute().to_parquet(\"ca2024.parquet\")"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "markdown",
91
+ "id": "cebd0ff5-8353-4b84-b9ee-182b74613554",
92
+ "metadata": {},
93
+ "source": [
94
+ "# Testing & visualization\n",
95
+ "\n",
96
+ "`ca2024.parquet` now contains all we need. The code below illustrates some quick examples of the kinds of visualizations and summaries we might want to compute with this data. \n"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 4,
102
+ "id": "55afe07c-8681-4308-bbb9-e460f7380f86",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "import leafmap.maplibregl as leafmap\n",
107
+ "import ibis\n",
108
+ "from ibis import _\n",
109
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
110
+ "\n",
111
+ "ca2024 = conn.read_parquet(\"ca2024.parquet\")"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 5,
117
+ "id": "9d4cd1c4-288b-4d1c-907c-ca76ccbdb1d6",
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "text/plain": [
123
+ "geopandas.geodataframe.GeoDataFrame"
124
+ ]
125
+ },
126
+ "execution_count": 5,
127
+ "metadata": {},
128
+ "output_type": "execute_result"
129
+ }
130
+ ],
131
+ "source": [
132
+ "gdf = ca2024.execute()\n",
133
+ "gdf.__class__"
134
  ]
135
  },
136
  {
 
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": 6,
147
  "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
148
  "metadata": {},
149
+ "outputs": [
150
+ {
151
+ "data": {
152
+ "text/html": [
153
+ "<div>\n",
154
+ "<style scoped>\n",
155
+ " .dataframe tbody tr th:only-of-type {\n",
156
+ " vertical-align: middle;\n",
157
+ " }\n",
158
+ "\n",
159
+ " .dataframe tbody tr th {\n",
160
+ " vertical-align: top;\n",
161
+ " }\n",
162
+ "\n",
163
+ " .dataframe thead th {\n",
164
+ " text-align: right;\n",
165
+ " }\n",
166
+ "</style>\n",
167
+ "<table border=\"1\" class=\"dataframe\">\n",
168
+ " <thead>\n",
169
+ " <tr style=\"text-align: right;\">\n",
170
+ " <th></th>\n",
171
+ " <th>manager</th>\n",
172
+ " <th>manager_type</th>\n",
173
+ " <th>area</th>\n",
174
+ " </tr>\n",
175
+ " </thead>\n",
176
+ " <tbody>\n",
177
+ " <tr>\n",
178
+ " <th>0</th>\n",
179
+ " <td>California Department of Fish and Wildlife</td>\n",
180
+ " <td>State</td>\n",
181
+ " <td>42086.259379</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>1</th>\n",
185
+ " <td>California Department of Parks and Recreation</td>\n",
186
+ " <td>State</td>\n",
187
+ " <td>17931.321473</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>2</th>\n",
191
+ " <td>California Tahoe Conservancy</td>\n",
192
+ " <td>State</td>\n",
193
+ " <td>4803.250929</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>3</th>\n",
197
+ " <td>California Department of Water Resources</td>\n",
198
+ " <td>State</td>\n",
199
+ " <td>2248.610289</td>\n",
200
+ " </tr>\n",
201
+ " <tr>\n",
202
+ " <th>4</th>\n",
203
+ " <td>University of California</td>\n",
204
+ " <td>State</td>\n",
205
+ " <td>1860.854444</td>\n",
206
+ " </tr>\n",
207
+ " <tr>\n",
208
+ " <th>5</th>\n",
209
+ " <td>California Department of Forestry and Fire Pro...</td>\n",
210
+ " <td>State</td>\n",
211
+ " <td>1089.029581</td>\n",
212
+ " </tr>\n",
213
+ " <tr>\n",
214
+ " <th>6</th>\n",
215
+ " <td>Coachella Valley Mountains Conservancy</td>\n",
216
+ " <td>State</td>\n",
217
+ " <td>119.907070</td>\n",
218
+ " </tr>\n",
219
+ " <tr>\n",
220
+ " <th>7</th>\n",
221
+ " <td>California State Lands Commission</td>\n",
222
+ " <td>State</td>\n",
223
+ " <td>109.016475</td>\n",
224
+ " </tr>\n",
225
+ " <tr>\n",
226
+ " <th>8</th>\n",
227
+ " <td>California State Coastal Conservancy</td>\n",
228
+ " <td>State</td>\n",
229
+ " <td>97.314705</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>9</th>\n",
233
+ " <td>California State University Sonoma</td>\n",
234
+ " <td>State</td>\n",
235
+ " <td>38.760956</td>\n",
236
+ " </tr>\n",
237
+ " <tr>\n",
238
+ " <th>10</th>\n",
239
+ " <td>Other State</td>\n",
240
+ " <td>State</td>\n",
241
+ " <td>32.927882</td>\n",
242
+ " </tr>\n",
243
+ " <tr>\n",
244
+ " <th>11</th>\n",
245
+ " <td>San Joaquin River Conservancy</td>\n",
246
+ " <td>State</td>\n",
247
+ " <td>1.531470</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>12</th>\n",
251
+ " <td>California Department of Transportation</td>\n",
252
+ " <td>State</td>\n",
253
+ " <td>1.261433</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>13</th>\n",
257
+ " <td>Unknown</td>\n",
258
+ " <td>State</td>\n",
259
+ " <td>0.255531</td>\n",
260
+ " </tr>\n",
261
+ " <tr>\n",
262
+ " <th>14</th>\n",
263
+ " <td>California State University</td>\n",
264
+ " <td>State</td>\n",
265
+ " <td>0.021589</td>\n",
266
+ " </tr>\n",
267
+ " </tbody>\n",
268
+ "</table>\n",
269
+ "</div>"
270
+ ],
271
+ "text/plain": [
272
+ " manager manager_type \\\n",
273
+ "0 California Department of Fish and Wildlife State \n",
274
+ "1 California Department of Parks and Recreation State \n",
275
+ "2 California Tahoe Conservancy State \n",
276
+ "3 California Department of Water Resources State \n",
277
+ "4 University of California State \n",
278
+ "5 California Department of Forestry and Fire Pro... State \n",
279
+ "6 Coachella Valley Mountains Conservancy State \n",
280
+ "7 California State Lands Commission State \n",
281
+ "8 California State Coastal Conservancy State \n",
282
+ "9 California State University Sonoma State \n",
283
+ "10 Other State State \n",
284
+ "11 San Joaquin River Conservancy State \n",
285
+ "12 California Department of Transportation State \n",
286
+ "13 Unknown State \n",
287
+ "14 California State University State \n",
288
+ "\n",
289
+ " area \n",
290
+ "0 42086.259379 \n",
291
+ "1 17931.321473 \n",
292
+ "2 4803.250929 \n",
293
+ "3 2248.610289 \n",
294
+ "4 1860.854444 \n",
295
+ "5 1089.029581 \n",
296
+ "6 119.907070 \n",
297
+ "7 109.016475 \n",
298
+ "8 97.314705 \n",
299
+ "9 38.760956 \n",
300
+ "10 32.927882 \n",
301
+ "11 1.531470 \n",
302
+ "12 1.261433 \n",
303
+ "13 0.255531 \n",
304
+ "14 0.021589 "
305
+ ]
306
+ },
307
+ "execution_count": 6,
308
+ "metadata": {},
309
+ "output_type": "execute_result"
310
+ }
311
+ ],
312
  "source": [
313
  "# compute some summary tables:\n",
314
  "\n",
315
+ "(ca2024\n",
316
  " .filter(_.established == 2024)\n",
317
+ " .filter(_.manager_type == \"State\")\n",
318
  " .group_by(_.manager, _.manager_type)\n",
319
  " .agg(area = _.Acres.sum())\n",
320
  " .order_by(_.area.desc())\n",
 
324
  },
325
  {
326
  "cell_type": "code",
327
+ "execution_count": 7,
328
  "id": "c62854f6-1456-4207-8c69-53af17970102",
329
  "metadata": {},
330
+ "outputs": [
331
+ {
332
+ "data": {
333
+ "application/vnd.jupyter.widget-view+json": {
334
+ "model_id": "10329a95c7b84de4b598f0ccf4c6af20",
335
+ "version_major": 2,
336
+ "version_minor": 1
337
+ },
338
+ "text/plain": [
339
+ "Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…"
340
+ ]
341
+ },
342
+ "execution_count": 7,
343
+ "metadata": {},
344
+ "output_type": "execute_result"
345
+ }
346
+ ],
347
  "source": [
348
+ "gdf = ca2024.filter(_.manager == \"California Department of Parks and Recreation\", _.established== 2024).execute()\n",
349
+ "\n",
350
  "established = {'property': 'established',\n",
351
  " 'type': 'categorical',\n",
352
  " 'stops': [\n",