cboettig committed on
Commit
d48b10c
·
1 Parent(s): 5dee49f
Files changed (2) hide show
  1. difference.ipynb +176 -32
  2. preprocess.ipynb +152 -90
difference.ipynb CHANGED
@@ -2,24 +2,14 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "00982ea4-b630-4233-b6df-354af3498ed2",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import ibis"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 2,
16
  "id": "e84709ab-1b47-49ee-8cbd-8aa69744b6c3",
17
  "metadata": {},
18
  "outputs": [
19
  {
20
  "data": {
21
  "application/vnd.jupyter.widget-view+json": {
22
- "model_id": "cc0e11b65226461e924d9a79ac880cae",
23
  "version_major": 2,
24
  "version_minor": 0
25
  },
@@ -33,7 +23,7 @@
33
  {
34
  "data": {
35
  "application/vnd.jupyter.widget-view+json": {
36
- "model_id": "740ca34451554f8da330cf50694668be",
37
  "version_major": 2,
38
  "version_minor": 0
39
  },
@@ -43,6 +33,78 @@
43
  },
44
  "metadata": {},
45
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "source": [
@@ -57,23 +119,50 @@
57
  " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
58
  " .filter(_.reGAP < 3) \n",
59
  ")\n",
60
- "lvl = \"Federal\"\n",
61
- "conn.create_table(\"t1\", tbl.filter(_.Release_Year == 2024, _.MNG_AG_LEV != lvl), overwrite = True)\n",
62
- "conn.create_table(\"t2\", tbl.filter(_.Release_Year == 2023, _.MNG_AG_LEV != lvl), overwrite = True)\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  "\n",
64
  "conn.disconnect()"
65
  ]
66
  },
67
  {
68
  "cell_type": "code",
69
- "execution_count": 3,
70
  "id": "121c3cad-680c-4f3a-9075-638711ea1634",
71
  "metadata": {},
72
  "outputs": [
73
  {
74
  "data": {
75
  "application/vnd.jupyter.widget-view+json": {
76
- "model_id": "2e29aea00f864b6b9e440f741c4979d0",
77
  "version_major": 2,
78
  "version_minor": 0
79
  },
@@ -88,13 +177,15 @@
88
  "name": "stdout",
89
  "output_type": "stream",
90
  "text": [
91
- "CPU times: user 1min 28s, sys: 6.91 s, total: 1min 35s\n",
92
- "Wall time: 1min 28s\n"
93
  ]
94
  }
95
  ],
96
  "source": [
97
  "%%time\n",
 
 
98
  "import duckdb\n",
99
  "db = duckdb.connect(\"tmp2\")\n",
100
  "db.install_extension(\"spatial\")\n",
@@ -116,35 +207,67 @@
116
  },
117
  {
118
  "cell_type": "code",
119
- "execution_count": null,
120
- "id": "e0a31bd7-4b67-46ad-88c4-7c17d820d66b",
121
  "metadata": {},
122
  "outputs": [],
123
  "source": [
124
- "#db.table(\"diff\").to_parquet(\"diff.parquet\")"
 
 
 
 
 
 
125
  ]
126
  },
127
  {
128
  "cell_type": "code",
129
- "execution_count": 4,
130
- "id": "28a66c8b-89fb-4972-9ff5-ee9bfdf148e9",
131
  "metadata": {},
132
  "outputs": [],
133
  "source": [
 
134
  "conn = ibis.duckdb.connect(\"tmp2\", extensions=[\"spatial\"])\n",
135
- "gdf = conn.table(\"diff\").mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()"
 
136
  ]
137
  },
138
  {
139
  "cell_type": "code",
140
- "execution_count": 5,
141
- "id": "308dc665-1323-4e9b-bc2d-69201c325c4b",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  "metadata": {},
143
  "outputs": [
144
  {
145
  "data": {
146
  "application/vnd.jupyter.widget-view+json": {
147
- "model_id": "3168461698f142a2b6c26bee3647378e",
148
  "version_major": 2,
149
  "version_minor": 1
150
  },
@@ -152,16 +275,37 @@
152
  "Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…"
153
  ]
154
  },
155
- "execution_count": 5,
156
  "metadata": {},
157
  "output_type": "execute_result"
158
  }
159
  ],
160
  "source": [
 
 
161
  "import leafmap.maplibregl as leafmap\n",
 
 
 
162
  "m = leafmap.Map()\n",
163
  "m.add_gdf(gdf)\n",
164
- "m\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  ]
166
  },
167
  {
@@ -171,7 +315,7 @@
171
  "metadata": {},
172
  "outputs": [],
173
  "source": [
174
- "\"TO 'new2024.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES')\""
175
  ]
176
  }
177
  ],
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 21,
 
 
 
 
 
 
 
 
 
 
6
  "id": "e84709ab-1b47-49ee-8cbd-8aa69744b6c3",
7
  "metadata": {},
8
  "outputs": [
9
  {
10
  "data": {
11
  "application/vnd.jupyter.widget-view+json": {
12
+ "model_id": "c4382edb57b643e6907b0314c79387bd",
13
  "version_major": 2,
14
  "version_minor": 0
15
  },
 
23
  {
24
  "data": {
25
  "application/vnd.jupyter.widget-view+json": {
26
+ "model_id": "b4ae3a5cd5f84c5b86fdcd767d330adf",
27
  "version_major": 2,
28
  "version_minor": 0
29
  },
 
33
  },
34
  "metadata": {},
35
  "output_type": "display_data"
36
+ },
37
+ {
38
+ "data": {
39
+ "text/html": [
40
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">DatabaseTable: tmp2.main.t2\n",
41
+ " gid int64\n",
42
+ " cpad_ACCESS_TYP string\n",
43
+ " cpad_PARK_NAME string\n",
44
+ " cpad_MNG_AGENCY string\n",
45
+ " cpad_MNG_AG_LEV string\n",
46
+ " reGAP int16\n",
47
+ " Easement int16\n",
48
+ " TYPE string\n",
49
+ " CA_County_Name string\n",
50
+ " CA_Region_Name string\n",
51
+ " TerrMar string\n",
52
+ " CA_Ecoregion_Name string\n",
53
+ " ACCESS_TYP string\n",
54
+ " MNG_AGNCY string\n",
55
+ " MNG_AG_LEV string\n",
56
+ " UNIT_NAME string\n",
57
+ " DefaultSelection string\n",
58
+ " CA_Ecoregion_Acres float32\n",
59
+ " CA_Region_Acres float32\n",
60
+ " CA_County_Acres float32\n",
61
+ " Acres float32\n",
62
+ " CA_Marine_Acres float32\n",
63
+ " Release_Year int16\n",
64
+ " mgmt_stack string\n",
65
+ " geom geospatial:geometry\n",
66
+ " SHAPE_bbox xmin: float32\n",
67
+ " ymin: float32\n",
68
+ " xmax: float32\n",
69
+ " ymax: float32\n",
70
+ "</pre>\n"
71
+ ],
72
+ "text/plain": [
73
+ "DatabaseTable: tmp2.main.t2\n",
74
+ " gid int64\n",
75
+ " cpad_ACCESS_TYP string\n",
76
+ " cpad_PARK_NAME string\n",
77
+ " cpad_MNG_AGENCY string\n",
78
+ " cpad_MNG_AG_LEV string\n",
79
+ " reGAP int16\n",
80
+ " Easement int16\n",
81
+ " TYPE string\n",
82
+ " CA_County_Name string\n",
83
+ " CA_Region_Name string\n",
84
+ " TerrMar string\n",
85
+ " CA_Ecoregion_Name string\n",
86
+ " ACCESS_TYP string\n",
87
+ " MNG_AGNCY string\n",
88
+ " MNG_AG_LEV string\n",
89
+ " UNIT_NAME string\n",
90
+ " DefaultSelection string\n",
91
+ " CA_Ecoregion_Acres float32\n",
92
+ " CA_Region_Acres float32\n",
93
+ " CA_County_Acres float32\n",
94
+ " Acres float32\n",
95
+ " CA_Marine_Acres float32\n",
96
+ " Release_Year int16\n",
97
+ " mgmt_stack string\n",
98
+ " geom geospatial:geometry\n",
99
+ " SHAPE_bbox xmin: float32\n",
100
+ " ymin: float32\n",
101
+ " xmax: float32\n",
102
+ " ymax: float32"
103
+ ]
104
+ },
105
+ "execution_count": 21,
106
+ "metadata": {},
107
+ "output_type": "execute_result"
108
  }
109
  ],
110
  "source": [
 
119
  " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
120
  " .filter(_.reGAP < 3) \n",
121
  ")\n",
122
+ "conn.create_table(\"t1\", tbl.filter(_.Release_Year == 2024), overwrite = True)\n",
123
+ "conn.create_table(\"t2\", tbl.filter(_.Release_Year == 2023), overwrite = True)\n",
124
+ "\n"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 23,
130
+ "id": "303792ac-9b1d-41b2-a17b-5cf855d70633",
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": [
134
+ "ca2024 = conn.table(\"t1\").execute()\n",
135
+ "ca2023 = conn.table(\"t2\").execute()\n",
136
+ "\n",
137
+ "import leafmap.maplibregl as leafmap\n",
138
+ "m = leafmap.Map()\n",
139
+ "m.add_gdf(ca2024, name = \"2024\")\n",
140
+ "m.add_gdf(ca2023, name =\"2023\")\n",
141
+ "\n",
142
+ "m"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "id": "9fdd2ed2-623f-479c-b0b7-7c723f3f6728",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
  "\n",
153
  "conn.disconnect()"
154
  ]
155
  },
156
  {
157
  "cell_type": "code",
158
+ "execution_count": 2,
159
  "id": "121c3cad-680c-4f3a-9075-638711ea1634",
160
  "metadata": {},
161
  "outputs": [
162
  {
163
  "data": {
164
  "application/vnd.jupyter.widget-view+json": {
165
+ "model_id": "c69a096d24974e9ea8ad3d5b937b723a",
166
  "version_major": 2,
167
  "version_minor": 0
168
  },
 
177
  "name": "stdout",
178
  "output_type": "stream",
179
  "text": [
180
+ "CPU times: user 22min 10s, sys: 43 s, total: 22min 53s\n",
181
+ "Wall time: 11min 47s\n"
182
  ]
183
  }
184
  ],
185
  "source": [
186
  "%%time\n",
187
+ "\n",
188
+ "## Run this on a machine with a lot of RAM. Consider filtering federal/non-federal first.\n",
189
  "import duckdb\n",
190
  "db = duckdb.connect(\"tmp2\")\n",
191
  "db.install_extension(\"spatial\")\n",
 
207
  },
208
  {
209
  "cell_type": "code",
210
+ "execution_count": 7,
211
+ "id": "cf3d3e5b-2ff1-4ef9-a147-01f15e970e49",
212
  "metadata": {},
213
  "outputs": [],
214
  "source": [
215
+ "## Cannot go straight to geoparquet due to M geometries!\n",
216
+ "#db.table(\"diff\").to_parquet(\"diff.parquet\")\n",
217
+ "\n",
218
+ "## This doesn't work either: \n",
219
+ "#db.sql('''CREATE OR REPLACE TABLE diff2024 AS SELECT *, st_force2d(geom) AS geom FROM diff''')\n",
220
+ "\n",
221
+ "## We could cast geom as blob...."
222
  ]
223
  },
224
  {
225
  "cell_type": "code",
226
+ "execution_count": 10,
227
+ "id": "308dc665-1323-4e9b-bc2d-69201c325c4b",
228
  "metadata": {},
229
  "outputs": [],
230
  "source": [
231
+ "# Workaround: do it all in RAM via geopandas, which drops M geometries (apparently due to a bug), so the result can be written to parquet.\n",
232
  "conn = ibis.duckdb.connect(\"tmp2\", extensions=[\"spatial\"])\n",
233
+ "gdf = conn.table(\"diff\").mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
234
+ "gdf.to_parquet(\"ca2024_diffs.parquet\")\n"
235
  ]
236
  },
237
  {
238
  "cell_type": "code",
239
+ "execution_count": 19,
240
+ "id": "063a11d8-15d7-4b91-b67c-3ccae3edcc8d",
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": [
244
+ "# stash in our team S3 storage \n",
245
+ "\n",
246
+ "import streamlit as st\n",
247
+ "from minio import Minio\n",
248
+ "import os\n",
249
+ "# Read the MinIO credentials from Streamlit secrets; they are used below to upload to the bucket\n",
250
+ "key = st.secrets[\"MINIO_KEY\"]\n",
251
+ "secret = st.secrets[\"MINIO_SECRET\"]\n",
252
+ "client = Minio(\"minio.carlboettiger.info\", key, secret, secure=True)\n",
253
+ "\n",
254
+ "size = os.path.getsize(\"ca2024_diffs.parquet\")\n",
255
+ "with open(\"ca2024_diffs.parquet\", \"rb\") as file_data:\n",
256
+ " client.put_object(\"public-biodiversity\", \"ca30x30/ca2024_diffs.parquet\", file_data, length = size)\n",
257
+ "\n",
258
+ "\n"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": 26,
264
+ "id": "34425101-0592-42fd-9d62-22c9e7a6d6ac",
265
  "metadata": {},
266
  "outputs": [
267
  {
268
  "data": {
269
  "application/vnd.jupyter.widget-view+json": {
270
+ "model_id": "c5cc696c15374d519fb940e054a902f0",
271
  "version_major": 2,
272
  "version_minor": 1
273
  },
 
275
  "Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…"
276
  ]
277
  },
278
+ "execution_count": 26,
279
  "metadata": {},
280
  "output_type": "execute_result"
281
  }
282
  ],
283
  "source": [
284
+ "# We can read the file back from S3 and plot the whole thing. Note that gdf has no metadata.\n",
285
+ "\n",
286
  "import leafmap.maplibregl as leafmap\n",
287
+ "import ibis\n",
288
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
289
+ "gdf = conn.read_parquet(\"https://minio.carlboettiger.info/public-biodiversity/ca30x30/ca2024_diffs.parquet\").execute()\n",
290
  "m = leafmap.Map()\n",
291
  "m.add_gdf(gdf)\n",
292
+ "#m.to_html(\"ca2024.html\")\n",
293
+ "m"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": 25,
299
+ "id": "17af39a0-9a69-4bfa-9084-d7b26adf74fc",
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": [
303
+ "path = \"ca2024.html\"\n",
304
+ "size = os.path.getsize(path)\n",
305
+ "with open(path, \"rb\") as file_data:\n",
306
+ " client.put_object(\"public-biodiversity\", \"ca30x30/\"+path, file_data, length = size)\n",
307
+ "\n",
308
+ "\n"
309
  ]
310
  },
311
  {
 
315
  "metadata": {},
316
  "outputs": [],
317
  "source": [
318
+ "# \"TO 'new2024.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES')\""
319
  ]
320
  }
321
  ],
preprocess.ipynb CHANGED
@@ -31,32 +31,63 @@
31
  "metadata": {},
32
  "outputs": [],
33
  "source": [
34
- "buffer = -0.00001\n",
35
  "\n",
36
  "tbl = (\n",
37
  " conn.read_parquet(ca_parquet)\n",
38
  " .cast({\"SHAPE\": \"geometry\"})\n",
39
  " .rename(geom = \"SHAPE\")\n",
40
- " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
41
  " .filter(_.reGAP < 3) \n",
42
  ")\n",
43
  "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))\n",
44
  "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
45
  "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))\n",
46
- "\n",
47
- "new2024 = intersects.select(\"OBJECTID\").mutate(established = 2024)"
48
  ]
49
  },
50
  {
51
  "cell_type": "code",
52
  "execution_count": 3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
54
  "metadata": {},
55
  "outputs": [
56
  {
57
  "data": {
58
  "application/vnd.jupyter.widget-view+json": {
59
- "model_id": "dd86bb91838d45aa87197fc49a3b2362",
60
  "version_major": 2,
61
  "version_minor": 0
62
  },
@@ -66,9 +97,21 @@
66
  },
67
  "metadata": {},
68
  "output_type": "display_data"
 
 
 
 
 
 
 
 
69
  }
70
  ],
71
  "source": [
 
 
 
 
72
  "ca = (conn\n",
73
  " .read_parquet(ca_parquet)\n",
74
  " .cast({\"SHAPE\": \"geometry\"})\n",
@@ -78,12 +121,61 @@
78
  " .left_join(new2024, \"OBJECTID\")\n",
79
  " .mutate(established=_.established.fill_null(2023))\n",
80
  " .mutate(geom = _.SHAPE.convert(\"epsg:3310\",\"epsg:4326\"))\n",
81
- " .rename(name = \"UNIT_NAME\", access_type = \"ACCESS_TYP\", manager = \"MNG_AGNCY\",\n",
82
- " manager_type = \"MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\")\n",
83
  " .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
84
  " _.Easement, _.Acres, _.id, _.type, _.geom)\n",
85
  " )\n",
86
- "ca.execute().to_parquet(\"ca2024.parquet\")"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  ]
88
  },
89
  {
@@ -111,14 +203,6 @@
111
  "ca2024 = conn.read_parquet(\"ca2024.parquet\")"
112
  ]
113
  },
114
- {
115
- "cell_type": "markdown",
116
- "id": "ebbb2650-4442-4e54-8467-5e681d6fab9e",
117
- "metadata": {},
118
- "source": [
119
- "Using difference (overlap) instead:"
120
- ]
121
- },
122
  {
123
  "cell_type": "code",
124
  "execution_count": 6,
@@ -156,88 +240,82 @@
156
  " <th>0</th>\n",
157
  " <td>California Department of Fish and Wildlife</td>\n",
158
  " <td>State</td>\n",
159
- " <td>42086.259379</td>\n",
160
  " </tr>\n",
161
  " <tr>\n",
162
  " <th>1</th>\n",
163
  " <td>California Department of Parks and Recreation</td>\n",
164
  " <td>State</td>\n",
165
- " <td>17931.321473</td>\n",
166
  " </tr>\n",
167
  " <tr>\n",
168
  " <th>2</th>\n",
169
  " <td>California Tahoe Conservancy</td>\n",
170
  " <td>State</td>\n",
171
- " <td>4803.250929</td>\n",
172
  " </tr>\n",
173
  " <tr>\n",
174
  " <th>3</th>\n",
175
  " <td>California Department of Water Resources</td>\n",
176
  " <td>State</td>\n",
177
- " <td>2248.610289</td>\n",
178
  " </tr>\n",
179
  " <tr>\n",
180
  " <th>4</th>\n",
181
- " <td>University of California</td>\n",
182
  " <td>State</td>\n",
183
- " <td>1860.854444</td>\n",
184
  " </tr>\n",
185
  " <tr>\n",
186
  " <th>5</th>\n",
187
- " <td>California Department of Forestry and Fire Pro...</td>\n",
188
  " <td>State</td>\n",
189
- " <td>1089.029581</td>\n",
190
  " </tr>\n",
191
  " <tr>\n",
192
  " <th>6</th>\n",
193
- " <td>Coachella Valley Mountains Conservancy</td>\n",
194
  " <td>State</td>\n",
195
- " <td>119.907070</td>\n",
196
  " </tr>\n",
197
  " <tr>\n",
198
  " <th>7</th>\n",
199
- " <td>California State Lands Commission</td>\n",
200
  " <td>State</td>\n",
201
- " <td>109.016475</td>\n",
202
  " </tr>\n",
203
  " <tr>\n",
204
  " <th>8</th>\n",
205
- " <td>California State Coastal Conservancy</td>\n",
206
  " <td>State</td>\n",
207
- " <td>97.314705</td>\n",
208
  " </tr>\n",
209
  " <tr>\n",
210
  " <th>9</th>\n",
211
- " <td>California State University Sonoma</td>\n",
212
  " <td>State</td>\n",
213
- " <td>38.760956</td>\n",
214
  " </tr>\n",
215
  " <tr>\n",
216
  " <th>10</th>\n",
217
  " <td>Other State</td>\n",
218
  " <td>State</td>\n",
219
- " <td>32.927882</td>\n",
220
  " </tr>\n",
221
  " <tr>\n",
222
  " <th>11</th>\n",
223
- " <td>San Joaquin River Conservancy</td>\n",
224
  " <td>State</td>\n",
225
- " <td>1.531470</td>\n",
226
  " </tr>\n",
227
  " <tr>\n",
228
  " <th>12</th>\n",
229
- " <td>California Department of Transportation</td>\n",
230
  " <td>State</td>\n",
231
- " <td>1.261433</td>\n",
232
  " </tr>\n",
233
  " <tr>\n",
234
  " <th>13</th>\n",
235
- " <td>Unknown</td>\n",
236
- " <td>State</td>\n",
237
- " <td>0.255531</td>\n",
238
- " </tr>\n",
239
- " <tr>\n",
240
- " <th>14</th>\n",
241
  " <td>California State University</td>\n",
242
  " <td>State</td>\n",
243
  " <td>0.021589</td>\n",
@@ -252,34 +330,32 @@
252
  "1 California Department of Parks and Recreation State \n",
253
  "2 California Tahoe Conservancy State \n",
254
  "3 California Department of Water Resources State \n",
255
- "4 University of California State \n",
256
- "5 California Department of Forestry and Fire Pro... State \n",
257
- "6 Coachella Valley Mountains Conservancy State \n",
258
- "7 California State Lands Commission State \n",
259
- "8 California State Coastal Conservancy State \n",
260
- "9 California State University Sonoma State \n",
261
  "10 Other State State \n",
262
- "11 San Joaquin River Conservancy State \n",
263
- "12 California Department of Transportation State \n",
264
- "13 Unknown State \n",
265
- "14 California State University State \n",
266
  "\n",
267
  " area \n",
268
- "0 42086.259379 \n",
269
- "1 17931.321473 \n",
270
- "2 4803.250929 \n",
271
- "3 2248.610289 \n",
272
- "4 1860.854444 \n",
273
- "5 1089.029581 \n",
274
- "6 119.907070 \n",
275
- "7 109.016475 \n",
276
- "8 97.314705 \n",
277
- "9 38.760956 \n",
278
- "10 32.927882 \n",
279
- "11 1.531470 \n",
280
- "12 1.261433 \n",
281
- "13 0.255531 \n",
282
- "14 0.021589 "
283
  ]
284
  },
285
  "execution_count": 6,
@@ -302,40 +378,26 @@
302
  },
303
  {
304
  "cell_type": "code",
305
- "execution_count": 7,
306
  "id": "c62854f6-1456-4207-8c69-53af17970102",
307
  "metadata": {},
308
- "outputs": [
309
- {
310
- "data": {
311
- "application/vnd.jupyter.widget-view+json": {
312
- "model_id": "10329a95c7b84de4b598f0ccf4c6af20",
313
- "version_major": 2,
314
- "version_minor": 1
315
- },
316
- "text/plain": [
317
- "Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…"
318
- ]
319
- },
320
- "execution_count": 7,
321
- "metadata": {},
322
- "output_type": "execute_result"
323
- }
324
- ],
325
  "source": [
326
- "gdf = ca2024.filter(_.manager == \"California Department of Parks and Recreation\", _.established== 2024).execute()\n",
327
  "\n",
 
328
  "established = {'property': 'established',\n",
329
  " 'type': 'categorical',\n",
330
  " 'stops': [\n",
331
  " [2023, \"#26542C80\"], \n",
332
- " [2024, \"#F3AB3D80\"]]\n",
333
- " }\n",
334
  "paint = {\"fill-color\": established}\n",
335
  "\n",
 
336
  "m = leafmap.Map(style=\"positron\")\n",
337
- "m.add_gdf(gdf,layer_type=\"fill\", name = \"CA 30x30\", paint = paint)\n",
 
338
  "m.add_layer_control()\n",
 
339
  "m"
340
  ]
341
  }
 
31
  "metadata": {},
32
  "outputs": [],
33
  "source": [
34
+ "buffer = -2\n",
35
  "\n",
36
  "tbl = (\n",
37
  " conn.read_parquet(ca_parquet)\n",
38
  " .cast({\"SHAPE\": \"geometry\"})\n",
39
  " .rename(geom = \"SHAPE\")\n",
40
+ "# .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
41
  " .filter(_.reGAP < 3) \n",
42
  ")\n",
43
  "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))\n",
44
  "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
45
  "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))\n",
46
+ "\n"
 
47
  ]
48
  },
49
  {
50
  "cell_type": "code",
51
  "execution_count": 3,
52
+ "id": "a0b75637-e015-4be4-86e1-c9757ac43d0f",
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "## Testing, run only on subset data\n",
57
+ "if False:\n",
58
+ " gdf = intersects.mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
59
+ " gdf_2023 = tbl_2023.mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
60
+ " gdf_2024 = tbl_2024.mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
61
+ " # gdf = ca2024\n",
62
+ " established = {'property': 'established',\n",
63
+ " 'type': 'categorical',\n",
64
+ " 'stops': [\n",
65
+ " [2023, \"#26542C80\"], \n",
66
+ " [2024, \"#F3AB3D80\"]]\n",
67
+ " }\n",
68
+ " inter = {\"fill-color\": \"#F3AB3D\"}\n",
69
+ " p2024 = {\"fill-color\": \"#26542C\"}\n",
70
+ " p2023 = {\"fill-color\": \"#8B0A1A\"}\n",
71
+ " \n",
72
+ " m = leafmap.Map(style=\"positron\")\n",
73
+ " m.add_gdf(gdf_2024,layer_type=\"fill\", name = \"2024\", paint = p2024)\n",
74
+ " m.add_gdf(gdf_2023,layer_type=\"fill\", name = \"2023\", paint = p2023)\n",
75
+ " m.add_gdf(gdf,layer_type=\"fill\", name = \"intersects\", paint = inter)\n",
76
+ " \n",
77
+ " m.add_layer_control()\n",
78
+ " m"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 4,
84
  "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
85
  "metadata": {},
86
  "outputs": [
87
  {
88
  "data": {
89
  "application/vnd.jupyter.widget-view+json": {
90
+ "model_id": "f1616caa5ca54678a00caa974721de2b",
91
  "version_major": 2,
92
  "version_minor": 0
93
  },
 
97
  },
98
  "metadata": {},
99
  "output_type": "display_data"
100
+ },
101
+ {
102
+ "name": "stdout",
103
+ "output_type": "stream",
104
+ "text": [
105
+ "CPU times: user 55min 28s, sys: 2.94 s, total: 55min 31s\n",
106
+ "Wall time: 22min 6s\n"
107
+ ]
108
  }
109
  ],
110
  "source": [
111
+ "%%time\n",
112
+ "\n",
113
+ "new2024 = intersects.select(\"OBJECTID\").mutate(established = 2024)\n",
114
+ "\n",
115
  "ca = (conn\n",
116
  " .read_parquet(ca_parquet)\n",
117
  " .cast({\"SHAPE\": \"geometry\"})\n",
 
121
  " .left_join(new2024, \"OBJECTID\")\n",
122
  " .mutate(established=_.established.fill_null(2023))\n",
123
  " .mutate(geom = _.SHAPE.convert(\"epsg:3310\",\"epsg:4326\"))\n",
124
+ " .rename(name = \"cpad_PARK_NAME\", access_type = \"cpad_ACCESS_TYP\", manager = \"cpad_MNG_AGENCY\",\n",
125
+ " manager_type = \"cpad_MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\")\n",
126
  " .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
127
  " _.Easement, _.Acres, _.id, _.type, _.geom)\n",
128
  " )\n",
129
+ "ca2024 = ca.execute()\n",
130
+ "\n",
131
+ "ca2024.to_parquet(\"ca2024.parquet\")"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 1,
137
+ "id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
138
+ "metadata": {},
139
+ "outputs": [
140
+ {
141
+ "name": "stdout",
142
+ "output_type": "stream",
143
+ "text": [
144
+ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
145
+ "Token is valid (permission: write).\n",
146
+ "Your token has been saved to /home/jovyan/.cache/huggingface/token\n",
147
+ "Login successful\n"
148
+ ]
149
+ },
150
+ {
151
+ "data": {
152
+ "application/vnd.jupyter.widget-view+json": {
153
+ "model_id": "9e680dc3991c4c9b808447f04d056f53",
154
+ "version_major": 2,
155
+ "version_minor": 0
156
+ },
157
+ "text/plain": [
158
+ "ca2024.parquet: 0%| | 0.00/137M [00:00<?, ?B/s]"
159
+ ]
160
+ },
161
+ "metadata": {},
162
+ "output_type": "display_data"
163
+ }
164
+ ],
165
+ "source": [
166
+ "## Upload to Huggingface\n",
167
+ "# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
168
+ "\n",
169
+ "from huggingface_hub import HfApi, login\n",
170
+ "import streamlit as st\n",
171
+ "login(st.secrets[\"HF_TOKEN\"])\n",
172
+ "api = HfApi()\n",
173
+ "info = api.upload_file(\n",
174
+ " path_or_fileobj=\"ca2024.parquet\",\n",
175
+ " path_in_repo=\"ca2024.parquet\",\n",
176
+ " repo_id=\"boettiger-lab/ca-30x30\",\n",
177
+ " repo_type=\"dataset\",\n",
178
+ " )\n"
179
  ]
180
  },
181
  {
 
203
  "ca2024 = conn.read_parquet(\"ca2024.parquet\")"
204
  ]
205
  },
 
 
 
 
 
 
 
 
206
  {
207
  "cell_type": "code",
208
  "execution_count": 6,
 
240
  " <th>0</th>\n",
241
  " <td>California Department of Fish and Wildlife</td>\n",
242
  " <td>State</td>\n",
243
+ " <td>54853.556568</td>\n",
244
  " </tr>\n",
245
  " <tr>\n",
246
  " <th>1</th>\n",
247
  " <td>California Department of Parks and Recreation</td>\n",
248
  " <td>State</td>\n",
249
+ " <td>21439.451269</td>\n",
250
  " </tr>\n",
251
  " <tr>\n",
252
  " <th>2</th>\n",
253
  " <td>California Tahoe Conservancy</td>\n",
254
  " <td>State</td>\n",
255
+ " <td>6119.753048</td>\n",
256
  " </tr>\n",
257
  " <tr>\n",
258
  " <th>3</th>\n",
259
  " <td>California Department of Water Resources</td>\n",
260
  " <td>State</td>\n",
261
+ " <td>4033.217739</td>\n",
262
  " </tr>\n",
263
  " <tr>\n",
264
  " <th>4</th>\n",
265
+ " <td>California State University Sonoma</td>\n",
266
  " <td>State</td>\n",
267
+ " <td>3842.054169</td>\n",
268
  " </tr>\n",
269
  " <tr>\n",
270
  " <th>5</th>\n",
271
+ " <td>University of California</td>\n",
272
  " <td>State</td>\n",
273
+ " <td>2050.549176</td>\n",
274
  " </tr>\n",
275
  " <tr>\n",
276
  " <th>6</th>\n",
277
+ " <td>California Department of Forestry and Fire Pro...</td>\n",
278
  " <td>State</td>\n",
279
+ " <td>1212.712394</td>\n",
280
  " </tr>\n",
281
  " <tr>\n",
282
  " <th>7</th>\n",
283
+ " <td>Coachella Valley Mountains Conservancy</td>\n",
284
  " <td>State</td>\n",
285
+ " <td>167.224090</td>\n",
286
  " </tr>\n",
287
  " <tr>\n",
288
  " <th>8</th>\n",
289
+ " <td>California State Lands Commission</td>\n",
290
  " <td>State</td>\n",
291
+ " <td>113.344073</td>\n",
292
  " </tr>\n",
293
  " <tr>\n",
294
  " <th>9</th>\n",
295
+ " <td>California State Coastal Conservancy</td>\n",
296
  " <td>State</td>\n",
297
+ " <td>97.314705</td>\n",
298
  " </tr>\n",
299
  " <tr>\n",
300
  " <th>10</th>\n",
301
  " <td>Other State</td>\n",
302
  " <td>State</td>\n",
303
+ " <td>55.542241</td>\n",
304
  " </tr>\n",
305
  " <tr>\n",
306
  " <th>11</th>\n",
307
+ " <td>California Department of Transportation</td>\n",
308
  " <td>State</td>\n",
309
+ " <td>1.957490</td>\n",
310
  " </tr>\n",
311
  " <tr>\n",
312
  " <th>12</th>\n",
313
+ " <td>San Joaquin River Conservancy</td>\n",
314
  " <td>State</td>\n",
315
+ " <td>1.531470</td>\n",
316
  " </tr>\n",
317
  " <tr>\n",
318
  " <th>13</th>\n",
 
 
 
 
 
 
319
  " <td>California State University</td>\n",
320
  " <td>State</td>\n",
321
  " <td>0.021589</td>\n",
 
330
  "1 California Department of Parks and Recreation State \n",
331
  "2 California Tahoe Conservancy State \n",
332
  "3 California Department of Water Resources State \n",
333
+ "4 California State University Sonoma State \n",
334
+ "5 University of California State \n",
335
+ "6 California Department of Forestry and Fire Pro... State \n",
336
+ "7 Coachella Valley Mountains Conservancy State \n",
337
+ "8 California State Lands Commission State \n",
338
+ "9 California State Coastal Conservancy State \n",
339
  "10 Other State State \n",
340
+ "11 California Department of Transportation State \n",
341
+ "12 San Joaquin River Conservancy State \n",
342
+ "13 California State University State \n",
 
343
  "\n",
344
  " area \n",
345
+ "0 54853.556568 \n",
346
+ "1 21439.451269 \n",
347
+ "2 6119.753048 \n",
348
+ "3 4033.217739 \n",
349
+ "4 3842.054169 \n",
350
+ "5 2050.549176 \n",
351
+ "6 1212.712394 \n",
352
+ "7 167.224090 \n",
353
+ "8 113.344073 \n",
354
+ "9 97.314705 \n",
355
+ "10 55.542241 \n",
356
+ "11 1.957490 \n",
357
+ "12 1.531470 \n",
358
+ "13 0.021589 "
 
359
  ]
360
  },
361
  "execution_count": 6,
 
378
  },
379
  {
380
  "cell_type": "code",
381
+ "execution_count": null,
382
  "id": "c62854f6-1456-4207-8c69-53af17970102",
383
  "metadata": {},
384
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  "source": [
 
386
  "\n",
387
+ "gdf = ca2024.execute()\n",
388
  "established = {'property': 'established',\n",
389
  " 'type': 'categorical',\n",
390
  " 'stops': [\n",
391
  " [2023, \"#26542C80\"], \n",
392
+ " [2024, \"#F3AB3D80\"]]}\n",
 
393
  "paint = {\"fill-color\": established}\n",
394
  "\n",
395
+ "\n",
396
  "m = leafmap.Map(style=\"positron\")\n",
397
+ "m.add_gdf(gdf,layer_type=\"fill\", name = \"intersects\", paint = paint)\n",
398
+ "\n",
399
  "m.add_layer_control()\n",
400
+ "m.to_html(\"ca2024.html\")\n",
401
  "m"
402
  ]
403
  }