cassiebuhler committed on
Commit
5ee179c
·
1 Parent(s): 0c88eb4

zonal stats

Browse files
Files changed (1) hide show
  1. get_zonal_stats.ipynb +1052 -0
get_zonal_stats.ipynb ADDED
@@ -0,0 +1,1052 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "39bf1de3-cba6-475a-a988-ad48e5af4a04",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Get zonal stats "
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 2,
14
+ "id": "ba047a55-642d-4c27-a367-5f35f4406218",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import ibis\n",
19
+ "import ibis.selectors as s\n",
20
+ "from ibis import _\n",
21
+ "import fiona\n",
22
+ "import geopandas as gpd\n",
23
+ "import rioxarray\n",
24
+ "from shapely.geometry import box\n",
25
+ "\n",
26
+ "import rasterio\n",
27
+ "from rasterio.mask import mask\n",
28
+ "from rasterstats import zonal_stats\n",
29
+ "import pandas as pd\n",
30
+ "from joblib import Parallel, delayed\n",
31
+ "\n",
32
+ "con = ibis.duckdb.connect()\n",
33
+ "con.load_extension(\"spatial\")\n",
34
+ "threads = -1"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 19,
40
+ "id": "8b5656db-2d1d-4ca8-826d-7588126e52e8",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "# cropping US data to only CA \n",
45
+ "def crop_raster_to_bounds(tif_file, vector_gdf):\n",
46
+ " with rasterio.open(tif_file) as src:\n",
47
+ " # Get the vector layer's bounding box; assumes vector_gdf is already in the raster's CRS (no reprojection is done here)\n",
48
+ " california_bounds = vector_gdf.total_bounds\n",
49
+ " california_bounds = rasterio.coords.BoundingBox(\n",
50
+ " *california_bounds\n",
51
+ " )\n",
52
+ " # Crop the raster to the California bounding box\n",
53
+ " out_image, out_transform = mask(src, [california_bounds], crop=True)\n",
54
+ " out_meta = src.meta.copy()\n",
55
+ " out_meta.update({\n",
56
+ " \"driver\": \"GTiff\",\n",
57
+ " \"height\": out_image.shape[1],\n",
58
+ " \"width\": out_image.shape[2],\n",
59
+ " \"transform\": out_transform\n",
60
+ " })\n",
61
+ " print(\"Unique values in cropped raster:\", np.unique(out_image))\n",
62
+ "\n",
63
+ " return out_image, out_meta\n"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 20,
69
+ "id": "9a0e3446-16ac-40b0-9e34-db0157038c5a",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "def big_zonal_stats(vec_file, tif_file, stats, col_name, n_jobs, verbose=10, timeout=10000):\n",
74
+ " gdf = gpd.read_parquet(vec_file)\n",
75
+ " if gdf.crs is None:\n",
76
+ " gdf = gdf.set_crs(\"EPSG:4326\")\n",
77
+ " gdf = gdf.rename(columns={\"geom\": \"geometry\"})\n",
78
+ " gdf = gdf.set_geometry(\"geometry\")\n",
79
+ " gdf = gdf[gdf[\"geometry\"].notna()].copy()\n",
80
+ "\n",
81
+ " with rasterio.open(tif_file) as src:\n",
82
+ " raster_crs = src.crs\n",
83
+ " gdf = gdf.to_crs(raster_crs) # Transform vector to raster CRS\n",
84
+ " \n",
85
+ " # CA bounding box + convert it to a polygon in raster CRS\n",
86
+ " california_polygon = box(*gdf.total_bounds)\n",
87
+ " \n",
88
+ " out_image, out_transform = mask(src, [california_polygon], crop=True, nodata=src.nodata)\n",
89
+ "\n",
90
+ " # If raster is 3D, select the first band\n",
91
+ " if out_image.ndim == 3:\n",
92
+ " out_image = out_image[0]\n",
93
+ "\n",
94
+ " # compute zonal statistics for each geometry slice\n",
95
+ " def get_stats(geom_slice):\n",
96
+ " geom = [geom_slice.geometry]\n",
97
+ " stats_result = zonal_stats(\n",
98
+ " geom, out_image, stats=stats, affine=out_transform, all_touched=True, nodata=src.nodata\n",
99
+ " )\n",
100
+ " return stats_result[0] if stats_result and stats_result[0].get(\"mean\") is not None else {'mean': None}\n",
101
+ "\n",
102
+ " output = [get_stats(row) for row in gdf.itertuples()]\n",
103
+ " gdf[col_name] = [res['mean'] for res in output]\n",
104
+ "\n",
105
+ " return gdf"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 8,
111
+ "id": "ce66bae6-bac5-4837-9b01-fde16a00c303",
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "# aws s3 cp s3://vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif . --endpoint-url=https://data.source.coop\n",
116
+ "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_bii_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
117
+ "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_fii_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
118
+ "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_expansion_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
119
+ "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_reduction_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
120
+ "# aws s3 cp s3://cboettig/carbon/cogs/irrecoverable_c_total_2018.tif . --endpoint-url=https://data.source.coop\n",
121
+ "# aws s3 cp s3://cboettig/carbon/cogs/manageable_c_total_2018.tif . --endpoint-url=https://data.source.coop\n",
122
+ "# ! aws s3 cp s3://cboettig/justice40/disadvantaged-communities.parquet . --endpoint-url=https://data.source.coop\n",
123
+ "# minio/shared-biodiversity/redlist/cog/combined_sr_2022.tif\n",
124
+ "# /home/rstudio/minio/shared-biodiversity/redlist/cog/combined_rwr_2022.tif\n",
125
+ "# ! aws s3 cp s3://cboettig/social-vulnerability/svi2020_us_tract.parquet . --endpoint-url=https://data.source.coop\n"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": 21,
131
+ "id": "66dec912-ad8a-41cf-a5c2-6ec9cc350984",
132
+ "metadata": {},
133
+ "outputs": [
134
+ {
135
+ "name": "stdout",
136
+ "output_type": "stream",
137
+ "text": [
138
+ "CPU times: user 1min 52s, sys: 5.01 s, total: 1min 57s\n",
139
+ "Wall time: 1min 57s\n"
140
+ ]
141
+ }
142
+ ],
143
+ "source": [
144
+ "%%time\n",
145
+ "tif_file = 'SpeciesRichness_All.tif'\n",
146
+ "vec_file = \"/home/rstudio/github/ca-30x30/ca2024-30m.parquet\"\n",
147
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"richness\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": 22,
153
+ "id": "b081ec1a-ea91-485e-95f9-12cd06c2002a",
154
+ "metadata": {},
155
+ "outputs": [
156
+ {
157
+ "name": "stdout",
158
+ "output_type": "stream",
159
+ "text": [
160
+ "CPU times: user 1min 50s, sys: 4.47 s, total: 1min 54s\n",
161
+ "Wall time: 1min 54s\n"
162
+ ]
163
+ }
164
+ ],
165
+ "source": [
166
+ "%%time\n",
167
+ "tif_file = 'RSR_All.tif'\n",
168
+ "vec_file = './cpad-stats-temp.parquet'\n",
169
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'],\n",
170
+ " col_name = \"rsr\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": 23,
176
+ "id": "33ac0fb7-2cde-448d-a634-1973e34ac14f",
177
+ "metadata": {},
178
+ "outputs": [
179
+ {
180
+ "name": "stderr",
181
+ "output_type": "stream",
182
+ "text": [
183
+ "/opt/venv/lib/python3.10/site-packages/rasterstats/io.py:335: NodataWarning: Setting nodata to -999; specify nodata explicitly\n",
184
+ " warnings.warn(\n"
185
+ ]
186
+ },
187
+ {
188
+ "name": "stdout",
189
+ "output_type": "stream",
190
+ "text": [
191
+ "CPU times: user 1min 58s, sys: 4.93 s, total: 2min 3s\n",
192
+ "Wall time: 2min 3s\n"
193
+ ]
194
+ }
195
+ ],
196
+ "source": [
197
+ "%%time\n",
198
+ "tif_file = 'deforest_carbon_100m_cog.tif'\n",
199
+ "vec_file = './cpad-stats-temp.parquet'\n",
200
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], \n",
201
+ " col_name = \"deforest_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": 24,
207
+ "id": "d2a8c10f-e94b-4eef-940f-2af9599edee1",
208
+ "metadata": {},
209
+ "outputs": [
210
+ {
211
+ "name": "stdout",
212
+ "output_type": "stream",
213
+ "text": [
214
+ "CPU times: user 1min 53s, sys: 4.81 s, total: 1min 58s\n",
215
+ "Wall time: 1min 58s\n"
216
+ ]
217
+ }
218
+ ],
219
+ "source": [
220
+ "%%time\n",
221
+ "tif_file = 'natcrop_bii_100m_cog.tif'\n",
222
+ "vec_file = './cpad-stats-temp.parquet'\n",
223
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], \n",
224
+ " col_name = \"biodiversity_intactness_loss\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 25,
230
+ "id": "1c318f39-7ca0-4f3c-80fb-73f72202e4e0",
231
+ "metadata": {},
232
+ "outputs": [
233
+ {
234
+ "name": "stdout",
235
+ "output_type": "stream",
236
+ "text": [
237
+ "CPU times: user 1min 53s, sys: 4.9 s, total: 1min 58s\n",
238
+ "Wall time: 1min 58s\n"
239
+ ]
240
+ }
241
+ ],
242
+ "source": [
243
+ "%%time\n",
244
+ "tif_file = 'natcrop_fii_100m_cog.tif'\n",
245
+ "vec_file = './cpad-stats-temp.parquet'\n",
246
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'],\n",
247
+ " col_name = \"forest_integrity_loss\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
248
+ "\n"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 7,
254
+ "id": "aef9070a-c87a-463e-81b8-3cc6c5c9d484",
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stderr",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "/opt/conda/lib/python3.12/site-packages/rasterstats/io.py:335: NodataWarning: Setting nodata to -999; specify nodata explicitly\n",
262
+ " warnings.warn(\n"
263
+ ]
264
+ },
265
+ {
266
+ "name": "stdout",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "CPU times: user 3min 13s, sys: 55 s, total: 4min 8s\n",
270
+ "Wall time: 4min 8s\n"
271
+ ]
272
+ }
273
+ ],
274
+ "source": [
275
+ "%%time\n",
276
+ "tif_file = 'natcrop_expansion_100m_cog.tif'\n",
277
+ "vec_file = './cpad-stats-temp.parquet'\n",
278
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"crop_expansion\", n_jobs=threads, verbose=0)\n",
279
+ "gpd.GeoDataFrame(df, geometry=\"geometry\").to_parquet(\"cpad-stats-temp.parquet\")\n"
280
+ ]
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "execution_count": null,
285
+ "id": "d94f937b-b32c-4de1-b4ac-93ce33f0919f",
286
+ "metadata": {},
287
+ "outputs": [],
288
+ "source": [
289
+ "%%time\n",
290
+ "tif_file = 'natcrop_reduction_100m_cog.tif'\n",
291
+ "vec_file = './cpad-stats-temp.parquet'\n",
292
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"crop_reduction\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "id": "19c3e402-8712-450f-b3dd-af9d0c01689c",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "%%time\n",
303
+ "tif_file = 'irrecoverable_c_total_2018.tif'\n",
304
+ "vec_file = './cpad-stats-temp.parquet'\n",
305
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"irrecoverable_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
306
+ "\n"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": null,
312
+ "id": "c55c777a-48ce-4403-a171-cfc0d2351df6",
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "%%time\n",
317
+ "tif_file = 'manageable_c_total_2018.tif'\n",
318
+ "vec_file = './cpad-stats-temp.parquet'\n",
319
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"manageable_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": null,
325
+ "id": "2ce56a66-34e3-4f61-95ae-65d1f06bc468",
326
+ "metadata": {},
327
+ "outputs": [],
328
+ "source": [
329
+ "%%time\n",
330
+ "tif_file = 'combined_rwr_2022.tif'\n",
331
+ "vec_file = './cpad-stats-temp.parquet'\n",
332
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_rwr\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": null,
338
+ "id": "a3260b3a-d2eb-4cda-afac-679b362d8b71",
339
+ "metadata": {},
340
+ "outputs": [],
341
+ "source": [
342
+ "%%time\n",
343
+ "tif_file = 'combined_sr_2022.tif'\n",
344
+ "vec_file = './cpad-stats-temp.parquet'\n",
345
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_richness\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": null,
351
+ "id": "d5133f36-404e-4f6a-a90b-eb5f098e6f06",
352
+ "metadata": {},
353
+ "outputs": [],
354
+ "source": [
355
+ "%%time\n",
356
+ "tif_file = 'combined_sr_2022.tif'\n",
357
+ "vec_file = './cpad-stats-temp.parquet'\n",
358
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_richness\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
359
+ ]
360
+ },
361
+ {
362
+ "cell_type": "code",
363
+ "execution_count": 20,
364
+ "id": "6bdaba61-30c1-49d6-a4e6-db68f1daafa3",
365
+ "metadata": {},
366
+ "outputs": [
367
+ {
368
+ "name": "stdout",
369
+ "output_type": "stream",
370
+ "text": [
371
+ "CPU times: user 3min 16s, sys: 57 s, total: 4min 13s\n",
372
+ "Wall time: 4min 12s\n"
373
+ ]
374
+ }
375
+ ],
376
+ "source": [
377
+ "%%time\n",
378
+ "tif_file = 'hfp_2021_100m_v1-2_cog.tif'\n",
379
+ "vec_file = './cpad-stats-temp.parquet'\n",
380
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"human_impact\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "markdown",
385
+ "id": "f8e037d4-7a34-42bc-941f-0c09ee80ef3b",
386
+ "metadata": {},
387
+ "source": [
388
+ "# Convert vector to tif "
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": 24,
394
+ "id": "c4a19013-65f1-4eef-be2d-0cf1be3d0f7f",
395
+ "metadata": {},
396
+ "outputs": [],
397
+ "source": [
398
+ "import geopandas as gpd\n",
399
+ "import numpy as np\n",
400
+ "import rasterio\n",
401
+ "from rasterio.features import rasterize\n",
402
+ "from rasterio.transform import from_bounds\n",
403
+ "\n",
404
+ "def get_geotiff(gdf, output_file,col):\n",
405
+ " gdf = gdf.set_geometry(\"geometry\")\n",
406
+ " gdf = gdf.set_crs(\"EPSG:4326\")\n",
407
+ " print(gdf.crs)\n",
408
+ "\n",
409
+ " # Set raster properties\n",
410
+ " minx, miny, maxx, maxy = gdf.total_bounds # Get the bounds of the geometry\n",
411
+ " pixel_size = 0.01 # Define the pixel size in units of the CRS\n",
412
+ " width = int((maxx - minx) / pixel_size)\n",
413
+ " height = int((maxy - miny) / pixel_size)\n",
414
+ " transform = from_bounds(minx, miny, maxx, maxy, width, height)\n",
415
+ " \n",
416
+ " # Define rasterization with continuous values\n",
417
+ " shapes = ((geom, value) for geom, value in zip(gdf.geometry, gdf[col]))\n",
418
+ " raster = rasterize(\n",
419
+ " shapes,\n",
420
+ " out_shape=(height, width),\n",
421
+ " transform=transform,\n",
422
+ " fill=0.0, # Background value for areas outside the geometry\n",
423
+ " dtype=\"float32\" # Set data type to handle continuous values\n",
424
+ " )\n",
425
+ " print(\"Unique values in raster:\", np.unique(raster))\n",
426
+ "\n",
427
+ " # Define GeoTIFF metadata\n",
428
+ " out_meta = {\n",
429
+ " \"driver\": \"GTiff\",\n",
430
+ " \"height\": height,\n",
431
+ " \"width\": width,\n",
432
+ " \"count\": 1,\n",
433
+ " \"dtype\": raster.dtype,\n",
434
+ " \"crs\": gdf.crs,\n",
435
+ " \"transform\": transform,\n",
436
+ " \"compress\": \"deflate\" # Use compression to reduce file size\n",
437
+ " }\n",
438
+ " \n",
439
+ " # Write to a GeoTIFF file with COG options\n",
440
+ " with rasterio.open(output_file, \"w\", **out_meta) as dest:\n",
441
+ " dest.write(raster, 1)\n",
442
+ " dest.build_overviews([2, 4, 8, 16], rasterio.enums.Resampling.average)\n",
443
+ " dest.update_tags(1, TIFFTAG_RESOLUTION_UNIT=\"Meter\")\n"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": 25,
449
+ "id": "4e678f01-73af-4f99-a565-e9b7f04d9547",
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": [
453
+ "# clean up SVI data\n",
454
+ "svi_df = (con\n",
455
+ " .read_parquet(\"svi2020_us_tract.parquet\")\n",
456
+ " .select(\"RPL_THEMES\",\"RPL_THEME1\",\"RPL_THEME2\",\"RPL_THEME3\",\"RPL_THEME4\",\"Shape\")\n",
457
+ " .rename(SVI = \"RPL_THEMES\", socioeconomic = \"RPL_THEME1\", \n",
458
+ " household_char = \"RPL_THEME2\", racial_ethnic_minority = \"RPL_THEME3\",\n",
459
+ " housing_transit = \"RPL_THEME4\", geometry = \"Shape\")\n",
460
+ ".cast({\"geometry\":\"geometry\"})\n",
461
+ ")\n",
462
+ "svi_df.execute().to_parquet(\"svi2020_us_tract_clean.parquet\")\n"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 27,
468
+ "id": "c5046d6b-9798-46d3-a1bc-548e29414007",
469
+ "metadata": {},
470
+ "outputs": [
471
+ {
472
+ "name": "stdout",
473
+ "output_type": "stream",
474
+ "text": [
475
+ "EPSG:4326\n",
476
+ "Unique values in raster: [-9.990e+02 0.000e+00 1.000e-04 ... 9.998e-01 9.999e-01 1.000e+00]\n",
477
+ "EPSG:4326\n",
478
+ "Unique values in raster: [-9.990e+02 0.000e+00 4.000e-04 ... 9.998e-01 9.999e-01 1.000e+00]\n",
479
+ "EPSG:4326\n",
480
+ "Unique values in raster: [-9.990e+02 0.000e+00 3.000e-04 ... 9.998e-01 9.999e-01 1.000e+00]\n",
481
+ "EPSG:4326\n",
482
+ "Unique values in raster: [-9.990e+02 0.000e+00 2.400e-03 ... 9.943e-01 9.952e-01 9.959e-01]\n",
483
+ "EPSG:4326\n",
484
+ "Unique values in raster: [-9.990e+02 0.000e+00 9.000e-03 ... 9.998e-01 9.999e-01 1.000e+00]\n"
485
+ ]
486
+ }
487
+ ],
488
+ "source": [
489
+ "gdf = gpd.read_parquet(\"svi2020_us_tract_clean.parquet\")\n",
490
+ "svi = gdf[['SVI','geometry']]\n",
491
+ "socio = gdf[['socioeconomic','geometry']]\n",
492
+ "house = gdf[['household_char','geometry']]\n",
493
+ "minority = gdf[['racial_ethnic_minority','geometry']]\n",
494
+ "transit = gdf[['housing_transit','geometry']]\n",
495
+ "\n",
496
+ "#convert SVI parquet to tif\n",
497
+ "get_geotiff(svi,\"svi.tif\",\"SVI\")\n",
498
+ "get_geotiff(socio,\"svi_socioeconomic.tif\",\"socioeconomic\")\n",
499
+ "get_geotiff(house,\"svi_household.tif\",\"household_char\")\n",
500
+ "get_geotiff(minority,\"svi_minority.tif\",\"racial_ethnic_minority\")\n",
501
+ "get_geotiff(transit,\"svi_transit.tif\",\"housing_transit\")"
502
+ ]
503
+ },
504
+ {
505
+ "cell_type": "code",
506
+ "execution_count": 28,
507
+ "id": "6a36b77f-d0be-45bd-9318-da4b57eaf353",
508
+ "metadata": {},
509
+ "outputs": [
510
+ {
511
+ "name": "stdout",
512
+ "output_type": "stream",
513
+ "text": [
514
+ "CPU times: user 3min 26s, sys: 56.6 s, total: 4min 23s\n",
515
+ "Wall time: 4min 27s\n"
516
+ ]
517
+ }
518
+ ],
519
+ "source": [
520
+ "%%time\n",
521
+ "tif_file = 'svi.tif'\n",
522
+ "vec_file = './cpad-stats-temp.parquet'\n",
523
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"SVI\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
524
+ "\n"
525
+ ]
526
+ },
527
+ {
528
+ "cell_type": "code",
529
+ "execution_count": 29,
530
+ "id": "05ef74e2-3f23-4f69-8cd3-8862cb73a259",
531
+ "metadata": {},
532
+ "outputs": [
533
+ {
534
+ "name": "stdout",
535
+ "output_type": "stream",
536
+ "text": [
537
+ "CPU times: user 3min 22s, sys: 56.4 s, total: 4min 18s\n",
538
+ "Wall time: 4min 27s\n"
539
+ ]
540
+ }
541
+ ],
542
+ "source": [
543
+ "%%time\n",
544
+ "vec_file = './cpad-stats-temp.parquet'\n",
545
+ "tif_file = 'svi_socioeconomic.tif'\n",
546
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"socioeconomic_status\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
547
+ "\n"
548
+ ]
549
+ },
550
+ {
551
+ "cell_type": "code",
552
+ "execution_count": 30,
553
+ "id": "23417a03-38c2-4b31-8340-f08ec8a11631",
554
+ "metadata": {},
555
+ "outputs": [
556
+ {
557
+ "name": "stdout",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "CPU times: user 3min 11s, sys: 54.3 s, total: 4min 5s\n",
561
+ "Wall time: 4min 5s\n"
562
+ ]
563
+ }
564
+ ],
565
+ "source": [
566
+ "%%time\n",
567
+ "vec_file = './cpad-stats-temp.parquet'\n",
568
+ "tif_file = 'svi_household.tif'\n",
569
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"household_char\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
570
+ "\n"
571
+ ]
572
+ },
573
+ {
574
+ "cell_type": "code",
575
+ "execution_count": 31,
576
+ "id": "de86d7f0-6cdc-4d05-bdee-d9803cbd83bd",
577
+ "metadata": {},
578
+ "outputs": [
579
+ {
580
+ "name": "stdout",
581
+ "output_type": "stream",
582
+ "text": [
583
+ "CPU times: user 3min 11s, sys: 54.1 s, total: 4min 5s\n",
584
+ "Wall time: 4min 5s\n"
585
+ ]
586
+ }
587
+ ],
588
+ "source": [
589
+ "%%time\n",
590
+ "vec_file = './cpad-stats-temp.parquet'\n",
591
+ "tif_file = 'svi_minority.tif'\n",
592
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"racial_ethnic_minority\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
593
+ ]
594
+ },
595
+ {
596
+ "cell_type": "code",
597
+ "execution_count": 32,
598
+ "id": "0c49dd50-7dd3-4240-9af8-3e32ec656bc0",
599
+ "metadata": {},
600
+ "outputs": [
601
+ {
602
+ "name": "stdout",
603
+ "output_type": "stream",
604
+ "text": [
605
+ "CPU times: user 3min 13s, sys: 54.7 s, total: 4min 8s\n",
606
+ "Wall time: 4min 7s\n"
607
+ ]
608
+ }
609
+ ],
610
+ "source": [
611
+ "%%time\n",
612
+ "vec_file = './cpad-stats-temp.parquet'\n",
613
+ "tif_file = 'svi_transit.tif'\n",
614
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"housing_transit\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
615
+ ]
616
+ },
617
+ {
618
+ "cell_type": "code",
619
+ "execution_count": 3,
620
+ "id": "2e4de199-82d4-4e2b-8572-6fe19b57d1ee",
621
+ "metadata": {},
622
+ "outputs": [
623
+ {
624
+ "data": {
625
+ "text/html": [
626
+ "<div>\n",
627
+ "<style scoped>\n",
628
+ " .dataframe tbody tr th:only-of-type {\n",
629
+ " vertical-align: middle;\n",
630
+ " }\n",
631
+ "\n",
632
+ " .dataframe tbody tr th {\n",
633
+ " vertical-align: top;\n",
634
+ " }\n",
635
+ "\n",
636
+ " .dataframe thead th {\n",
637
+ " text-align: right;\n",
638
+ " }\n",
639
+ "</style>\n",
640
+ "<table border=\"1\" class=\"dataframe\">\n",
641
+ " <thead>\n",
642
+ " <tr style=\"text-align: right;\">\n",
643
+ " <th></th>\n",
644
+ " <th>established</th>\n",
645
+ " <th>reGAP</th>\n",
646
+ " <th>name</th>\n",
647
+ " <th>access_type</th>\n",
648
+ " <th>manager</th>\n",
649
+ " <th>manager_type</th>\n",
650
+ " <th>Easement</th>\n",
651
+ " <th>Acres</th>\n",
652
+ " <th>id</th>\n",
653
+ " <th>type</th>\n",
654
+ " <th>...</th>\n",
655
+ " <th>all_species_rwr</th>\n",
656
+ " <th>all_species_richness</th>\n",
657
+ " <th>crop_expansion</th>\n",
658
+ " <th>human_impact</th>\n",
659
+ " <th>svi</th>\n",
660
+ " <th>svi_socioeconomic_status</th>\n",
661
+ " <th>svi_household_char</th>\n",
662
+ " <th>svi_racial_ethnic_minority</th>\n",
663
+ " <th>svi_housing_transit</th>\n",
664
+ " <th>geom</th>\n",
665
+ " </tr>\n",
666
+ " </thead>\n",
667
+ " <tbody>\n",
668
+ " <tr>\n",
669
+ " <th>0</th>\n",
670
+ " <td>2024</td>\n",
671
+ " <td>2</td>\n",
672
+ " <td>Six Rivers National Forest</td>\n",
673
+ " <td>Open Access</td>\n",
674
+ " <td>United States Forest Service</td>\n",
675
+ " <td>Federal</td>\n",
676
+ " <td>0</td>\n",
677
+ " <td>0.191763</td>\n",
678
+ " <td>100001</td>\n",
679
+ " <td>Land</td>\n",
680
+ " <td>...</td>\n",
681
+ " <td>0.355</td>\n",
682
+ " <td>346.0</td>\n",
683
+ " <td>0</td>\n",
684
+ " <td>2339.00</td>\n",
685
+ " <td>0.521</td>\n",
686
+ " <td>0.29</td>\n",
687
+ " <td>0.522</td>\n",
688
+ " <td>0.428</td>\n",
689
+ " <td>0.816</td>\n",
690
+ " <td>MULTIPOLYGON Z (((-123.94358 41.95869 0, -123....</td>\n",
691
+ " </tr>\n",
692
+ " <tr>\n",
693
+ " <th>1</th>\n",
694
+ " <td>2024</td>\n",
695
+ " <td>1</td>\n",
696
+ " <td>Six Rivers National Forest</td>\n",
697
+ " <td>Open Access</td>\n",
698
+ " <td>United States Forest Service</td>\n",
699
+ " <td>Federal</td>\n",
700
+ " <td>0</td>\n",
701
+ " <td>0.247565</td>\n",
702
+ " <td>100002</td>\n",
703
+ " <td>Land</td>\n",
704
+ " <td>...</td>\n",
705
+ " <td>0.355</td>\n",
706
+ " <td>346.0</td>\n",
707
+ " <td>0</td>\n",
708
+ " <td>870.50</td>\n",
709
+ " <td>0.521</td>\n",
710
+ " <td>0.29</td>\n",
711
+ " <td>0.522</td>\n",
712
+ " <td>0.428</td>\n",
713
+ " <td>0.816</td>\n",
714
+ " <td>MULTIPOLYGON Z (((-123.98793 41.94847 0, -123....</td>\n",
715
+ " </tr>\n",
716
+ " <tr>\n",
717
+ " <th>2</th>\n",
718
+ " <td>2024</td>\n",
719
+ " <td>1</td>\n",
720
+ " <td>Six Rivers National Forest</td>\n",
721
+ " <td>Open Access</td>\n",
722
+ " <td>United States Forest Service</td>\n",
723
+ " <td>Federal</td>\n",
724
+ " <td>0</td>\n",
725
+ " <td>1.046992</td>\n",
726
+ " <td>100003</td>\n",
727
+ " <td>Land</td>\n",
728
+ " <td>...</td>\n",
729
+ " <td>0.355</td>\n",
730
+ " <td>346.0</td>\n",
731
+ " <td>0</td>\n",
732
+ " <td>429.00</td>\n",
733
+ " <td>0.521</td>\n",
734
+ " <td>0.29</td>\n",
735
+ " <td>0.522</td>\n",
736
+ " <td>0.429</td>\n",
737
+ " <td>0.816</td>\n",
738
+ " <td>MULTIPOLYGON Z (((-123.87957 41.97172 0, -123....</td>\n",
739
+ " </tr>\n",
740
+ " <tr>\n",
741
+ " <th>3</th>\n",
742
+ " <td>2024</td>\n",
743
+ " <td>1</td>\n",
744
+ " <td>Six Rivers National Forest</td>\n",
745
+ " <td>Open Access</td>\n",
746
+ " <td>United States Forest Service</td>\n",
747
+ " <td>Federal</td>\n",
748
+ " <td>0</td>\n",
749
+ " <td>0.293964</td>\n",
750
+ " <td>100004</td>\n",
751
+ " <td>Land</td>\n",
752
+ " <td>...</td>\n",
753
+ " <td>0.355</td>\n",
754
+ " <td>346.0</td>\n",
755
+ " <td>0</td>\n",
756
+ " <td>3907.00</td>\n",
757
+ " <td>0.521</td>\n",
758
+ " <td>0.29</td>\n",
759
+ " <td>0.522</td>\n",
760
+ " <td>0.428</td>\n",
761
+ " <td>0.816</td>\n",
762
+ " <td>MULTIPOLYGON Z (((-123.84466 41.99139 0, -123....</td>\n",
763
+ " </tr>\n",
764
+ " <tr>\n",
765
+ " <th>4</th>\n",
766
+ " <td>2024</td>\n",
767
+ " <td>1</td>\n",
768
+ " <td>Six Rivers National Forest</td>\n",
769
+ " <td>Open Access</td>\n",
770
+ " <td>United States Forest Service</td>\n",
771
+ " <td>Federal</td>\n",
772
+ " <td>0</td>\n",
773
+ " <td>0.912564</td>\n",
774
+ " <td>100005</td>\n",
775
+ " <td>Land</td>\n",
776
+ " <td>...</td>\n",
777
+ " <td>0.355</td>\n",
778
+ " <td>346.0</td>\n",
779
+ " <td>0</td>\n",
780
+ " <td>698.25</td>\n",
781
+ " <td>0.521</td>\n",
782
+ " <td>0.29</td>\n",
783
+ " <td>0.522</td>\n",
784
+ " <td>0.428</td>\n",
785
+ " <td>0.816</td>\n",
786
+ " <td>MULTIPOLYGON Z (((-123.86194 41.98176 0, -123....</td>\n",
787
+ " </tr>\n",
788
+ " </tbody>\n",
789
+ "</table>\n",
790
+ "<p>5 rows × 28 columns</p>\n",
791
+ "</div>"
792
+ ],
793
+ "text/plain": [
794
+ " established reGAP name access_type \\\n",
795
+ "0 2024 2 Six Rivers National Forest Open Access \n",
796
+ "1 2024 1 Six Rivers National Forest Open Access \n",
797
+ "2 2024 1 Six Rivers National Forest Open Access \n",
798
+ "3 2024 1 Six Rivers National Forest Open Access \n",
799
+ "4 2024 1 Six Rivers National Forest Open Access \n",
800
+ "\n",
801
+ " manager manager_type Easement Acres id \\\n",
802
+ "0 United States Forest Service Federal 0 0.191763 100001 \n",
803
+ "1 United States Forest Service Federal 0 0.247565 100002 \n",
804
+ "2 United States Forest Service Federal 0 1.046992 100003 \n",
805
+ "3 United States Forest Service Federal 0 0.293964 100004 \n",
806
+ "4 United States Forest Service Federal 0 0.912564 100005 \n",
807
+ "\n",
808
+ " type ... all_species_rwr all_species_richness crop_expansion \\\n",
809
+ "0 Land ... 0.355 346.0 0 \n",
810
+ "1 Land ... 0.355 346.0 0 \n",
811
+ "2 Land ... 0.355 346.0 0 \n",
812
+ "3 Land ... 0.355 346.0 0 \n",
813
+ "4 Land ... 0.355 346.0 0 \n",
814
+ "\n",
815
+ " human_impact svi svi_socioeconomic_status svi_household_char \\\n",
816
+ "0 2339.00 0.521 0.29 0.522 \n",
817
+ "1 870.50 0.521 0.29 0.522 \n",
818
+ "2 429.00 0.521 0.29 0.522 \n",
819
+ "3 3907.00 0.521 0.29 0.522 \n",
820
+ "4 698.25 0.521 0.29 0.522 \n",
821
+ "\n",
822
+ " svi_racial_ethnic_minority svi_housing_transit \\\n",
823
+ "0 0.428 0.816 \n",
824
+ "1 0.428 0.816 \n",
825
+ "2 0.429 0.816 \n",
826
+ "3 0.428 0.816 \n",
827
+ "4 0.428 0.816 \n",
828
+ "\n",
829
+ " geom \n",
830
+ "0 MULTIPOLYGON Z (((-123.94358 41.95869 0, -123.... \n",
831
+ "1 MULTIPOLYGON Z (((-123.98793 41.94847 0, -123.... \n",
832
+ "2 MULTIPOLYGON Z (((-123.87957 41.97172 0, -123.... \n",
833
+ "3 MULTIPOLYGON Z (((-123.84466 41.99139 0, -123.... \n",
834
+ "4 MULTIPOLYGON Z (((-123.86194 41.98176 0, -123.... \n",
835
+ "\n",
836
+ "[5 rows x 28 columns]"
837
+ ]
838
+ },
839
+ "execution_count": 3,
840
+ "metadata": {},
841
+ "output_type": "execute_result"
842
+ }
843
+ ],
844
+ "source": [
845
+ "## clean up\n",
846
+ "\n",
847
+ "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
848
+ "ca_geom = con.read_parquet(\"ca2024-30m.parquet\").cast({\"geom\":\"geometry\"}).select(\"id\",\"geom\")\n",
849
+ "\n",
850
+ "\n",
851
+ "\n",
852
+ "ca = (con\n",
853
+ " .read_parquet(\"cpad-stats-temp.parquet\")\n",
854
+ " .mutate(richness = _.richness.round(3))\n",
855
+ " .mutate(rsr = _.rsr.round(3))\n",
856
+ " .mutate(deforest_carbon = _.deforest_carbon.round(3))\n",
857
+ " .mutate(biodiversity_intactness_loss = _.biodiversity_intactness_loss.round(3))\n",
858
+ " .mutate(forest_integrity_loss = _.forest_integrity_loss.round(3))\n",
859
+ " .cast({\"crop_expansion\": \"int64\"})\n",
860
+ " .cast({\"crop_reduction\": \"int64\"})\n",
861
+ " .cast({\"manageable_carbon\": \"int64\"})\n",
862
+ " .cast({\"irrecoverable_carbon\": \"int64\"})\n",
863
+ " .mutate(all_species_rwr = _.all_species_rwr.round(3))\n",
864
+ " .mutate(all_species_richness = _.all_species_richness.round(3))\n",
865
+ " .mutate(human_impact = _.human_impact.round(3))\n",
866
+ " .mutate(svi = _.SVI.round(3))\n",
867
+ " .mutate(svi_socioeconomic_status = _.socioeconomic_status.round(3))\n",
868
+ " .mutate(svi_household_char = _.household_char.round(3))\n",
869
+ " .mutate(svi_racial_ethnic_minority = _.racial_ethnic_minority.round(3))\n",
870
+ " .mutate(svi_housing_transit = _.housing_transit.round(3))\n",
871
+ " .drop(\"geometry\",\"__index_level_0__\")\n",
872
+ " # .rename(geom = \"geometry\")\n",
873
+ " # .cast({\"geom\":\"geometry\"})\n",
874
+ " # .mutate(geom=_.geom.convert('EPSG:3857', 'EPSG:4326'))\n",
875
+ " .join(ca_geom, \"id\", how = \"inner\")\n",
876
+ " .drop(\"SVI\", \"socioeconomic_status\",\"household_char\",\"racial_ethnic_minority\",\"housing_transit\" )\n",
877
+ "\n",
878
+ " )\n",
879
+ "# Preview the first five rows of the cleaned table\n",
880
+ "ca.head(5).execute()\n"
881
+ ]
882
+ },
883
+ {
884
+ "cell_type": "markdown",
885
+ "id": "3780de2c-3a68-442c-bb3b-64c792418979",
886
+ "metadata": {},
887
+ "source": [
888
+ "# Save as PMTiles and upload data to Hugging Face"
889
+ ]
890
+ },
891
+ {
892
+ "cell_type": "code",
893
+ "execution_count": 4,
894
+ "id": "05c791c9-888a-483a-9dbb-a2ba7eb1bce2",
895
+ "metadata": {},
896
+ "outputs": [
897
+ {
898
+ "name": "stderr",
899
+ "output_type": "stream",
900
+ "text": [
901
+ "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
902
+ ]
903
+ }
904
+ ],
905
+ "source": [
906
+ "import subprocess\n",
907
+ "import os\n",
908
+ "from huggingface_hub import HfApi, login\n",
909
+ "import streamlit as st\n",
910
+ "\n",
911
+ "login(st.secrets[\"HF_TOKEN\"])\n",
912
+ "# api = HfApi(add_to_git_credential=False)\n",
913
+ "api = HfApi()\n",
914
+ "\n",
915
+ "def hf_upload(file, repo_id):\n",
916
+ " info = api.upload_file(\n",
917
+ " path_or_fileobj=file,\n",
918
+ " path_in_repo=file,\n",
919
+ " repo_id=repo_id,\n",
920
+ " repo_type=\"dataset\",\n",
921
+ " )\n",
922
+ "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
923
+ " # Ensure Tippecanoe is installed\n",
924
+ " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
925
+ " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
926
+ "\n",
927
+ " # Construct the Tippecanoe command (-zg lets Tippecanoe pick the max zoom; max_zoom is not passed)\n",
928
+ " command = [\n",
929
+ " \"tippecanoe\",\n",
930
+ " \"-o\", output_file,\n",
931
+ " \"-zg\",\n",
932
+ " \"--extend-zooms-if-still-dropping\",\n",
933
+ " \"--force\",\n",
934
+ " \"--projection\", \"EPSG:4326\", \n",
935
+ " \"-L\",\"layer:\"+input_file,\n",
936
+ " ]\n",
937
+ " # Run Tippecanoe; a non-zero exit status is caught below and only printed, not re-raised\n",
938
+ " try:\n",
939
+ " subprocess.run(command, check=True)\n",
940
+ " print(f\"Successfully generated PMTiles file: {output_file}\")\n",
941
+ " except subprocess.CalledProcessError as e:\n",
942
+ " print(f\"Error running Tippecanoe: {e}\")\n",
943
+ "\n"
944
+ ]
945
+ },
946
+ {
947
+ "cell_type": "code",
948
+ "execution_count": 5,
949
+ "id": "1f2d179d-6d47-4e84-83c6-7cb3d969fc00",
950
+ "metadata": {},
951
+ "outputs": [
952
+ {
953
+ "name": "stderr",
954
+ "output_type": "stream",
955
+ "text": [
956
+ "cpad-stats.geojson:6: ignoring dimensions beyond two: in JSON object [-123.94358428532209,41.95869046159588,0]\n",
957
+ "cpad-stats.geojson:6: ignoring dimensions beyond two: in JSON object {\"type\":\"Feature\",\"properties\":{\"established\":2024,\"reGAP\":2,\"name\":\"Six Rivers National Forest\",\"access_type\":\"Open Access\",\"manager\":\"United States Forest Service\",\"manager_type\":\"Federal\",\"Easement\":0,\"Acres\":0.19176257,\"id\":100001,\"type\":\"Land\",\"richness\":4,\"rsr\":0.007,\"deforest_carbon\":0,\"biodiversity_intactness_loss\":0,\"forest_integrity_loss\":0,\"crop_reduction\":0,\"irrecoverable_carbon\":4,\"manageable_carbon\":85,\"all_species_rwr\":0.355,\"all_species_richness\":346,\"crop_expansion\":0,\"human_...\n",
958
+ "81196 features, 78827308 bytes of geometry and attributes, 2702235 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
959
+ "Choosing a maxzoom of -z10 for features typically 1205 feet (368 meters) apart, and at least 78 feet (24 meters) apart\n",
960
+ "Choosing a maxzoom of -z13 for resolution of about 39 feet (11 meters) within features\n",
961
+ "tile 4/2/6 size is 801307 with detail 12, >500000 \n",
962
+ "tile 5/5/12 size is 1313015 with detail 12, >500000 \n",
963
+ "tile 5/5/12 size is 780845 with detail 11, >500000 \n",
964
+ "tile 6/11/25 size is 603643 with detail 12, >500000 \n",
965
+ "tile 6/10/24 size is 1087317 with detail 12, >500000 \n",
966
+ "tile 6/10/24 size is 742154 with detail 11, >500000 \n",
967
+ "tile 7/20/49 size is 704536 with detail 12, >500000 \n",
968
+ "tile 7/20/49 size is 504066 with detail 11, >500000 \n",
969
+ "tile 7/22/51 size is 655744 with detail 12, >500000 \n",
970
+ " 99.9% 13/1332/3068 \n",
971
+ " 100.0% 13/1278/3041 \r"
972
+ ]
973
+ },
974
+ {
975
+ "name": "stdout",
976
+ "output_type": "stream",
977
+ "text": [
978
+ "Successfully generated PMTiles file: cpad-stats.pmtiles\n"
979
+ ]
980
+ },
981
+ {
982
+ "data": {
983
+ "application/vnd.jupyter.widget-view+json": {
984
+ "model_id": "82d13b05559b4f32a947f75fb2391e6a",
985
+ "version_major": 2,
986
+ "version_minor": 0
987
+ },
988
+ "text/plain": [
989
+ "cpad-stats.pmtiles: 0%| | 0.00/95.3M [00:00<?, ?B/s]"
990
+ ]
991
+ },
992
+ "metadata": {},
993
+ "output_type": "display_data"
994
+ },
995
+ {
996
+ "data": {
997
+ "application/vnd.jupyter.widget-view+json": {
998
+ "model_id": "61b3432b09064da4a46762eb514face4",
999
+ "version_major": 2,
1000
+ "version_minor": 0
1001
+ },
1002
+ "text/plain": [
1003
+ "cpad-stats.parquet: 0%| | 0.00/204M [00:00<?, ?B/s]"
1004
+ ]
1005
+ },
1006
+ "metadata": {},
1007
+ "output_type": "display_data"
1008
+ }
1009
+ ],
1010
+ "source": [
1011
+ "gdf = ca.execute().set_crs(\"EPSG:4326\")\n",
1012
+ "gdf.to_file(\"cpad-stats.geojson\")\n",
1013
+ "\n",
1014
+ "generate_pmtiles(\"cpad-stats.geojson\", \"cpad-stats.pmtiles\")\n",
1015
+ "hf_upload(\"cpad-stats.pmtiles\", \"boettiger-lab/ca-30x30\")\n",
1016
+ "\n",
1017
+ "gdf.to_parquet(\"cpad-stats.parquet\")\n",
1018
+ "hf_upload(\"cpad-stats.parquet\", \"boettiger-lab/ca-30x30\")\n",
1019
+ "\n"
1020
+ ]
1021
+ },
1022
+ {
1023
+ "cell_type": "code",
1024
+ "execution_count": null,
1025
+ "id": "bf53bfe6-2175-4aa8-b3cb-be2c1db86b1b",
1026
+ "metadata": {},
1027
+ "outputs": [],
1028
+ "source": []
1029
+ }
1030
+ ],
1031
+ "metadata": {
1032
+ "kernelspec": {
1033
+ "display_name": "Python 3 (ipykernel)",
1034
+ "language": "python",
1035
+ "name": "python3"
1036
+ },
1037
+ "language_info": {
1038
+ "codemirror_mode": {
1039
+ "name": "ipython",
1040
+ "version": 3
1041
+ },
1042
+ "file_extension": ".py",
1043
+ "mimetype": "text/x-python",
1044
+ "name": "python",
1045
+ "nbconvert_exporter": "python",
1046
+ "pygments_lexer": "ipython3",
1047
+ "version": "3.12.7"
1048
+ }
1049
+ },
1050
+ "nbformat": 4,
1051
+ "nbformat_minor": 5
1052
+ }