cboettig committed on
Commit
81d856c
·
1 Parent(s): d48b10c

tippecanoe

Browse files
Files changed (1) hide show
  1. preprocess.ipynb +70 -218
preprocess.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": 1,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {},
16
  "outputs": [],
@@ -26,7 +26,7 @@
26
  },
27
  {
28
  "cell_type": "code",
29
- "execution_count": 2,
30
  "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
31
  "metadata": {},
32
  "outputs": [],
@@ -48,7 +48,7 @@
48
  },
49
  {
50
  "cell_type": "code",
51
- "execution_count": 3,
52
  "id": "a0b75637-e015-4be4-86e1-c9757ac43d0f",
53
  "metadata": {},
54
  "outputs": [],
@@ -80,33 +80,10 @@
80
  },
81
  {
82
  "cell_type": "code",
83
- "execution_count": 4,
84
  "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
85
  "metadata": {},
86
- "outputs": [
87
- {
88
- "data": {
89
- "application/vnd.jupyter.widget-view+json": {
90
- "model_id": "f1616caa5ca54678a00caa974721de2b",
91
- "version_major": 2,
92
- "version_minor": 0
93
- },
94
- "text/plain": [
95
- "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
96
- ]
97
- },
98
- "metadata": {},
99
- "output_type": "display_data"
100
- },
101
- {
102
- "name": "stdout",
103
- "output_type": "stream",
104
- "text": [
105
- "CPU times: user 55min 28s, sys: 2.94 s, total: 55min 31s\n",
106
- "Wall time: 22min 6s\n"
107
- ]
108
- }
109
- ],
110
  "source": [
111
  "%%time\n",
112
  "\n",
@@ -128,40 +105,29 @@
128
  " )\n",
129
  "ca2024 = ca.execute()\n",
130
  "\n",
131
- "ca2024.to_parquet(\"ca2024.parquet\")"
 
 
 
132
  ]
133
  },
134
  {
135
  "cell_type": "code",
136
- "execution_count": 1,
 
 
 
 
 
 
 
 
 
 
 
137
  "id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
138
  "metadata": {},
139
- "outputs": [
140
- {
141
- "name": "stdout",
142
- "output_type": "stream",
143
- "text": [
144
- "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
145
- "Token is valid (permission: write).\n",
146
- "Your token has been saved to /home/jovyan/.cache/huggingface/token\n",
147
- "Login successful\n"
148
- ]
149
- },
150
- {
151
- "data": {
152
- "application/vnd.jupyter.widget-view+json": {
153
- "model_id": "9e680dc3991c4c9b808447f04d056f53",
154
- "version_major": 2,
155
- "version_minor": 0
156
- },
157
- "text/plain": [
158
- "ca2024.parquet: 0%| | 0.00/137M [00:00<?, ?B/s]"
159
- ]
160
- },
161
- "metadata": {},
162
- "output_type": "display_data"
163
- }
164
- ],
165
  "source": [
166
  "## Upload to Huggingface\n",
167
  "# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
@@ -170,12 +136,15 @@
170
  "import streamlit as st\n",
171
  "login(st.secrets[\"HF_TOKEN\"])\n",
172
  "api = HfApi()\n",
173
- "info = api.upload_file(\n",
174
- " path_or_fileobj=\"ca2024.parquet\",\n",
175
- " path_in_repo=\"ca2024.parquet\",\n",
176
- " repo_id=\"boettiger-lab/ca-30x30\",\n",
177
- " repo_type=\"dataset\",\n",
178
- " )\n"
 
 
 
179
  ]
180
  },
181
  {
@@ -205,164 +174,10 @@
205
  },
206
  {
207
  "cell_type": "code",
208
- "execution_count": 6,
209
  "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
210
  "metadata": {},
211
- "outputs": [
212
- {
213
- "data": {
214
- "text/html": [
215
- "<div>\n",
216
- "<style scoped>\n",
217
- " .dataframe tbody tr th:only-of-type {\n",
218
- " vertical-align: middle;\n",
219
- " }\n",
220
- "\n",
221
- " .dataframe tbody tr th {\n",
222
- " vertical-align: top;\n",
223
- " }\n",
224
- "\n",
225
- " .dataframe thead th {\n",
226
- " text-align: right;\n",
227
- " }\n",
228
- "</style>\n",
229
- "<table border=\"1\" class=\"dataframe\">\n",
230
- " <thead>\n",
231
- " <tr style=\"text-align: right;\">\n",
232
- " <th></th>\n",
233
- " <th>manager</th>\n",
234
- " <th>manager_type</th>\n",
235
- " <th>area</th>\n",
236
- " </tr>\n",
237
- " </thead>\n",
238
- " <tbody>\n",
239
- " <tr>\n",
240
- " <th>0</th>\n",
241
- " <td>California Department of Fish and Wildlife</td>\n",
242
- " <td>State</td>\n",
243
- " <td>54853.556568</td>\n",
244
- " </tr>\n",
245
- " <tr>\n",
246
- " <th>1</th>\n",
247
- " <td>California Department of Parks and Recreation</td>\n",
248
- " <td>State</td>\n",
249
- " <td>21439.451269</td>\n",
250
- " </tr>\n",
251
- " <tr>\n",
252
- " <th>2</th>\n",
253
- " <td>California Tahoe Conservancy</td>\n",
254
- " <td>State</td>\n",
255
- " <td>6119.753048</td>\n",
256
- " </tr>\n",
257
- " <tr>\n",
258
- " <th>3</th>\n",
259
- " <td>California Department of Water Resources</td>\n",
260
- " <td>State</td>\n",
261
- " <td>4033.217739</td>\n",
262
- " </tr>\n",
263
- " <tr>\n",
264
- " <th>4</th>\n",
265
- " <td>California State University Sonoma</td>\n",
266
- " <td>State</td>\n",
267
- " <td>3842.054169</td>\n",
268
- " </tr>\n",
269
- " <tr>\n",
270
- " <th>5</th>\n",
271
- " <td>University of California</td>\n",
272
- " <td>State</td>\n",
273
- " <td>2050.549176</td>\n",
274
- " </tr>\n",
275
- " <tr>\n",
276
- " <th>6</th>\n",
277
- " <td>California Department of Forestry and Fire Pro...</td>\n",
278
- " <td>State</td>\n",
279
- " <td>1212.712394</td>\n",
280
- " </tr>\n",
281
- " <tr>\n",
282
- " <th>7</th>\n",
283
- " <td>Coachella Valley Mountains Conservancy</td>\n",
284
- " <td>State</td>\n",
285
- " <td>167.224090</td>\n",
286
- " </tr>\n",
287
- " <tr>\n",
288
- " <th>8</th>\n",
289
- " <td>California State Lands Commission</td>\n",
290
- " <td>State</td>\n",
291
- " <td>113.344073</td>\n",
292
- " </tr>\n",
293
- " <tr>\n",
294
- " <th>9</th>\n",
295
- " <td>California State Coastal Conservancy</td>\n",
296
- " <td>State</td>\n",
297
- " <td>97.314705</td>\n",
298
- " </tr>\n",
299
- " <tr>\n",
300
- " <th>10</th>\n",
301
- " <td>Other State</td>\n",
302
- " <td>State</td>\n",
303
- " <td>55.542241</td>\n",
304
- " </tr>\n",
305
- " <tr>\n",
306
- " <th>11</th>\n",
307
- " <td>California Department of Transportation</td>\n",
308
- " <td>State</td>\n",
309
- " <td>1.957490</td>\n",
310
- " </tr>\n",
311
- " <tr>\n",
312
- " <th>12</th>\n",
313
- " <td>San Joaquin River Conservancy</td>\n",
314
- " <td>State</td>\n",
315
- " <td>1.531470</td>\n",
316
- " </tr>\n",
317
- " <tr>\n",
318
- " <th>13</th>\n",
319
- " <td>California State University</td>\n",
320
- " <td>State</td>\n",
321
- " <td>0.021589</td>\n",
322
- " </tr>\n",
323
- " </tbody>\n",
324
- "</table>\n",
325
- "</div>"
326
- ],
327
- "text/plain": [
328
- " manager manager_type \\\n",
329
- "0 California Department of Fish and Wildlife State \n",
330
- "1 California Department of Parks and Recreation State \n",
331
- "2 California Tahoe Conservancy State \n",
332
- "3 California Department of Water Resources State \n",
333
- "4 California State University Sonoma State \n",
334
- "5 University of California State \n",
335
- "6 California Department of Forestry and Fire Pro... State \n",
336
- "7 Coachella Valley Mountains Conservancy State \n",
337
- "8 California State Lands Commission State \n",
338
- "9 California State Coastal Conservancy State \n",
339
- "10 Other State State \n",
340
- "11 California Department of Transportation State \n",
341
- "12 San Joaquin River Conservancy State \n",
342
- "13 California State University State \n",
343
- "\n",
344
- " area \n",
345
- "0 54853.556568 \n",
346
- "1 21439.451269 \n",
347
- "2 6119.753048 \n",
348
- "3 4033.217739 \n",
349
- "4 3842.054169 \n",
350
- "5 2050.549176 \n",
351
- "6 1212.712394 \n",
352
- "7 167.224090 \n",
353
- "8 113.344073 \n",
354
- "9 97.314705 \n",
355
- "10 55.542241 \n",
356
- "11 1.957490 \n",
357
- "12 1.531470 \n",
358
- "13 0.021589 "
359
- ]
360
- },
361
- "execution_count": 6,
362
- "metadata": {},
363
- "output_type": "execute_result"
364
- }
365
- ],
366
  "source": [
367
  "# compute some summary tables:\n",
368
  "\n",
@@ -400,6 +215,43 @@
400
  "m.to_html(\"ca2024.html\")\n",
401
  "m"
402
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  }
404
  ],
405
  "metadata": {
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": null,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {},
16
  "outputs": [],
 
26
  },
27
  {
28
  "cell_type": "code",
29
+ "execution_count": null,
30
  "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
31
  "metadata": {},
32
  "outputs": [],
 
48
  },
49
  {
50
  "cell_type": "code",
51
+ "execution_count": null,
52
  "id": "a0b75637-e015-4be4-86e1-c9757ac43d0f",
53
  "metadata": {},
54
  "outputs": [],
 
80
  },
81
  {
82
  "cell_type": "code",
83
+ "execution_count": null,
84
  "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
85
  "metadata": {},
86
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  "source": [
88
  "%%time\n",
89
  "\n",
 
105
  " )\n",
106
  "ca2024 = ca.execute()\n",
107
  "\n",
108
+ "\n",
109
+ "\n",
110
+ "ca2024.to_parquet(\"ca2024.parquet\")\n",
111
+ "\n"
112
  ]
113
  },
114
  {
115
  "cell_type": "code",
116
+ "execution_count": 2,
117
+ "id": "8259b450-2152-472c-a58c-50ce0d68d78f",
118
+ "metadata": {},
119
+ "outputs": [],
120
+ "source": [
121
+ "ca2024 = conn.read_parquet(\"ca2024.parquet\")\n",
122
+ "ca2024.execute().to_file(\"ca2024.geojson\") # tippecanoe can't parse geoparquet :-("
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": null,
128
  "id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
129
  "metadata": {},
130
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  "source": [
132
  "## Upload to Huggingface\n",
133
  "# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
 
136
  "import streamlit as st\n",
137
  "login(st.secrets[\"HF_TOKEN\"])\n",
138
  "api = HfApi()\n",
139
+ "\n",
140
+ "def hf_upload(file):\n",
141
+ " info = api.upload_file(\n",
142
+ " path_or_fileobj=file,\n",
143
+ " path_in_repo=file,\n",
144
+ " repo_id=\"boettiger-lab/ca-30x30\",\n",
145
+ " repo_type=\"dataset\",\n",
146
+ " )\n",
147
+ "hf_upload(\"ca2024.parquet\")"
148
  ]
149
  },
150
  {
 
174
  },
175
  {
176
  "cell_type": "code",
177
+ "execution_count": null,
178
  "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
179
  "metadata": {},
180
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  "source": [
182
  "# compute some summary tables:\n",
183
  "\n",
 
215
  "m.to_html(\"ca2024.html\")\n",
216
  "m"
217
  ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "id": "2df80e1d-6b94-4884-b9f5-d9c23d3ea028",
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "import subprocess\n",
227
+ "import os\n",
228
+ "\n",
229
+ "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
230
+ " # Ensure Tippecanoe is installed\n",
231
+ " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
232
+ " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
233
+ "\n",
234
+ " # Construct the Tippecanoe command\n",
235
+ " command = [\n",
236
+ " \"tippecanoe\",\n",
237
+ " \"-o\", output_file,\n",
238
+ " \"-z\", str(max_zoom),\n",
239
+ " \"--drop-densest-as-needed\",\n",
240
+ " \"--extend-zooms-if-still-dropping\",\n",
241
+ " \"--force\",\n",
242
+ " input_file\n",
243
+ " ]\n",
244
+ "\n",
245
+ " # Run Tippecanoe\n",
246
+ " try:\n",
247
+ " subprocess.run(command, check=True)\n",
248
+ " print(f\"Successfully generated PMTiles file: {output_file}\")\n",
249
+ " except subprocess.CalledProcessError as e:\n",
250
+ " print(f\"Error running Tippecanoe: {e}\")\n",
251
+ "\n",
252
+ "generate_pmtiles(\"ca2024.geojson\", \"ca2024-tippe.pmtiles\")\n",
253
+ "hf_upload(\"ca2024-tippe.pmtiles\")"
254
+ ]
255
  }
256
  ],
257
  "metadata": {