File size: 5,971 Bytes
d2c79b3 c2b1848 4590b2a c2b1848 d2c79b3 81d856c d2c79b3 0c88eb4 d2c79b3 0c88eb4 d2c79b3 0c88eb4 d2c79b3 81d856c 0c88eb4 d2c79b3 0c88eb4 d2c79b3 c2b1848 d2c79b3 0c88eb4 d2c79b3 0c88eb4 d2c79b3 0c88eb4 d2c79b3 81d856c 0c88eb4 d2c79b3 81d856c d2c79b3 d48b10c 0c88eb4 d48b10c d2c79b3 0c88eb4 d2c79b3 d48b10c 0c88eb4 d2c79b3 81d856c 0c88eb4 81d856c d48b10c 81d856c d48b10c 81d856c 0c88eb4 d2c79b3 81d856c 0c88eb4 81d856c 0c88eb4 81d856c d2c79b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
{
"cells": [
{
"cell_type": "markdown",
"id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125",
"metadata": {},
"source": [
"# Pre-processing"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"import ibis\n",
"from ibis import _\n",
"\n",
"# Source GeoParquet. Remote alternative (swap in to skip the local copy):\n",
"#   https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\n",
"ca_parquet = \"ca_areas.parquet\"\n",
"\n",
"# DuckDB connection (database \"tmp3\") with the spatial extension loaded.\n",
"conn = ibis.duckdb.connect(\"tmp3\", extensions=[\"spatial\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3d4f189-1563-4868-9f1f-64d67569df27",
"metadata": {},
"outputs": [],
"source": [
"# Shrink the 2023 polygons by 30 m before the spatial comparison so that\n",
"# overlapping boundaries are not counted as real intersections.\n",
"buffer = -30  # negative => inward buffer\n",
"\n",
"areas = conn.read_parquet(ca_parquet).cast({\"SHAPE\": \"geometry\"})\n",
"tbl = (\n",
"    areas\n",
"    .rename(geom=\"SHAPE\")\n",
"    .filter(_.reGAP < 3)  # only GAP 1 and 2 count towards 30x30\n",
")\n",
"\n",
"# The 2024 release is a superset of the 2023 release, so an anti-join keeps only the\n",
"# 2024 polygons that intersect no (shrunken) 2023 polygon, i.e. the newly established ones.\n",
"# NOTE: despite its name, `intersects` therefore holds the NON-intersecting 2024 rows.\n",
"tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
"tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))\n",
"overlaps_2023 = _.geom.intersects(tbl_2023.geom)\n",
"intersects = tbl_2024.anti_join(tbl_2023, overlaps_2023)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a59c976b-3c36-40f9-a15b-cefcd155c647",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"# OBJECTIDs of the polygons that are new in 2024, tagged with their establishment year\n",
"# (kept as a small table to join on).\n",
"new2024 = intersects.select(\"OBJECTID\").mutate(established=2024)\n",
"\n",
"# new column name -> source column name\n",
"renamed = {\n",
"    \"name\": \"cpad_PARK_NAME\",\n",
"    \"access_type\": \"cpad_ACCESS_TYP\",\n",
"    \"manager\": \"cpad_MNG_AGENCY\",\n",
"    \"manager_type\": \"cpad_MNG_AG_LEV\",\n",
"    \"id\": \"OBJECTID\",\n",
"    \"type\": \"TYPE\",\n",
"}\n",
"\n",
"release_2024 = (\n",
"    conn.read_parquet(ca_parquet)\n",
"    .cast({\"SHAPE\": \"geometry\"})\n",
"    .mutate(area=_.SHAPE.area())  # NOTE(review): `area` is not part of the final select\n",
"    .filter(_.Release_Year == 2024)  # keeping 2023 too would be redundant: 2024 is the superset\n",
")\n",
"\n",
"ca = (\n",
"    release_2024\n",
"    .left_join(new2024, \"OBJECTID\")  # attaches established=2024 to the newly established polygons\n",
"    .mutate(\n",
"        established=_.established.fill_null(2023),  # rows without a match default to 2023\n",
"        geom=_.SHAPE.convert(\"epsg:3310\", \"epsg:4326\"),\n",
"    )\n",
"    .rename(**renamed)\n",
"    # Replace empty strings with explicit placeholders.\n",
"    .mutate(\n",
"        manager=_.manager.substitute({\"\": \"Unknown\"}),\n",
"        manager_type=_.manager_type.substitute({\"\": \"Unknown\"}),\n",
"        access_type=_.access_type.substitute({\"\": \"Unknown Access\"}),\n",
"        name=_.name.substitute({\"\": \"Unknown\"}),\n",
"    )\n",
"    .select(\n",
"        \"established\", \"reGAP\", \"name\", \"access_type\", \"manager\", \"manager_type\",\n",
"        \"Easement\", \"Acres\", \"id\", \"type\", \"geom\",\n",
"    )\n",
")\n",
"\n",
"# Run the query, then write both output formats.\n",
"ca2024 = ca.execute()\n",
"ca2024.to_parquet(\"ca2024-30m.parquet\")\n",
"ca2024.to_file(\"ca2024-30m.geojson\")  # GeoJSON copy, because tippecanoe can't parse geoparquet :-("
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
"metadata": {},
"outputs": [],
"source": [
"## Upload to Huggingface\n",
"# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
"\n",
"from huggingface_hub import HfApi, login\n",
"import streamlit as st\n",
"# Log in to the Hugging Face Hub with the token from Streamlit secrets, then build the API client.\n",
"login(token=st.secrets[\"HF_TOKEN\"])\n",
"api = HfApi()\n",
"\n",
"def hf_upload(file):\n",
"    \"\"\"Upload a local file to the boettiger-lab/ca-30x30 dataset repo on the Hugging Face Hub.\n",
"\n",
"    Args:\n",
"        file: Path of the local file; the same string is used as the path inside the repo.\n",
"\n",
"    Returns:\n",
"        Whatever `api.upload_file` returns, so callers can inspect the upload result\n",
"        (previously this value was assigned to an unused local and discarded).\n",
"    \"\"\"\n",
"    return api.upload_file(\n",
"        path_or_fileobj=file,\n",
"        path_in_repo=file,\n",
"        repo_id=\"boettiger-lab/ca-30x30\",\n",
"        repo_type=\"dataset\",\n",
"    )\n",
" \n",
"hf_upload(file=\"ca2024-30m.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2df80e1d-6b94-4884-b9f5-d9c23d3ea028",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"import subprocess\n",
"import os\n",
"\n",
"def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
"    \"\"\"Build a PMTiles archive from a vector file by shelling out to Tippecanoe.\n",
"\n",
"    Args:\n",
"        input_file: Path of the file handed to Tippecanoe (e.g. a GeoJSON export).\n",
"        output_file: Path of the PMTiles file to write; overwritten if it exists (`--force`).\n",
"        max_zoom: Value passed to Tippecanoe's `-z` option.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If `tippecanoe` is not on PATH, or if it exits with a non-zero status.\n",
"            A failed run used to be printed and swallowed, which let the following\n",
"            upload step publish a missing or stale tileset.\n",
"    \"\"\"\n",
"    import shutil  # local import so this cell does not depend on another cell's imports\n",
"\n",
"    # Portable PATH lookup; avoids spawning the external `which` binary.\n",
"    if shutil.which(\"tippecanoe\") is None:\n",
"        raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
"\n",
"    # Argument list (no shell), so file names are never interpreted by a shell.\n",
"    command = [\n",
"        \"tippecanoe\",\n",
"        \"-o\", output_file,\n",
"        \"-z\", str(max_zoom),\n",
"        \"--drop-densest-as-needed\",\n",
"        \"--extend-zooms-if-still-dropping\",\n",
"        \"--force\",\n",
"        input_file,\n",
"    ]\n",
"\n",
"    try:\n",
"        subprocess.run(command, check=True)\n",
"    except subprocess.CalledProcessError as e:\n",
"        # Fail loudly so a Run All stops here instead of continuing to the upload.\n",
"        raise RuntimeError(f\"Error running Tippecanoe: {e}\") from e\n",
"\n",
"    print(f\"Successfully generated PMTiles file: {output_file}\")\n",
"\n",
"# Build the tileset from the GeoJSON export, then publish it.\n",
"generate_pmtiles(input_file=\"ca2024-30m.geojson\", output_file=\"ca2024-30m-tippe.pmtiles\")\n",
"hf_upload(file=\"ca2024-30m-tippe.pmtiles\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|