{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125",
   "metadata": {},
   "source": [
    "# Pre-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import ibis\n",
    "from ibis import _\n",
    "\n",
    "conn = ibis.duckdb.connect(\"tmp\", extensions=[\"spatial\"])\n",
    "\n",
    "# Use the local mirror when it is present; otherwise fall back to the public copy,\n",
    "# so the notebook also runs on machines that do not have that directory.\n",
    "ca_parquet_remote = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
    "ca_parquet_local = \"/home/rstudio/source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
    "ca_parquet = ca_parquet_local if os.path.exists(ca_parquet_local) else ca_parquet_remote\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Negative buffer: the 2023 geometries are shrunk slightly before the overlap test.\n",
    "# NOTE(review): units are presumably metres (the data is later converted from EPSG:3310) -- confirm.\n",
    "buffer = -2\n",
    "\n",
    "tbl = (\n",
    "    conn.read_parquet(ca_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    .rename(geom=\"SHAPE\")\n",
    "    # .filter(_.UNIT_NAME == \"Angeles National Forest\")  # uncomment to test on a small subset\n",
    "    .filter(_.reGAP < 3)\n",
    ")\n",
    "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))\n",
    "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
    "\n",
    "# Despite its name, `intersects` is an anti-join: it keeps the 2024 rows whose geometry\n",
    "# does NOT intersect any (buffered) 2023 geometry.\n",
    "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0b75637-e015-4be4-86e1-c9757ac43d0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Testing, run only on subset data\n",
    "RUN_VISUAL_CHECK = False  # set to True for a visual sanity check of the anti-join\n",
    "\n",
    "if RUN_VISUAL_CHECK:\n",
    "    # Imported here because the notebook-level leafmap import only happens further down.\n",
    "    import leafmap.maplibregl as leafmap\n",
    "\n",
    "    gdf = intersects.mutate(geom=_.geom.convert(\"epsg:3310\", \"epsg:4326\")).execute()\n",
    "    gdf_2023 = tbl_2023.mutate(geom=_.geom.convert(\"epsg:3310\", \"epsg:4326\")).execute()\n",
    "    gdf_2024 = tbl_2024.mutate(geom=_.geom.convert(\"epsg:3310\", \"epsg:4326\")).execute()\n",
    "\n",
    "    inter = {\"fill-color\": \"#F3AB3D\"}\n",
    "    p2024 = {\"fill-color\": \"#26542C\"}\n",
    "    p2023 = {\"fill-color\": \"#8B0A1A\"}\n",
    "\n",
    "    m = leafmap.Map(style=\"positron\")\n",
    "    m.add_gdf(gdf_2024, layer_type=\"fill\", name=\"2024\", paint=p2024)\n",
    "    m.add_gdf(gdf_2023, layer_type=\"fill\", name=\"2023\", paint=p2023)\n",
    "    m.add_gdf(gdf, layer_type=\"fill\", name=\"intersects\", paint=inter)\n",
    "\n",
    "    m.add_layer_control()\n",
    "    # A bare `m` inside an `if` block is not auto-displayed; show the map explicitly.\n",
    "    display(m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "# Rows kept by the anti-join are labelled established=2024;\n",
    "# every other 2024-release row falls back to 2023 via fill_null below.\n",
    "new2024 = intersects.select(\"OBJECTID\").mutate(established=2024)\n",
    "\n",
    "ca = (\n",
    "    conn.read_parquet(ca_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    .filter(_.Release_Year == 2024)\n",
    "    .filter(_.reGAP < 3)\n",
    "    .left_join(new2024, \"OBJECTID\")\n",
    "    .mutate(established=_.established.fill_null(2023))\n",
    "    .mutate(geom=_.SHAPE.convert(\"epsg:3310\", \"epsg:4326\"))\n",
    "    .rename(\n",
    "        name=\"cpad_PARK_NAME\",\n",
    "        access_type=\"cpad_ACCESS_TYP\",\n",
    "        manager=\"cpad_MNG_AGENCY\",\n",
    "        manager_type=\"cpad_MNG_AG_LEV\",\n",
    "        id=\"OBJECTID\",\n",
    "        type=\"TYPE\",\n",
    "    )\n",
    "    .select(\n",
    "        _.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
    "        _.Easement, _.Acres, _.id, _.type, _.geom,\n",
    "    )\n",
    ")\n",
    "ca2024 = ca.execute()\n",
    "\n",
    "ca2024.to_parquet(\"ca2024.parquet\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8259b450-2152-472c-a58c-50ce0d68d78f",
   "metadata": {},
   "outputs": [],
   "source": [
    "ca2024 = conn.read_parquet(\"ca2024.parquet\")\n",
    "ca2024.execute().to_file(\"ca2024.geojson\") # tippecanoe can't parse geoparquet :-("
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Upload to Huggingface\n",
    "# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
    "\n",
    "from huggingface_hub import HfApi, login\n",
    "import streamlit as st\n",
    "login(st.secrets[\"HF_TOKEN\"])\n",
    "api = HfApi()\n",
    "\n",
    "def hf_upload(file):\n",
    "    \"\"\"Upload `file` to the boettiger-lab/ca-30x30 dataset repo on Hugging Face.\n",
    "\n",
    "    The same string is used as the local path and as the path inside the repo.\n",
    "    Returns whatever `api.upload_file` returns.\n",
    "    \"\"\"\n",
    "    return api.upload_file(\n",
    "        path_or_fileobj=file,\n",
    "        path_in_repo=file,\n",
    "        repo_id=\"boettiger-lab/ca-30x30\",\n",
    "        repo_type=\"dataset\",\n",
    "    )\n",
    "hf_upload(\"ca2024.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cebd0ff5-8353-4b84-b9ee-182b74613554",
   "metadata": {},
   "source": [
    "# Testing & visualization\n",
    "\n",
    "`ca2024.parquet` now contains all we need. The code below illustrates some quick examples of the kinds of visualizations and summaries we might want to compute with this data. \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55afe07c-8681-4308-bbb9-e460f7380f86",
   "metadata": {},
   "outputs": [],
   "source": [
    "import leafmap.maplibregl as leafmap\n",
    "import ibis\n",
    "from ibis import _\n",
    "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
    "\n",
    "ca2024 = conn.read_parquet(\"ca2024.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compute some summary tables:\n",
    "\n",
    "(ca2024\n",
    "  .filter(_.established == 2024)\n",
    "  .filter(_.manager_type == \"State\")\n",
    "  .group_by(_.manager, _.manager_type)\n",
    "  .agg(area = _.Acres.sum())\n",
    "  .order_by(_.area.desc())\n",
    "  .execute()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c62854f6-1456-4207-8c69-53af17970102",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf = ca2024.execute()\n",
    "\n",
    "# Data-driven fill colour keyed on the `established` column.\n",
    "established = {\n",
    "    'property': 'established',\n",
    "    'type': 'categorical',\n",
    "    'stops': [\n",
    "        [2023, \"#26542C80\"],\n",
    "        [2024, \"#F3AB3D80\"],\n",
    "    ],\n",
    "}\n",
    "paint = {\"fill-color\": established}\n",
    "\n",
    "m = leafmap.Map(style=\"positron\")\n",
    "m.add_gdf(gdf, layer_type=\"fill\", name=\"intersects\", paint=paint)\n",
    "\n",
    "m.add_layer_control()\n",
    "m.to_html(\"ca2024.html\")\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2df80e1d-6b94-4884-b9f5-d9c23d3ea028",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "import subprocess\n",
    "\n",
    "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
    "    \"\"\"Build a PMTiles file from `input_file` by invoking the Tippecanoe CLI.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    input_file : path handed to Tippecanoe as its input.\n",
    "    output_file : path given to Tippecanoe's `-o` option (run with `--force`).\n",
    "    max_zoom : value given to Tippecanoe's `-z` option.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    `output_file`, once Tippecanoe has exited successfully.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    RuntimeError\n",
    "        If Tippecanoe is not found on PATH, or if it exits with a non-zero status.\n",
    "    \"\"\"\n",
    "    # shutil.which is a portable PATH lookup; no need to shell out to `which`.\n",
    "    if shutil.which(\"tippecanoe\") is None:\n",
    "        raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
    "\n",
    "    # Construct the Tippecanoe command\n",
    "    command = [\n",
    "        \"tippecanoe\",\n",
    "        \"-o\", output_file,\n",
    "        \"-z\", str(max_zoom),\n",
    "        \"--drop-densest-as-needed\",\n",
    "        \"--extend-zooms-if-still-dropping\",\n",
    "        \"--force\",\n",
    "        input_file\n",
    "    ]\n",
    "\n",
    "    # Run Tippecanoe\n",
    "    try:\n",
    "        subprocess.run(command, check=True)\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        # Fail loudly: otherwise the upload below would push a stale or missing file.\n",
    "        raise RuntimeError(f\"Error running Tippecanoe: {e}\") from e\n",
    "\n",
    "    print(f\"Successfully generated PMTiles file: {output_file}\")\n",
    "    return output_file\n",
    "\n",
    "# `hf_upload` is not defined in this cell; the Hugging Face upload cell must have been run first.\n",
    "generate_pmtiles(\"ca2024.geojson\", \"ca2024-tippe.pmtiles\")\n",
    "hf_upload(\"ca2024-tippe.pmtiles\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}