{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125",
   "metadata": {},
   "source": [
    "# Pre-processing\n",
    "\n",
    "Label each protected area in the 2024 release (restricted to `reGAP < 3`) as established in 2023 or 2024, and write the result to `ca2024.parquet`. An area is labelled 2024 when it intersects no (slightly shrunken) 2023 area."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import ibis\n",
    "from ibis import _\n",
    "\n",
    "conn = ibis.duckdb.connect(\"tmp\", extensions=[\"spatial\"])\n",
    "\n",
    "# Use the local copy of the source data when it is present; otherwise fall back\n",
    "# to the published file so the notebook does not depend on one machine's disk.\n",
    "ca_parquet_url = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
    "ca_parquet_local = Path(\"/home/rstudio/source.coop/cboettig/ca30x30/ca_areas.parquet\")\n",
    "ca_parquet = str(ca_parquet_local) if ca_parquet_local.exists() else ca_parquet_url\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a0cb34b1-8d70-49bf-80c6-244ecc8ddf84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Negative buffer: shrinks every 2023 polygon before the intersection test,\n",
    "# presumably so 2024 areas that only touch a 2023 boundary still count as new.\n",
    "# NOTE(review): units follow the source CRS (geometries are later converted\n",
    "# from epsg:3310); confirm the intended distance.\n",
    "buffer = -2\n",
    "\n",
    "tbl = (\n",
    "    conn.read_parquet(ca_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    .rename(geom=\"SHAPE\")\n",
    "    # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
    "    .filter(_.reGAP < 3)\n",
    ")\n",
    "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))\n",
    "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
    "\n",
    "# Despite its name, `intersects` is an anti-join: it keeps the 2024 rows whose\n",
    "# geometry intersects no shrunken 2023 geometry.\n",
    "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a0b75637-e015-4be4-86e1-c9757ac43d0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Testing, run only on subset data\n",
    "# Set to True to preview the layers on a map. Restrict `tbl` to a subset first:\n",
    "# every table below is fully executed before it is drawn.\n",
    "preview_subset = False\n",
    "\n",
    "if preview_subset:\n",
    "    # Imported here because only this optional preview needs it.\n",
    "    import leafmap.maplibregl as leafmap\n",
    "\n",
    "    def to_lonlat(table):\n",
    "        # Convert `geom` from epsg:3310 to epsg:4326, then execute the query.\n",
    "        return table.mutate(geom=_.geom.convert(\"epsg:3310\", \"epsg:4326\")).execute()\n",
    "\n",
    "    gdf = to_lonlat(intersects)\n",
    "    gdf_2023 = to_lonlat(tbl_2023)\n",
    "    gdf_2024 = to_lonlat(tbl_2024)\n",
    "\n",
    "    inter = {\"fill-color\": \"#F3AB3D\"}\n",
    "    p2024 = {\"fill-color\": \"#26542C\"}\n",
    "    p2023 = {\"fill-color\": \"#8B0A1A\"}\n",
    "\n",
    "    m = leafmap.Map(style=\"positron\")\n",
    "    m.add_gdf(gdf_2024, layer_type=\"fill\", name=\"2024\", paint=p2024)\n",
    "    m.add_gdf(gdf_2023, layer_type=\"fill\", name=\"2023\", paint=p2023)\n",
    "    m.add_gdf(gdf, layer_type=\"fill\", name=\"intersects\", paint=inter)\n",
    "\n",
    "    m.add_layer_control()\n",
    "    # A bare `m` inside the `if` body is not the cell's last top-level\n",
    "    # expression, so it would not render; display it explicitly.\n",
    "    display(m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f1616caa5ca54678a00caa974721de2b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 55min 28s, sys: 2.94 s, total: 55min 31s\n",
      "Wall time: 22min 6s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# OBJECTIDs of the 2024 rows kept by the anti-join, tagged as established in 2024.\n",
    "new2024 = intersects.select(\"OBJECTID\").mutate(established=2024)\n",
    "\n",
    "ca = (\n",
    "    conn.read_parquet(ca_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    # NOTE(review): `area` is not listed in the select() below, so it never\n",
    "    # reaches ca2024.parquet; confirm whether it should be exported or dropped.\n",
    "    .mutate(area=_.SHAPE.area())\n",
    "    .filter(_.Release_Year == 2024)\n",
    "    .filter(_.reGAP < 3)\n",
    "    .left_join(new2024, \"OBJECTID\")\n",
    "    # Rows with no match in new2024 get the default label 2023.\n",
    "    .mutate(established=_.established.fill_null(2023))\n",
    "    .mutate(geom=_.SHAPE.convert(\"epsg:3310\", \"epsg:4326\"))\n",
    "    .rename(\n",
    "        name=\"cpad_PARK_NAME\",\n",
    "        access_type=\"cpad_ACCESS_TYP\",\n",
    "        manager=\"cpad_MNG_AGENCY\",\n",
    "        manager_type=\"cpad_MNG_AG_LEV\",\n",
    "        id=\"OBJECTID\",\n",
    "        type=\"TYPE\",\n",
    "    )\n",
    "    .select(\n",
    "        _.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
    "        _.Easement, _.Acres, _.id, _.type, _.geom,\n",
    "    )\n",
    ")\n",
    "ca2024 = ca.execute()\n",
    "\n",
    "ca2024.to_parquet(\"ca2024.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The token has not been saved to the git credentials 
helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
      "Token is valid (permission: write).\n",
      "Your token has been saved to /home/jovyan/.cache/huggingface/token\n",
      "Login successful\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e680dc3991c4c9b808447f04d056f53",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "ca2024.parquet: 0%| | 0.00/137M [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "## Upload to Huggingface\n",
    "# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
    "\n",
    "from huggingface_hub import HfApi, login\n",
    "import streamlit as st\n",
    "\n",
    "# The token is looked up under the HF_TOKEN key of st.secrets rather than\n",
    "# being written into the notebook.\n",
    "login(st.secrets[\"HF_TOKEN\"])\n",
    "\n",
    "# Upload the file written by the pre-processing step to the dataset repo.\n",
    "api = HfApi()\n",
    "info = api.upload_file(\n",
    "    path_or_fileobj=\"ca2024.parquet\",\n",
    "    path_in_repo=\"ca2024.parquet\",\n",
    "    repo_id=\"boettiger-lab/ca-30x30\",\n",
    "    repo_type=\"dataset\",\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cebd0ff5-8353-4b84-b9ee-182b74613554",
   "metadata": {},
   "source": [
    "# Testing & visualization\n",
    "\n",
    "`ca2024.parquet` now contains everything we need. The code below shows a few quick examples of the kinds of visualizations and summaries we might want to compute from this data.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "55afe07c-8681-4308-bbb9-e460f7380f86",
   "metadata": {},
   "outputs": [],
   "source": [
    "import leafmap.maplibregl as leafmap\n",
    "import ibis\n",
    "from ibis import _\n",
    "\n",
    "# In-memory DuckDB connection (no database file is passed) with the spatial extension.\n",
    "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
    "\n",
    "# Read the exported file back through the connection.\n",
    "ca2024 = conn.read_parquet(\"ca2024.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | manager | \n", "manager_type | \n", "area | \n", "
---|---|---|---|
0 | \n", "California Department of Fish and Wildlife | \n", "State | \n", "54853.556568 | \n", "
1 | \n", "California Department of Parks and Recreation | \n", "State | \n", "21439.451269 | \n", "
2 | \n", "California Tahoe Conservancy | \n", "State | \n", "6119.753048 | \n", "
3 | \n", "California Department of Water Resources | \n", "State | \n", "4033.217739 | \n", "
4 | \n", "California State University Sonoma | \n", "State | \n", "3842.054169 | \n", "
5 | \n", "University of California | \n", "State | \n", "2050.549176 | \n", "
6 | \n", "California Department of Forestry and Fire Pro... | \n", "State | \n", "1212.712394 | \n", "
7 | \n", "Coachella Valley Mountains Conservancy | \n", "State | \n", "167.224090 | \n", "
8 | \n", "California State Lands Commission | \n", "State | \n", "113.344073 | \n", "
9 | \n", "California State Coastal Conservancy | \n", "State | \n", "97.314705 | \n", "
10 | \n", "Other State | \n", "State | \n", "55.542241 | \n", "
11 | \n", "California Department of Transportation | \n", "State | \n", "1.957490 | \n", "
12 | \n", "San Joaquin River Conservancy | \n", "State | \n", "1.531470 | \n", "
13 | \n", "California State University | \n", "State | \n", "0.021589 | \n", "