{ "cells": [ { "cell_type": "code", "execution_count": 21, "id": "e84709ab-1b47-49ee-8cbd-8aa69744b6c3", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c4382edb57b643e6907b0314c79387bd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b4ae3a5cd5f84c5b86fdcd767d330adf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
DatabaseTable: tmp2.main.t2\n", " gid int64\n", " cpad_ACCESS_TYP string\n", " cpad_PARK_NAME string\n", " cpad_MNG_AGENCY string\n", " cpad_MNG_AG_LEV string\n", " reGAP int16\n", " Easement int16\n", " TYPE string\n", " CA_County_Name string\n", " CA_Region_Name string\n", " TerrMar string\n", " CA_Ecoregion_Name string\n", " ACCESS_TYP string\n", " MNG_AGNCY string\n", " MNG_AG_LEV string\n", " UNIT_NAME string\n", " DefaultSelection string\n", " CA_Ecoregion_Acres float32\n", " CA_Region_Acres float32\n", " CA_County_Acres float32\n", " Acres float32\n", " CA_Marine_Acres float32\n", " Release_Year int16\n", " mgmt_stack string\n", " geom geospatial:geometry\n", " SHAPE_bbox xmin: float32\n", " ymin: float32\n", " xmax: float32\n", " ymax: float32\n", "\n" ], "text/plain": [ "DatabaseTable: tmp2.main.t2\n", " gid int64\n", " cpad_ACCESS_TYP string\n", " cpad_PARK_NAME string\n", " cpad_MNG_AGENCY string\n", " cpad_MNG_AG_LEV string\n", " reGAP int16\n", " Easement int16\n", " TYPE string\n", " CA_County_Name string\n", " CA_Region_Name string\n", " TerrMar string\n", " CA_Ecoregion_Name string\n", " ACCESS_TYP string\n", " MNG_AGNCY string\n", " MNG_AG_LEV string\n", " UNIT_NAME string\n", " DefaultSelection string\n", " CA_Ecoregion_Acres float32\n", " CA_Region_Acres float32\n", " CA_County_Acres float32\n", " Acres float32\n", " CA_Marine_Acres float32\n", " Release_Year int16\n", " mgmt_stack string\n", " geom geospatial:geometry\n", " SHAPE_bbox xmin: float32\n", " ymin: float32\n", " xmax: float32\n", " ymax: float32" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ibis\n", "from ibis import _\n", "conn = ibis.duckdb.connect(\"tmp2\", extensions=[\"spatial\"])\n", "\n", "tbl = (\n", " conn.read_parquet(\"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\")\n", " .cast({\"SHAPE\": \"geometry\"})\n", " .rename(geom = \"SHAPE\", gid = \"OBJECTID\")\n", " # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n", " .filter(_.reGAP < 3) \n", ")\n", "conn.create_table(\"t1\", tbl.filter(_.Release_Year == 2024), overwrite = True)\n", "conn.create_table(\"t2\", tbl.filter(_.Release_Year == 2023), overwrite = True)\n", "\n" ] }, { "cell_type": "code", "execution_count": 23, "id": "303792ac-9b1d-41b2-a17b-5cf855d70633", "metadata": {}, "outputs": [], "source": [ "ca2024 = conn.table(\"t1\").execute()\n", "ca2023 = conn.table(\"t2\").execute()\n", "\n", "import leafmap.maplibregl as leafmap\n", "m = leafmap.Map()\n", "m.add_gdf(ca2024, name = \"2024\")\n", "m.add_gdf(ca2023, name =\"2023\")\n", "\n", "m" ] }, { "cell_type": "code", "execution_count": null, "id": "9fdd2ed2-623f-479c-b0b7-7c723f3f6728", "metadata": {}, "outputs": [], "source": [ "\n", "conn.disconnect()" ] }, { "cell_type": "code", "execution_count": 2, "id": "121c3cad-680c-4f3a-9075-638711ea1634", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c69a096d24974e9ea8ad3d5b937b723a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 22min 10s, sys: 43 s, total: 22min 53s\n", "Wall time: 11min 47s\n" ] } ], "source": [ "%%time\n", "\n", "## RUN this on a machine with a whole lot of RAM. consider filtering federal/non-federal first.\n", "import duckdb\n", "db = duckdb.connect(\"tmp2\")\n", "db.install_extension(\"spatial\")\n", "db.load_extension(\"spatial\")\n", "\n", "db.sql('''\n", "CREATE OR REPLACE TABLE diff AS (\n", "with temp as \n", "(\n", " select b.gid, st_union_agg(a.geom) as geom\n", " from t1 b join t2 a on st_intersects(a.geom, b.geom)\n", " group by b.gid\n", ") \n", "select st_difference(b.geom,coalesce(t.geom, 'GEOMETRYCOLLECTION EMPTY'::geometry)) as geom\n", "from t1 b left join temp t on b.gid = t.gid\n", ")\n", "''')\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "cf3d3e5b-2ff1-4ef9-a147-01f15e970e49", "metadata": {}, "outputs": [], "source": [ "## Cannot go straight to geoparquet due to M geometries!\n", "#db.table(\"diff\").to_parquet(\"diff.parquet\")\n", "\n", "## This doesn't work either: \n", "#db.sql('''CREATE OR REPLACE TABLE diff2024 AS SELECT *, st_force2d(geom) AS geom FROM diff''')\n", "\n", "## We could cast geom as blob...." ] }, { "cell_type": "code", "execution_count": 10, "id": "308dc665-1323-4e9b-bc2d-69201c325c4b", "metadata": {}, "outputs": [], "source": [ "# F*ck it. Let's do it all in RAM via geopandas, which drops M geoms due to a fortunate bug! \n", "conn = ibis.duckdb.connect(\"tmp2\", extensions=[\"spatial\"])\n", "gdf = conn.table(\"diff\").mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n", "gdf.to_parquet(\"ca2024_diffs.parquet\")\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "063a11d8-15d7-4b91-b67c-3ccae3edcc8d", "metadata": {}, "outputs": [], "source": [ "# stash in our team S3 storage \n", "\n", "import streamlit as st\n", "from minio import Minio\n", "import os\n", "# Get signed URLs to access license-controlled layers\n", "key = st.secrets[\"MINIO_KEY\"]\n", "secret = st.secrets[\"MINIO_SECRET\"]\n", "client = Minio(\"minio.carlboettiger.info\", key, secret, secure=True)\n", "\n", "size = os.path.getsize(\"ca2024_diffs.parquet\")\n", "with open(\"ca2024_diffs.parquet\", \"rb\") as file_data:\n", " client.put_object(\"public-biodiversity\", \"ca30x30/ca2024_diffs.parquet\", file_data, length = size)\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 26, "id": "34425101-0592-42fd-9d62-22c9e7a6d6ac", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c5cc696c15374d519fb940e054a902f0", "version_major": 2, "version_minor": 1 }, "text/plain": [ "Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# can read from S3 and plot the whole thing. Note gdf has no metadata.\n", "\n", "import leafmap.maplibregl as leafmap\n", "import ibis\n", "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n", "gdf = conn.read_parquet(\"https://minio.carlboettiger.info/public-biodiversity/ca30x30/ca2024_diffs.parquet\").execute()\n", "m = leafmap.Map()\n", "m.add_gdf(gdf)\n", "#m.to_html(\"ca2024.html\")\n", "m" ] }, { "cell_type": "code", "execution_count": 25, "id": "17af39a0-9a69-4bfa-9084-d7b26adf74fc", "metadata": {}, "outputs": [], "source": [ "path = \"ca2024.html\"\n", "size = os.path.getsize(path)\n", "with open(path, \"rb\") as file_data:\n", " client.put_object(\"public-biodiversity\", \"ca30x30/\"+path, file_data, length = size)\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "775abaca-f21a-4cd8-ad7d-0310cc6b33b6", "metadata": {}, "outputs": [], "source": [ "# \"TO 'new2024.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES')\"" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 5 }