{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125",
   "metadata": {},
   "source": [
    "# Pre-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import ibis\n",
    "from ibis import _\n",
    "\n",
    "conn = ibis.duckdb.connect(\"tmp3\", extensions=[\"spatial\"])\n",
    "# ca_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
    "# or use local copy:\n",
    "ca_parquet = \"ca_areas.parquet\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3d4f189-1563-4868-9f1f-64d67569df27",
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative buffer to account for overlapping boundaries. \n",
    "buffer = -30 #30m buffer \n",
    "\n",
    "tbl = (\n",
    "    conn.read_parquet(ca_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    .rename(geom = \"SHAPE\")\n",
    "    .filter(_.reGAP < 3) # only gap 1 and 2 count towards 30x30\n",
    ")\n",
    "\n",
    "# polygons with release_year 2024 are a superset of release_year 2023. \n",
    "# use anti_join to isolate the objects that are in release_year 2024 but not release_year 2023 (aka newly established). \n",
    "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer)) \n",
    "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
    "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a59c976b-3c36-40f9-a15b-cefcd155c647",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "new2024 = intersects.select(\"OBJECTID\").mutate(established = 2024) # saving IDs to join on\n",
    "\n",
    "ca = (conn\n",
    "      .read_parquet(ca_parquet)\n",
    "      .cast({\"SHAPE\": \"geometry\"})\n",
    "      .mutate(area = _.SHAPE.area())\n",
    "      .filter(_.Release_Year == 2024) # having both 2023 and 2024 is redudant since 2024 is the superset.\n",
    "      .left_join(new2024, \"OBJECTID\") # newly established 2024 polygons \n",
    "      .mutate(established=_.established.fill_null(2023)) \n",
    "      .mutate(geom = _.SHAPE.convert(\"epsg:3310\",\"epsg:4326\"))\n",
    "      .rename(name = \"cpad_PARK_NAME\", access_type = \"cpad_ACCESS_TYP\", manager = \"cpad_MNG_AGENCY\",\n",
    "              manager_type = \"cpad_MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\")\n",
    "      .mutate(manager = _.manager.substitute({\"\": \"Unknown\"})) \n",
    "      .mutate(manager_type = _.manager_type.substitute({\"\": \"Unknown\"}))\n",
    "      .mutate(access_type = _.access_type.substitute({\"\": \"Unknown Access\"}))\n",
    "      .mutate(name = _.name.substitute({\"\": \"Unknown\"}))\n",
    "      .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
    "              _.Easement, _.Acres, _.id, _.type, _.geom)\n",
    "     )\n",
    "\n",
    "ca2024 = ca.execute()\n",
    "ca2024.to_parquet(\"ca2024-30m.parquet\")\n",
    "ca2024.to_file(\"ca2024-30m.geojson\") # tippecanoe can't parse geoparquet :-("
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfac7aa4-e418-4d7c-91e0-04ff8eae804c",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Upload to Huggingface\n",
    "# https://huggingface.co/datasets/boettiger-lab/ca-30x30/\n",
    "\n",
    "from huggingface_hub import HfApi, login\n",
    "import streamlit as st\n",
    "login(st.secrets[\"HF_TOKEN\"])\n",
    "api = HfApi()\n",
    "\n",
    "def hf_upload(file):\n",
    "    info = api.upload_file(\n",
    "            path_or_fileobj=file,\n",
    "            path_in_repo=file,\n",
    "            repo_id=\"boettiger-lab/ca-30x30\",\n",
    "            repo_type=\"dataset\",\n",
    "        )\n",
    "    \n",
    "hf_upload(\"ca2024-30m.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2df80e1d-6b94-4884-b9f5-d9c23d3ea028",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import os\n",
    "\n",
    "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
    "    # Ensure Tippecanoe is installed\n",
    "    if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
    "        raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
    "\n",
    "    # Construct the Tippecanoe command\n",
    "    command = [\n",
    "        \"tippecanoe\",\n",
    "        \"-o\", output_file,\n",
    "        \"-z\", str(max_zoom),\n",
    "        \"--drop-densest-as-needed\",\n",
    "        \"--extend-zooms-if-still-dropping\",\n",
    "        \"--force\",\n",
    "        input_file\n",
    "    ]\n",
    "\n",
    "    # Run Tippecanoe\n",
    "    try:\n",
    "        subprocess.run(command, check=True)\n",
    "        print(f\"Successfully generated PMTiles file: {output_file}\")\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        print(f\"Error running Tippecanoe: {e}\")\n",
    "\n",
    "generate_pmtiles(\"ca2024-30m.geojson\", \"ca2024-30m-tippe.pmtiles\")\n",
    "hf_upload(\"ca2024-30m-tippe.pmtiles\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}