Spaces:

boettiger-lab
/

preview-ca-30x30

Running

File size: 21,671 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125",
   "metadata": {},
   "source": [
    "# Pre-processing script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import ibis\n",
    "from ibis import _\n",
    "import geopandas as gpd\n",
    "import duckdb\n",
    "from cng.utils import ST_MakeValid\n",
    "\n",
    "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
    "path = '../data/ca-layers/'\n",
    "\n",
    "# CA Nature data \n",
    "ca_raw_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
    "\n",
    "# Boundary of CA, used to compute 'non-conserved' areas\n",
    "ca_boundary_shape = \"../data/ca_shape\"\n",
    "ca_boundary_parquet = path + \"ca_boundary.parquet\"\n",
    "\n",
    "# Ecoregions\n",
    "ca_ecoregions_shape = \"../data/ecoregions/ACE_Ecoregions_BaileyDerived_2022.shp\"\n",
    "ca_ecoregions_parquet = path + \"ace_ecoregions.parquet\"\n",
    "\n",
    "# file to save non-conserved areas; costly operation so we save results \n",
    "ca_nonconserved_parquet = path + \"ca-30x30-nonconserved-500m-simplified.parquet\" \n",
    "ca_nonconserved_eco_parquet = path + \"ca-30x30-nonconserved-500m-simplified-eco.parquet\" \n",
    "\n",
    "# temp file used to compute zonal stats: has conserved + non-conserved areas \n",
    "ca_temp_parquet = path + \"ca-30x30-temp.parquet\"  \n",
    "\n",
    "# final files: conserved + non-conserved areas + zonal stats \n",
    "ca_parquet = path + \"ca-30x30.parquet\"\n",
    "ca_pmtiles = path + \"ca-30x30.pmtiles\" #excludes non-conserved geometries\n",
    "\n",
    "#vector data \n",
    "svi = path + 'SVI2022_US_tract' #EPSG:4326\n",
    "justice40 = path + 'disadvantaged-communities'#ESRI:102039\n",
    "fire = path + 'calfire-2023' #EPSG:4326\n",
    "rxburn = path + 'calfire-rxburn-2023' #EPSG:4326\n",
    "\n",
    "#raster data \n",
    "irrecoverable_c = path + 'ca_irrecoverable_c_2018_cog' # EPSG:3857\n",
    "manageable_c = path + 'ca_manageable_c_2018_cog'# EPSG:3857\n",
    "richness = path + 'SpeciesRichness_All' # EPSG:3857\n",
    "rsr = path + 'RSR_All'# EPSG:3857"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "907235f6-48a5-4c55-b779-3bb6839acf2b",
   "metadata": {},
   "source": [
    "# Step 1: Computing all \"non-conserved\" areas"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c6c1cbf5-bc6e-4238-ab87-c467067235c0",
   "metadata": {},
   "source": [
    "#### Convert CA boundary to parquet "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "38091012-586e-4091-8f0d-a0aa868a04cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the CA boundary shapefile, reproject it to EPSG:3310, and cache the result as parquet\n",
    "boundary_raw = gpd.read_file(ca_boundary_shape)\n",
    "ca_boundary = boundary_raw.to_crs(epsg=3310)\n",
    "ca_boundary.to_parquet(ca_boundary_parquet)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3dfcb35b-e6a9-4a89-af05-c65909191f2b",
   "metadata": {},
   "source": [
    "#### Computing difference: Non-conserved areas = CA Boundary - Conserved Areas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7aedc147-6601-4ca7-9316-ddea5cab154a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Non-conserved area = CA boundary minus the union of the 2024 protected areas.\n",
    "# This chunk will take ~2 hours to run \n",
    "conn = ibis.duckdb.connect(\"tmp\", extensions=[\"spatial\"]) #save to disk\n",
    "\n",
    "# CA Boundary \n",
    "ca_all_tbl = (\n",
    "    conn.read_parquet(ca_boundary_parquet)\n",
    "    .rename(geom = \"geometry\")\n",
    "    .cast({\"geom\": \"geometry\"})\n",
    ")\n",
    "\n",
    "\n",
    "# CA-Nature data / protected areas \n",
    "tbl = (\n",
    "    conn.read_parquet(ca_raw_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    .rename(geom = \"SHAPE\", gid = \"OBJECTID\")\n",
    ")\n",
    "\n",
    "# materialise both inputs as named tables so the SQL below can reference them\n",
    "conn.create_table(\"t1\", ca_all_tbl, overwrite = True)\n",
    "conn.create_table(\"t2\", tbl.filter(_.Release_Year == 2024), overwrite = True)\n",
    "\n",
    "# protected-area geometries are simplified to 500m before the union so the kernel doesn't crash\n",
    "# `conn` is the ibis backend; its raw DuckDB connection is `.con` (same access pattern as `con.con` further down)\n",
    "# CREATE OR REPLACE matches the overwrite=True above, so the cell can be re-run against the on-disk database\n",
    "conn.con.execute('''\n",
    "CREATE OR REPLACE TABLE not_in_pad AS\n",
    "WITH t2_simplified AS (\n",
    "    SELECT ST_Simplify(geom, 500) AS geom\n",
    "    FROM t2\n",
    "),\n",
    "t2_union AS (\n",
    "    SELECT ST_Union_Agg(geom) AS geom\n",
    "    FROM t2_simplified\n",
    ")\n",
    "SELECT \n",
    "    ST_Difference(t1.geom, t2_union.geom) AS geom\n",
    "FROM \n",
    "    t1, t2_union;\n",
    "''')\n",
    "\n",
    "\n",
    "# save to parquet file so we don't have to run this again\n",
    "nonconserved = conn.table(\"not_in_pad\")\n",
    "nonconserved.execute().to_parquet(ca_nonconserved_parquet)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "845eb0ed-3392-4346-9b7e-959cd97a274f",
   "metadata": {},
   "source": [
    "#### Get ecoregions - convert them to parquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b43fee2c-b8c4-4076-b87b-089676031165",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the ecoregions shapefile to parquet so it can be read through DuckDB in the next step\n",
    "# NOTE(review): unlike the CA boundary above, this layer is not reprojected here - confirm the shapefile is already in EPSG:3310\n",
    "eco = gpd.read_file(ca_ecoregions_shape)\n",
    "eco.to_parquet(ca_ecoregions_parquet)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b5689850-80fa-4cc9-87e9-46074b8d9107",
   "metadata": {},
   "source": [
    "#### Compute ecoregion for non-conserved areas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# New DuckDB connection with no database path; it rebinds the notebook-level `con` used by the later cells\n",
    "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
    "\n",
    "eco = con.read_parquet(ca_ecoregions_parquet)\n",
    "non = con.read_parquet(ca_nonconserved_parquet)\n",
    "\n",
    "# register both inputs as named tables so the SQL below can reference them\n",
    "con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
    "con.create_table(\"non\", non, overwrite = True)\n",
    "\n",
    "#split up the non-conserved areas by ecoregions\n",
    "# `non.*` already carries a `geom` column, so this SELECT produces two columns aliased `geom`.\n",
    "# The code further down expects the intersection result under the name `geom_1`.\n",
    "# NOTE(review): that relies on how DuckDB de-duplicates column names - confirm on the DuckDB version in use.\n",
    "# The WHERE clause keeps only intersections whose geometry type is POLYGON or MULTIPOLYGON.\n",
    "con.con.execute('''\n",
    "CREATE TABLE non_conserved_eco AS\n",
    "SELECT \n",
    "    non.*, \n",
    "    eco.ECOREGION_ AS ecoregion,\n",
    "    ST_Intersection(non.geom, eco.geometry) AS geom  -- Split non into ecoregions\n",
    "FROM non\n",
    "JOIN eco \n",
    "ON ST_Intersects(non.geom, eco.geometry)\n",
    "WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
    "''')\n",
    "\n",
    "# save to parquet file so we don't have to run this again\n",
    "# drop the original (unsplit) geometry, keep the per-ecoregion piece as `geom`, repair it,\n",
    "# and add a row-number `id` column\n",
    "non_eco = (con.table(\"non_conserved_eco\")\n",
    "           .drop('geom')\n",
    "           .rename(geom = \"geom_1\")\n",
    "           .mutate(geom = ST_MakeValid(_.geom))\n",
    "           .mutate(id=ibis.row_number().over())\n",
    "          )\n",
    "\n",
    "non_conserved_eco = non_eco.execute()\n",
    "non_conserved_eco.to_parquet(ca_nonconserved_eco_parquet)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce52b1e0-027e-4915-9e7b-e51e946560ed",
   "metadata": {},
   "source": [
    "#### Non-conserved areas need to match CA Nature schema when merging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9666d1-7c2b-45af-9399-e4189bba34f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# match CA Nature schema \n",
    "# This table is unioned with the CA Nature table in Step 3, so it is given the same set of columns\n",
    "# that Step 3 selects (established, gap_code, status, name, access_type, manager, manager_type,\n",
    "# ecoregion, easement, acres, id, type, geom) with explicitly cast dtypes.\n",
    "# Attributes that do not apply to non-conserved land are filled with nulls; gap_code is set to 0.\n",
    "nonconserved_clean = (\n",
    "    con.read_parquet(ca_nonconserved_eco_parquet)\n",
    "    .cast({\"geom\": \"geometry\"})\n",
    "    .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
    "            access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
    "            easement = ibis.null(), type = ibis.literal(\"Land\"),\n",
    "            status = ibis.literal(\"non-conserved\"),\n",
    "            acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
    "           )\n",
    "    .cast({\"established\": \"string\", \"gap_code\": \"int16\", \"status\": \"string\",\"name\": \"string\",\n",
    "          \"access_type\": \"string\", \"manager\": \"string\", \"manager_type\": \"string\",\n",
    "          \"ecoregion\": \"string\", \"easement\": \"string\", \"id\": \"int64\", \"type\": \"string\",\n",
    "          \"acres\":\"float32\"}) #match schema to CA Nature\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "104254ef-f6e9-4f03-8797-de55091774d5",
   "metadata": {},
   "source": [
    "# Step 2: Isolate pre-2024 from 2024 polygons"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3d4f189-1563-4868-9f1f-64d67569df27",
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative buffer to account for overlapping boundaries. \n",
    "# (the 2023 polygons are shrunk by this amount before the intersection test below)\n",
    "buffer = -30 #30m buffer \n",
    "\n",
    "tbl = (\n",
    "    con.read_parquet(ca_raw_parquet)\n",
    "    .cast({\"SHAPE\": \"geometry\"})\n",
    "    .rename(geom = \"SHAPE\")\n",
    "    .filter(_.reGAP < 3) # only gap 1 and 2 count towards 30x30\n",
    ")\n",
    "\n",
    "# polygons with release_year 2024 are a superset of release_year 2023. \n",
    "# use anti_join to isolate the objects that are in release_year 2024 but not release_year 2023 (aka newly established). \n",
    "# NOTE: despite its name, `intersects` holds the 2024 rows that do NOT intersect any shrunken 2023 polygon;\n",
    "# Step 3 reads its OBJECTID column to label those rows as established in 2024.\n",
    "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer)) \n",
    "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
    "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f335433-ff89-4966-bf98-c11a0b233686",
   "metadata": {},
   "source": [
    "# Step 3: Join all protected land data into single parquet file "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a59c976b-3c36-40f9-a15b-cefcd155c647",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# Build the merged layer: 2024 CA Nature polygons (renamed to the app's column names) plus the\n",
    "# non-conserved polygons prepared in Step 1.\n",
    "new2024 = intersects.select(\"OBJECTID\").mutate(established = ibis.literal(\"2024\")) # saving IDs to join on\n",
    "\n",
    "ca_merged = (con\n",
    "      .read_parquet(ca_raw_parquet)\n",
    "      .cast({\"SHAPE\": \"geometry\"})\n",
    "      .mutate(area = _.SHAPE.area())\n",
    "      .filter(_.Release_Year == 2024) # having both 2023 and 2024 is redundant since 2024 is the superset.\n",
    "      .left_join(new2024, \"OBJECTID\") # newly established 2024 polygons \n",
    "      .mutate(established=_.established.fill_null(\"pre-2024\")) \n",
    "      .rename(name = \"cpad_PARK_NAME\", access_type = \"cpad_ACCESS_TYP\", manager = \"cpad_MNG_AGENCY\",\n",
    "              manager_type = \"cpad_MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\", \n",
    "              ecoregion = \"CA_Ecoregion_Name\", acres = \"Acres\", gap_code = \"reGAP\", geom = \"SHAPE\")\n",
    "      .cast({\"gap_code\": \"int16\"})\n",
    "      .cast({\"id\": \"int64\"})\n",
    "      # replace empty strings with readable placeholders\n",
    "      .mutate(manager = _.manager.substitute({\"\": \"Unknown\"})) \n",
    "      .mutate(manager_type = _.manager_type.substitute({\"\": \"Unknown\"}))\n",
    "      .mutate(access_type = _.access_type.substitute({\"\": \"Unknown Access\"}))\n",
    "      .mutate(name = _.name.substitute({\"\": \"Unknown\"}))\n",
    "      .mutate(manager_type = _.manager_type.substitute({\"Home Owners Association\": \"HOA\"}))\n",
    "      .mutate(easement=_.Easement.cast(\"string\").substitute({\"0\": \"False\", \"1\": \"True\"}))\n",
    "      # map the GAP code onto the status label used alongside the non-conserved rows\n",
    "      .mutate(status=_.gap_code.cast(\"string\")\n",
    "              .substitute({\"1\": \"30x30-conserved\", \"2\": \"30x30-conserved\", \"3\": \"other-conserved\", \n",
    "                           \"4\": \"unknown\"}))\n",
    "      .select(_.established, _.gap_code, _.status, _.name, _.access_type, _.manager, _.manager_type,\n",
    "              _.ecoregion, _.easement, _.acres, _.id, _.type, _.geom)\n",
    "      .union(nonconserved_clean)\n",
    "      .mutate(acres = _.acres.round(4))\n",
    "      .mutate(geom = ST_MakeValid(_.geom))\n",
    "      .drop_null(['geom'],how = \"any\")\n",
    "     )\n",
    "\n",
    "\n",
    "# set_crs() returns a new GeoDataFrame, so keep its result: the zonal-stats cell reuses `gdf`\n",
    "# directly when it is still in the kernel, and the final reprojection needs a CRS on the frame.\n",
    "gdf = ca_merged.execute().set_crs(\"epsg:3310\")\n",
    "gdf.to_parquet(ca_temp_parquet) # saving to temp file to compute zonal stats"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44d64f2b-a65b-4ac1-9943-2d96f5c91e1d",
   "metadata": {},
   "source": [
    "# Step 4: Compute zonal stats"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e129b0cc-ee7d-4e58-a8d8-d6f2476bd62c",
   "metadata": {},
   "source": [
    "#### Functions: Reproject and compute overlap for vector data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdeeb7ac-efa0-4a7b-9143-72d8ec911809",
   "metadata": {},
   "outputs": [],
   "source": [
    "def reproject_vectors(file, vec):\n",
    "    \"\"\"Write `vec` to `file + '-epsg3310.parquet'`, reprojected to EPSG:3310 to match the CA Nature data.\n",
    "\n",
    "    The geometry column is renamed to `geom` and repaired with make_valid() before\n",
    "    reprojecting. Nothing is returned; the parquet file is the only output.\n",
    "    \"\"\"\n",
    "    renamed = vec.rename_geometry('geom')\n",
    "    renamed[\"geom\"] = renamed[\"geom\"].make_valid()\n",
    "    renamed.to_crs(\"EPSG:3310\").to_parquet(file + '-epsg3310.parquet')\n",
    "\n",
    "def vector_vector_stats(base, data_layer):\n",
    "    \"\"\"Compute, for every polygon in `base`, its value-weighted overlap with `data_layer`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    base : str\n",
    "        Parquet path read through the notebook-level `con`; must provide `id` and `geom` columns.\n",
    "    data_layer : str\n",
    "        Parquet path read through `con`; must provide `geom` and `value` columns.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Columns `id` and `overlap_fraction`. For each base polygon, `overlap_fraction` is the sum over\n",
    "        intersecting features of (intersection area / polygon area) * value, rounded to 3 decimals,\n",
    "        with a missing sum replaced by 0.\n",
    "    \"\"\"\n",
    "    t1 = con.read_parquet(base).select(_.id, _.geom)\n",
    "    t2 = con.read_parquet(data_layer).select(_.geom, _.value)\n",
    "    # left join keeps base polygons that intersect nothing; coalesce turns their null sum into 0\n",
    "    expr = (t1\n",
    "     .left_join(t2, t1.geom.intersects(t2.geom))\n",
    "     .group_by(t1.id, t1.geom)\n",
    "     .agg(overlap_fraction = (t1.geom.intersection(t2.geom).area() / t1.geom.area() *t2.value) \n",
    "          .sum().coalesce(0).round(3) ) #weighted overlap, based on t2.value\n",
    "    )\n",
    "    stats = expr.execute()\n",
    "    return stats[['id','overlap_fraction']]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f45a0f52-6d18-45b4-8585-af3f1190b000",
   "metadata": {},
   "source": [
    "#### Compute zonal stats with vector data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b110da15-d2ac-4457-9241-f02f44dc436a",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "vectors = [svi,justice40 ,fire,rxburn]\n",
    "names = ['svi','disadvantaged_communities','fire','rxburn']\n",
    "\n",
    "# reuse the merged frame from Step 3 if it is still in the kernel; otherwise reload it from disk\n",
    "if 'gdf' not in locals(): \n",
    "    gdf_stats = gpd.read_parquet(ca_temp_parquet) \n",
    "\n",
    "else: \n",
    "    gdf_stats = gdf\n",
    "\n",
    "# set the index to the col we are joining on for gpd.join()\n",
    "gdf_stats = gdf_stats.set_index('id')\n",
    "\n",
    "for file,name in zip(vectors,names):\n",
    "    vec = gpd.read_parquet(file + '.parquet') #load in vector data layer \n",
    "\n",
    "    # Each filter below takes an explicit .copy() so that adding the `value` column writes to an\n",
    "    # independent frame rather than to a slice of the frame that was just read.\n",
    "\n",
    "    # filter: we only want 10 year range for fire\n",
    "    if name in ['fire','rxburn']:\n",
    "        vec = vec[vec['YEAR_']>=2013].copy()\n",
    "        vec['value'] = 1 #used in overlap calculation, 1 = fire occurred \n",
    "\n",
    "    # filter: only want CA data, not nationwide. \n",
    "    if name == 'svi': \n",
    "        vec = vec[(vec['STATE']==\"California\") & (vec['RPL_THEMES'] != -999)].copy() #removing empty values \n",
    "        vec['value'] = vec['RPL_THEMES'] #overlap calculation is weighted on svi index\n",
    "\n",
    "    # filter: only want CA, and only disadvantaged communities \n",
    "    if name == 'disadvantaged_communities':\n",
    "        vec = vec[(vec['StateName']==\"California\") & (vec['Disadvan'] ==1)].copy()\n",
    "        vec['value'] = 1 #used in overlap calculation, 1 = disadvantaged  \n",
    "        \n",
    "    # change projection to match CA Nature data \n",
    "    reproject_vectors(file, vec) \n",
    "\n",
    "    # compute zonal stats \n",
    "    vector_stats = vector_vector_stats(ca_temp_parquet, file + '-epsg3310.parquet') \n",
    "    vector_stats = vector_stats.rename(columns ={'overlap_fraction':name}) \n",
    "\n",
    "    # joining new zonal stats column with CA Nature data. \n",
    "    gdf_stats = gdf_stats.join(vector_stats.set_index('id')) \n",
    "\n",
    "gdf_stats = gdf_stats.reset_index()\n",
    "gdf_stats.to_parquet(ca_parquet) #save CA Nature + zonal stats "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0fccaf3-50a8-4324-82fa-34838987334b",
   "metadata": {},
   "source": [
    "#### Function: Reproject raster data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aade11d9-87b9-403d-bad1-3069663807a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "\n",
    "def raster_reprojection(input_file, output_file, epsg=\"EPSG:3310\"):\n",
    "    \"\"\"Reproject a raster by running the `gdalwarp` command-line tool.\n",
    "\n",
    "    Invokes `gdalwarp -t_srs` with `epsg`, `input_file` and `output_file`, then prints whether the\n",
    "    call succeeded. A non-zero exit status is reported with a printed message rather than raised.\n",
    "    \"\"\"\n",
    "    warp_cmd = [\"gdalwarp\", \"-t_srs\", epsg, input_file, output_file]\n",
    "    try:\n",
    "        subprocess.run(warp_cmd, check=True)\n",
    "    except subprocess.CalledProcessError as err:\n",
    "        print(f\"Error occurred during reprojection: {err}\")\n",
    "    else:\n",
    "        print(f\"Reprojection successful! Output saved to: {output_file}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94e924fd-d927-4458-ba1f-670b4047d149",
   "metadata": {},
   "source": [
    "#### Compute zonal stats with raster data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ce1bc61-eabd-4a73-ba34-a1707bc14f74",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import rasterio\n",
    "from exactextract import exact_extract\n",
    "\n",
    "# Raster zonal stats: mean raster value inside every polygon, one new column per raster.\n",
    "# NOTE(review): this cell writes `ca_parquet` twice (with `ca_id` before the loop, then reprojected to\n",
    "# EPSG:4326 with the raster columns at the end), and the fallback read below loads that same file -\n",
    "# so re-running it from a fresh kernel starts from the already-finished output; confirm that is intended.\n",
    "rasters = [irrecoverable_c, manageable_c, richness, rsr]\n",
    "names = ['irrecoverable_carbon','manageable_carbon','richness','rsr']\n",
    "\n",
    "if 'gdf_stats' not in locals(): \n",
    "    gdf_stats = gpd.read_parquet(ca_parquet) # read in data if it's not already created \n",
    "    \n",
    "# need to make the following changes to our data for exact_extract() to work:\n",
    "gdf_stats = gdf_stats.rename(columns ={'id':'ca_id'}) #rename 'id' because it conflicts with a raster field. \n",
    "gdf_stats.to_parquet(ca_parquet) #saving updated parquet to file to use for exact_extract()\n",
    "\n",
    "for file,name in zip(rasters,names):\n",
    "    raster_reprojection(file+'.tif', file+'_epsg3310.tif') #reproject rasters to match CA Nature\n",
    "    # exact_extract() is given the parquet path, so it reads the polygons from the file written above,\n",
    "    # not from the in-memory `gdf_stats`; `ca_id` is carried through so the result can be joined back\n",
    "    raster_stats = exact_extract(file+'_epsg3310.tif', ca_parquet, [\"mean\"], include_cols=[\"ca_id\"], output = 'pandas') #zonal stats \n",
    "    \n",
    "    #the column we want is 'band_1_mean'; these rasters have multiple bands. \n",
    "    if name in ['irrecoverable_carbon','manageable_carbon']:\n",
    "        raster_stats = raster_stats[['ca_id','band_1_mean']] \n",
    "        raster_stats = raster_stats.rename(columns ={'band_1_mean':name}) \n",
    "\n",
    "    #these rasters have only 1 band, so zonal stats column is 'mean'\n",
    "    elif name in ['richness','rsr']:\n",
    "        raster_stats = raster_stats[['ca_id','mean']] \n",
    "        raster_stats = raster_stats.rename(columns ={'mean':name})\n",
    "\n",
    "    raster_stats[name] = raster_stats[name].round(3) #rounding stats \n",
    "     \n",
    "    # joining with gpd.join(), need to set an index \n",
    "    gdf_stats = gdf_stats.set_index(\"ca_id\").join(raster_stats.set_index(\"ca_id\")) \n",
    "\n",
    "    # exact_extract() won't work with index, so now that it's joined, we reset the index. \n",
    "    gdf_stats = gdf_stats.reset_index() \n",
    "\n",
    "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
    "\n",
    "\n",
    "# reproject to epsg:4326 since that's what pmtiles requires and we want to match that \n",
    "gdf_stats = gdf_stats.to_crs(\"epsg:4326\")\n",
    "gdf_stats.to_parquet(ca_parquet) # save results "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec619f4e-1338-492a-a334-a7796f4f55a1",
   "metadata": {},
   "source": [
    "# Step 5: Upload file + Generate PMTiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30f47b26-cd18-4e8c-a19b-9d1f19b10873",
   "metadata": {},
   "outputs": [],
   "source": [
    "from cng.utils import hf_upload, s3_cp,set_secrets, to_pmtiles\n",
    "\n",
    "# publish the finished parquet to HF and to the minio bucket\n",
    "hf_upload('ca-30x30.parquet', ca_parquet)\n",
    "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
    "\n",
    "# PMTiles cannot be built straight from parquet, so the data goes parquet, then geojson, then pmtiles.\n",
    "# All rows are exported; the filter that would drop the 'non-conserved' rows\n",
    "# (.filter(_.status != 'non-conserved')) is intentionally left disabled here.\n",
    "ca_geojson = con.read_parquet(ca_parquet)\n",
    "geojson_path = path + 'ca-30x30.geojson'\n",
    "ca_geojson.execute().to_file(geojson_path)\n",
    "pmtiles = to_pmtiles(geojson_path, ca_pmtiles, options=['--extend-zooms-if-still-dropping'])\n",
    "\n",
    "# publish the tiles to the same two destinations\n",
    "hf_upload('ca-30x30.pmtiles', ca_pmtiles)\n",
    "s3_cp(ca_pmtiles, \"s3://public-ca30x30/ca-30x30.pmtiles\", \"minio\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}