# Pre-processing

In [1]:
import os

import ibis
from ibis import _

# DuckDB connection opened on the database named "tmp", with the spatial
# extension loaded for the geometry operations used below.
conn = ibis.duckdb.connect("tmp", extensions=["spatial"])

# Input data: the public copy and an optional local mirror of the same file.
CA_PARQUET_URL = "https://data.source.coop/cboettig/ca30x30/ca_areas.parquet"
CA_PARQUET_LOCAL = "/home/rstudio/source.coop/cboettig/ca30x30/ca_areas.parquet"

# Prefer the local mirror when it is present; otherwise fall back to the public
# URL so the notebook still runs on machines that do not have the mirror.
ca_parquet = CA_PARQUET_LOCAL if os.path.exists(CA_PARQUET_LOCAL) else CA_PARQUET_URL


In [2]:
# Distance passed to `geom.buffer()` for the 2023 polygons; negative, so the
# polygons are shrunk before the overlap test.
# NOTE(review): units follow the source CRS -- other cells convert from
# "epsg:3310", so presumably metres; confirm.
buffer = -2

# Areas with reGAP below 3, with the SHAPE column cast to geometry and exposed
# under the name `geom`.
# For a quick test run, additionally restrict to a single unit, e.g.
#   .filter(_.UNIT_NAME == "Angeles National Forest")
tbl = (
    conn.read_parquet(ca_parquet)
    .cast({"SHAPE": "geometry"})
    .rename({"geom": "SHAPE"})
    .filter(_.reGAP < 3)
)

# 2023 release, with each geometry buffered by `buffer`.
tbl_2023 = (
    tbl.filter(tbl.Release_Year == 2023)
    .mutate(geom=_.geom.buffer(buffer))
)

# 2024 release, geometries untouched.
tbl_2024 = tbl.filter(tbl.Release_Year == 2024)

# Despite the name, this is an anti-join: it keeps the 2024 rows for which no
# buffered 2023 geometry satisfies the `intersects` predicate.
intersects = tbl_2024.anti_join(
    tbl_2023,
    _.geom.intersects(tbl_2023.geom),
)



In [3]:
## Testing, run only on subset data
# Optional visual check of the 2023, 2024 and "new in 2024" layers. Disabled by
# default: every layer is materialised with `.execute()`, so only switch this on
# after restricting `tbl` to a small subset.
RUN_SUBSET_PREVIEW = False

if RUN_SUBSET_PREVIEW:
    # Imported here because the notebook-level leafmap import only happens in
    # the later "Testing & visualization" section; without it this cell would
    # raise NameError when enabled.
    import leafmap.maplibregl as leafmap

    # Convert each layer from "epsg:3310" to "epsg:4326" and pull it into memory.
    gdf = intersects.mutate(geom=_.geom.convert("epsg:3310", "epsg:4326")).execute()
    gdf_2023 = tbl_2023.mutate(geom=_.geom.convert("epsg:3310", "epsg:4326")).execute()
    gdf_2024 = tbl_2024.mutate(geom=_.geom.convert("epsg:3310", "epsg:4326")).execute()

    # One flat fill colour per layer.
    inter = {"fill-color": "#F3AB3D"}
    p2024 = {"fill-color": "#26542C"}
    p2023 = {"fill-color": "#8B0A1A"}

    m = leafmap.Map(style="positron")
    m.add_gdf(gdf_2024, layer_type="fill", name="2024", paint=p2024)
    m.add_gdf(gdf_2023, layer_type="fill", name="2023", paint=p2023)
    m.add_gdf(gdf, layer_type="fill", name="intersects", paint=inter)

    m.add_layer_control()
    # A bare `m` nested inside an `if` is not the cell's final expression and is
    # therefore never rendered; show the map explicitly. `display` is provided
    # by the IPython kernel.
    display(m)

In [4]:
%%time

new2024 = intersects.select("OBJECTID").mutate(established = 2024)

ca = (conn
      .read_parquet(ca_parquet)
      .cast({"SHAPE": "geometry"})
      .mutate(area = _.SHAPE.area())
      .filter(_.Release_Year == 2024)
      .filter(_.reGAP < 3)
      .left_join(new2024, "OBJECTID")
      .mutate(established=_.established.fill_null(2023))
      .mutate(geom = _.SHAPE.convert("epsg:3310","epsg:4326"))
      .rename(name = "cpad_PARK_NAME", access_type = "cpad_ACCESS_TYP", manager = "cpad_MNG_AGENCY",
              manager_type = "cpad_MNG_AG_LEV", id = "OBJECTID", type = "TYPE")
      .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,
              _.Easement, _.Acres, _.id, _.type, _.geom)
     )
ca2024 = ca.execute()

ca2024.to_parquet("ca2024.parquet")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 55min 28s, sys: 2.94 s, total: 55min 31s
Wall time: 22min 6s


In [1]:
## Upload to Huggingface
# https://huggingface.co/datasets/boettiger-lab/ca-30x30/

from huggingface_hub import HfApi, login
import streamlit as st
# Authenticate with the token stored under "HF_TOKEN" in Streamlit's secrets.
login(token=st.secrets["HF_TOKEN"])

# Upload the local parquet file to the dataset repository under the same name.
api = HfApi()
info = api.upload_file(
    repo_id="boettiger-lab/ca-30x30",
    repo_type="dataset",
    path_or_fileobj="ca2024.parquet",
    path_in_repo="ca2024.parquet",
)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/jovyan/.cache/huggingface/token
Login successful


ca2024.parquet:   0%|          | 0.00/137M [00:00<?, ?B/s]

# Testing & visualization

`ca2024.parquet` now contains all we need. The code below illustrates some quick examples of the kinds of visualizations and summaries we might want to compute with this data.


In [1]:
import leafmap.maplibregl as leafmap
import ibis
from ibis import _
# New DuckDB connection on an in-memory database with the spatial extension
# loaded. This rebinds `conn` from the pre-processing section.
conn = ibis.duckdb.connect(database=":memory:", extensions=["spatial"])

# From here on `ca2024` is an ibis table expression over the parquet file
# (not the in-memory frame produced during pre-processing); the cells below
# call `.execute()` on it to obtain results.
ca2024 = conn.read_parquet("ca2024.parquet")

In [6]:
# compute some summary tables:

(
    ca2024
    # Keep only areas established in 2024 whose manager type is "State".
    .filter(_.established == 2024, _.manager_type == "State")
    # Total the Acres column per manager, largest total first.
    .group_by("manager", "manager_type")
    .aggregate(area=_.Acres.sum())
    .order_by(ibis.desc("area"))
    .execute()
)

Unnamed: 0,manager,manager_type,area
0,California Department of Fish and Wildlife,State,54853.556568
1,California Department of Parks and Recreation,State,21439.451269
2,California Tahoe Conservancy,State,6119.753048
3,California Department of Water Resources,State,4033.217739
4,California State University Sonoma,State,3842.054169
5,University of California,State,2050.549176
6,California Department of Forestry and Fire Pro...,State,1212.712394
7,Coachella Valley Mountains Conservancy,State,167.22409
8,California State Lands Commission,State,113.344073
9,California State Coastal Conservancy,State,97.314705


In [None]:

# Materialise the full table for plotting.
gdf = ca2024.execute()

# Categorical fill colour keyed on the `established` property: one colour stop
# per establishment year.
established = dict(
    property="established",
    type="categorical",
    stops=[
        [2023, "#26542C80"],
        [2024, "#F3AB3D80"],
    ],
)
paint = {"fill-color": established}

m = leafmap.Map(style="positron")
m.add_gdf(gdf, layer_type="fill", name="intersects", paint=paint)
m.add_layer_control()

# Also write a standalone HTML copy of the map, then show it as the cell output.
m.to_html("ca2024.html")
m