In [21]:
import ibis
from ibis import _
# Open (or create) the on-disk DuckDB database "tmp2" with the spatial extension.
conn = ibis.duckdb.connect("tmp2", extensions=["spatial"])

# Read the areas layer, give the geometry and id columns short names, and keep
# only rows whose reGAP code is below 3.
areas_raw = conn.read_parquet("https://data.source.coop/cboettig/ca30x30/ca_areas.parquet")
areas_typed = areas_raw.cast({"SHAPE": "geometry"})
areas_named = areas_typed.rename(geom="SHAPE", gid="OBJECTID")
# Optional narrowing for quick experiments:
# areas_named = areas_named.filter(_.UNIT_NAME == "Angeles National Forest")
tbl = areas_named.filter(_.reGAP < 3)

# Materialise one table per release year: t1 holds 2024, t2 holds 2023.
release_2024 = tbl.filter(_.Release_Year == 2024)
release_2023 = tbl.filter(_.Release_Year == 2023)
conn.create_table("t1", release_2024, overwrite=True)
conn.create_table("t2", release_2023, overwrite=True)



FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [23]:
import leafmap.maplibregl as leafmap

# Pull both release tables out of DuckDB into memory.
ca2024 = conn.table("t1").execute()
ca2023 = conn.table("t2").execute()

# Draw each release as its own named layer (2024 first, then 2023).
m = leafmap.Map()
for layer_name, layer_gdf in (("2024", ca2024), ("2023", ca2023)):
    m.add_gdf(layer_gdf, name=layer_name)

m

In [None]:

# Close the ibis connection to "tmp2"; the next cell opens the same file with the raw duckdb client.
# NOTE(review): presumably needed so the two handles do not conflict — confirm.
conn.disconnect()

In [2]:
%%time

## Build table `diff`: the parts of each 2024 polygon not covered by any 2023 polygon.
## RUN this on a machine with a whole lot of RAM; consider filtering federal/non-federal first.
## (The recorded run of this cell took just under 12 minutes of wall time.)
import duckdb
# Same on-disk database file ("tmp2") into which the ibis cell wrote t1 (2024) and t2 (2023).
db = duckdb.connect("tmp2")
db.install_extension("spatial")
db.load_extension("spatial")

# Query outline:
#   temp  : for each t1 polygon id (gid), the union of every t2 geometry that intersects it.
#   final : each t1 geometry minus that union. The LEFT JOIN + COALESCE means a t1 row with
#           no intersecting t2 geometry has an empty collection subtracted, i.e. is kept whole.
# The resulting table has a single column, geom (no attribute columns are carried over).
db.sql('''
CREATE OR REPLACE TABLE diff AS (
with temp as 
(
  select   b.gid, st_union_agg(a.geom) as geom
  from     t1 b join t2 a on st_intersects(a.geom, b.geom)
  group by b.gid
) 
select st_difference(b.geom,coalesce(t.geom, 'GEOMETRYCOLLECTION EMPTY'::geometry)) as geom
from t1 b left join temp t on b.gid = t.gid
)
''')


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 22min 10s, sys: 43 s, total: 22min 53s
Wall time: 11min 47s


In [7]:
## Cannot write straight to GeoParquet because the geometries carry M values!
#db.table("diff").to_parquet("diff.parquet")

## Forcing the geometries to 2D in SQL doesn't work either:
#db.sql('''CREATE OR REPLACE TABLE diff2024 AS SELECT *, st_force2d(geom) AS geom FROM diff''')

## Another option would be to cast geom to a blob...

In [10]:
# Fall back to doing the export in RAM via geopandas instead of writing from DuckDB.
# NOTE(review): the original author reports that geopandas happens to drop the M
# values on this path, which is what lets the Parquet write succeed — not verified here.
conn = ibis.duckdb.connect("tmp2", extensions=["spatial"])
diff_tbl = conn.table("diff")
# Reproject from EPSG:3310 to EPSG:4326 before pulling the rows into memory.
reprojected = diff_tbl.mutate(geom=_.geom.convert("epsg:3310", "epsg:4326"))
gdf = reprojected.execute()
gdf.to_parquet("ca2024_diffs.parquet")


In [19]:
# Stash the result in our team S3 (MinIO) storage.

import streamlit as st
from minio import Minio
import os  # not used in this cell any more; kept because the HTML upload cell below relies on it

# Read the MinIO credentials from the Streamlit secrets store so they are not
# written into the notebook, then build an authenticated HTTPS client.
key = st.secrets["MINIO_KEY"]
secret = st.secrets["MINIO_SECRET"]
client = Minio("minio.carlboettiger.info", key, secret, secure=True)

# fput_object uploads directly from a file path, so there is no need to open
# the file and compute its length by hand. The trailing ';' suppresses the
# returned write-result repr, matching the original cell's lack of output.
client.fput_object(
    "public-biodiversity",
    "ca30x30/ca2024_diffs.parquet",
    "ca2024_diffs.parquet",
);




In [26]:
# The published file can be read straight back from S3 and drawn in full.
# Note gdf has no metadata: it holds only the geometry column.

import ibis
import leafmap.maplibregl as leafmap

# Fresh in-memory DuckDB connection; only the spatial extension is needed here.
conn = ibis.duckdb.connect(extensions=["spatial"])
diffs_url = "https://minio.carlboettiger.info/public-biodiversity/ca30x30/ca2024_diffs.parquet"
remote_diffs = conn.read_parquet(diffs_url)
gdf = remote_diffs.execute()

m = leafmap.Map()
m.add_gdf(gdf)
#m.to_html("ca2024.html")
m

Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…

In [25]:
# Publish the rendered map next to the data in the team bucket.
# Relies on `os` and `client` from the Parquet upload cell and on the map `m`.
path = "ca2024.html"

# No active cell writes this file, so on a fresh run it does not exist yet.
# Export it from the current map instead of failing with FileNotFoundError;
# an already existing file is uploaded as-is, exactly as before.
if not os.path.exists(path):
    m.to_html(path)

# Upload from the path and label it as HTML so browsers render the page rather
# than downloading it (the default type is application/octet-stream).
# The trailing ';' suppresses the returned write-result repr.
client.fput_object(
    "public-biodiversity",
    "ca30x30/" + path,
    path,
    content_type="text/html",
);




In [None]:
# Unused idea: export through the DuckDB spatial GDAL writer by ending a COPY statement with:
# "TO 'new2024.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES')"