# Get zonal stats 

In [None]:
import ibis
import ibis.selectors as s
from ibis import _
import fiona
import geopandas as gpd
import rioxarray
from shapely.geometry import box

import rasterio
from rasterio.mask import mask
from rasterstats import zonal_stats
import pandas as pd
from joblib import Parallel, delayed

# Shared DuckDB connection, with the spatial extension loaded, used by the
# ibis queries in this notebook.
con = ibis.duckdb.connect(extensions=["spatial"])
# Value passed as ``n_jobs`` to the zonal-statistics helper.
threads = -1

In [None]:
# cropping US data to only CA 
def crop_raster_to_bounds(tif_file, vector_gdf):
    """Crop a raster to the bounding box of a vector layer.

    Parameters
    ----------
    tif_file : path of a raster that rasterio can open.
    vector_gdf : GeoDataFrame whose total bounds define the crop window.

    Returns
    -------
    tuple
        ``(out_image, out_meta)``: the cropped array returned by
        ``rasterio.mask.mask`` and a copy of the source metadata updated with
        the cropped height, width and transform.
    """
    # Imported locally because the notebook only imports numpy in a later cell.
    import numpy as np

    with rasterio.open(tif_file) as src:
        # Express the vector bounds in the raster's coordinates before cropping.
        if vector_gdf.crs is not None and src.crs is not None:
            vector_gdf = vector_gdf.to_crs(src.crs)

        # ``mask`` expects geometries, so turn the bounds into a polygon
        # (the same approach big_zonal_stats uses) instead of a BoundingBox.
        bounds_polygon = box(*vector_gdf.total_bounds)
        out_image, out_transform = mask(src, [bounds_polygon], crop=True)
        out_meta = src.meta.copy()

    out_meta.update({
        "driver": "GTiff",
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform,
    })
    print("Unique values in cropped raster:", np.unique(out_image))

    return out_image, out_meta


In [None]:
def big_zonal_stats(vec_file, tif_file, stats, col_name, n_jobs, verbose=10, timeout=10000):
    """Add one zonal statistic of a raster to every polygon of a vector file.

    Parameters
    ----------
    vec_file : path of a (Geo)Parquet file; a ``geom`` column is renamed to
        ``geometry`` and rows without a geometry are dropped.
    tif_file : path of the raster to summarise.
    stats : statistics forwarded to ``rasterstats.zonal_stats``.
    col_name : name of the column that receives the statistic.
    n_jobs, verbose, timeout : accepted for backward compatibility; the
        computation is currently serial and does not use them.

    Returns
    -------
    GeoDataFrame
        The polygons, reprojected to the raster CRS, with ``col_name`` added.
        The value is the ``mean`` statistic when it was requested, otherwise
        the first requested statistic; polygons without a value get ``None``.
    """
    gdf = gpd.read_parquet(vec_file)
    if gdf.crs is None:
        gdf = gdf.set_crs("EPSG:4326")
    gdf = gdf.rename(columns={"geom": "geometry"})
    gdf = gdf.set_geometry("geometry")
    gdf = gdf[gdf["geometry"].notna()].copy()

    with rasterio.open(tif_file) as src:
        # Read everything needed from the dataset while it is still open.
        nodata = src.nodata
        gdf = gdf.to_crs(src.crs)  # Transform vector to raster CRS

        # Crop the raster to the bounding box of the polygons.
        extent = box(*gdf.total_bounds)
        out_image, out_transform = mask(src, [extent], crop=True, nodata=nodata)

    # If raster is 3D, select the first band
    if out_image.ndim == 3:
        out_image = out_image[0]

    # Decide which statistic fills the output column.
    if not stats or stats in ("*", "ALL"):
        stat_key = "mean"
    else:
        stat_names = stats.split() if isinstance(stats, str) else list(stats)
        stat_key = "mean" if "mean" in stat_names else stat_names[0]

    # A single call handles all polygons; results come back in input order.
    results = zonal_stats(
        list(gdf.geometry),
        out_image,
        stats=stats,
        affine=out_transform,
        all_touched=True,
        nodata=nodata,
    )
    gdf[col_name] = [res.get(stat_key) for res in results]

    return gdf

In [None]:
# Shell commands used to download local copies of the input data
# aws s3 cp s3://vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif . --endpoint-url=https://data.source.coop
# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_bii_100m_cog.tif . --endpoint-url=https://data.source.coop
# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_fii_100m_cog.tif . --endpoint-url=https://data.source.coop
# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_expansion_100m_cog.tif . --endpoint-url=https://data.source.coop
# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_reduction_100m_cog.tif . --endpoint-url=https://data.source.coop
# aws s3 cp s3://cboettig/carbon/cogs/irrecoverable_c_total_2018.tif . --endpoint-url=https://data.source.coop
# aws s3 cp s3://cboettig/carbon/cogs/manageable_c_total_2018.tif . --endpoint-url=https://data.source.coop
# ! aws s3 cp s3://cboettig/justice40/disadvantaged-communities.parquet . --endpoint-url=https://data.source.coop
# minio/shared-biodiversity/redlist/cog/combined_sr_2022.tif
# /home/rstudio/minio/shared-biodiversity/redlist/cog/combined_rwr_2022.tif
# ! aws s3 cp s3://cboettig/social-vulnerability/svi2020_us_tract.parquet . --endpoint-url=https://data.source.coop


# Biodiversity Data

In [None]:
%%time
# First pass: reads the original polygon file and creates the running stats
# file that the following cells read and overwrite.
tif_file = 'SpeciesRichness_All.tif'
vec_file = "/home/rstudio/github/ca-30x30/ca2024-30m.parquet"
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="richness", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of RSR_All.tif per polygon, stored as `rsr` in the running stats file.
tif_file = 'RSR_All.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="rsr", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")

In [None]:
%%time
# Mean of combined_sr_2022.tif per polygon, stored as `all_species_richness`.
tif_file = 'combined_sr_2022.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="all_species_richness", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of combined_rwr_2022.tif per polygon, stored as `all_species_rwr`.
tif_file = 'combined_rwr_2022.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="all_species_rwr", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


# Carbon Data

In [None]:
%%time
# Mean of irrecoverable_c_total_2018.tif per polygon, stored as `irrecoverable_carbon`.
tif_file = 'irrecoverable_c_total_2018.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="irrecoverable_carbon", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")



In [None]:
%%time
# Mean of manageable_c_total_2018.tif per polygon, stored as `manageable_carbon`.
tif_file = 'manageable_c_total_2018.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="manageable_carbon", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of deforest_carbon_100m_cog.tif per polygon, stored as `deforest_carbon`.
tif_file = 'deforest_carbon_100m_cog.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="deforest_carbon", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


# Human Impact Data

In [None]:
%%time
# Mean of natcrop_bii_100m_cog.tif per polygon, stored as `biodiversity_intactness_loss`.
tif_file = 'natcrop_bii_100m_cog.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="biodiversity_intactness_loss", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of natcrop_fii_100m_cog.tif per polygon, stored as `forest_integrity_loss`.
tif_file = 'natcrop_fii_100m_cog.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="forest_integrity_loss", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")



In [None]:
%%time
# Mean of natcrop_expansion_100m_cog.tif per polygon, stored as `crop_expansion`.
tif_file = 'natcrop_expansion_100m_cog.tif'
vec_file = './cpad-stats-temp.parquet'
df = big_zonal_stats(
    vec_file,
    tif_file,
    stats=['mean'],
    col_name="crop_expansion",
    n_jobs=threads,
    verbose=0,
)
# Re-wrap with an explicit geometry column before writing the running stats file.
crop_expansion_gdf = gpd.GeoDataFrame(df, geometry="geometry")
crop_expansion_gdf.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of natcrop_reduction_100m_cog.tif per polygon, stored as `crop_reduction`.
tif_file = 'natcrop_reduction_100m_cog.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="crop_reduction", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of hfp_2021_100m_v1-2_cog.tif per polygon, stored as `human_impact`.
tif_file = 'hfp_2021_100m_v1-2_cog.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="human_impact", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


# Need to convert SVI & Justice40 files to tif

In [None]:
import geopandas as gpd
import numpy as np
import rasterio
from rasterio.features import rasterize
from rasterio.transform import from_bounds

def get_geotiff(gdf, output_file, col, pixel_size=0.01):
    """Rasterize one column of a polygon layer into a single-band GeoTIFF.

    Parameters
    ----------
    gdf : GeoDataFrame with a ``geometry`` column and the column ``col``.
        A layer without a CRS is assumed to be EPSG:4326; a layer in another
        CRS is reprojected to EPSG:4326.
    output_file : path of the GeoTIFF to write.
    col : name of the column whose values are burned into the raster.
    pixel_size : cell size in CRS units (degrees for EPSG:4326).

    Raises
    ------
    ValueError
        If ``pixel_size`` is not positive.

    Notes
    -----
    Cells not covered by any geometry are written as 0.0 and no nodata value
    is declared in the file.
    """
    from rasterio.enums import Resampling

    if pixel_size <= 0:
        raise ValueError("pixel_size must be positive")

    gdf = gdf.set_geometry("geometry")
    # set_crs() raises when the layer already carries a different CRS, so only
    # use it for layers without one and reproject the others.
    if gdf.crs is None:
        gdf = gdf.set_crs("EPSG:4326")
    else:
        gdf = gdf.to_crs("EPSG:4326")
    print(gdf.crs)

    # Set raster properties; keep at least one cell in each direction so the
    # transform stays defined for very small extents.
    minx, miny, maxx, maxy = gdf.total_bounds
    width = max(1, int((maxx - minx) / pixel_size))
    height = max(1, int((maxy - miny) / pixel_size))
    transform = from_bounds(minx, miny, maxx, maxy, width, height)

    # Burn the column values into a float raster.
    raster = rasterize(
        zip(gdf.geometry, gdf[col]),
        out_shape=(height, width),
        transform=transform,
        fill=0.0,  # Background value for areas outside the geometry
        dtype="float32",  # Set data type to handle continuous values
    )
    print("Unique values in raster:", np.unique(raster))

    # Define GeoTIFF metadata
    out_meta = {
        "driver": "GTiff",
        "height": height,
        "width": width,
        "count": 1,
        "dtype": raster.dtype,
        "crs": gdf.crs,
        "transform": transform,
        "compress": "deflate",  # Use compression to reduce file size
    }

    # Write the band, then add overviews and a band tag.
    with rasterio.open(output_file, "w", **out_meta) as dest:
        dest.write(raster, 1)
        dest.build_overviews([2, 4, 8, 16], Resampling.average)
        # NOTE(review): the tag says "Meter" while the raster is in EPSG:4326
        # with a cell size in degrees - confirm this is intended.
        dest.update_tags(1, TIFFTAG_RESOLUTION_UNIT="Meter")


# SVI

In [None]:
# Clean up the SVI data: keep the five RPL_* columns plus the geometry and
# give them descriptive names (new name -> source column).
svi_columns = {
    "SVI": "RPL_THEMES",
    "socioeconomic": "RPL_THEME1",
    "household_char": "RPL_THEME2",
    "racial_ethnic_minority": "RPL_THEME3",
    "housing_transit": "RPL_THEME4",
    "geometry": "Shape",
}
svi_df = (
    con.read_parquet("svi2020_us_tract.parquet")
    .select(*svi_columns.values())
    .rename(**svi_columns)
    .cast({"geometry": "geometry"})
)
svi_df.execute().to_parquet("svi2020_us_tract_clean.parquet")


In [None]:
# Convert the cleaned SVI parquet to one GeoTIFF per column; each raster is
# built from that column together with the geometry.
gdf = gpd.read_parquet("svi2020_us_tract_clean.parquet")
svi_rasters = [
    ("SVI", "svi.tif"),
    ("socioeconomic", "svi_socioeconomic.tif"),
    ("household_char", "svi_household.tif"),
    ("racial_ethnic_minority", "svi_minority.tif"),
    ("housing_transit", "svi_transit.tif"),
]
for svi_column, svi_tif in svi_rasters:
    get_geotiff(gdf[[svi_column, 'geometry']], svi_tif, svi_column)

In [None]:
%%time
# Mean of svi.tif per polygon. The column is written in lower case because the
# clean-up step later in the notebook reads it as `svi`.
tif_file = 'svi.tif'
vec_file = './cpad-stats-temp.parquet'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="svi", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")



In [None]:
%%time
# Mean of svi_socioeconomic.tif per polygon, stored as `socioeconomic_status`.
vec_file = './cpad-stats-temp.parquet'
tif_file = 'svi_socioeconomic.tif'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="socioeconomic_status", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")



In [None]:
%%time
# Mean of svi_household.tif per polygon, stored as `household_char`.
vec_file = './cpad-stats-temp.parquet'
tif_file = 'svi_household.tif'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="household_char", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")



In [None]:
%%time
# Mean of svi_minority.tif per polygon, stored as `racial_ethnic_minority`.
vec_file = './cpad-stats-temp.parquet'
tif_file = 'svi_minority.tif'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="racial_ethnic_minority", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


In [None]:
%%time
# Mean of svi_transit.tif per polygon, stored as `housing_transit`.
vec_file = './cpad-stats-temp.parquet'
tif_file = 'svi_transit.tif'
# Keep the GeoDataFrame in `df`; chaining .to_parquet() would bind None instead.
df = big_zonal_stats(vec_file, tif_file, stats=['mean'], col_name="housing_transit", n_jobs=threads, verbose=0)
df.to_parquet("cpad-stats-temp.parquet")


# Justice40 

In [None]:
# Clean up: California rows only, reduced to the `justice40` flag and the
# geometry converted from ESRI:102039 to EPSG:4326.
j40_source = con.read_parquet("disadvantaged-communities.parquet")
j40_california = (
    j40_source
    .rename(geometry="SHAPE", justice40="Disadvan")
    .filter(_.StateName == "California")
)
justice40 = (
    j40_california
    .mutate(geometry=_.geometry.convert("ESRI:102039", "EPSG:4326"))
    .select("justice40", "geometry")
)
justice40.execute().to_parquet("ca_justice40.parquet")

In [None]:
# justice40 is either 0 or 1, so we compute the fraction (0-1) of each polygon's area where justice40 = 1.

def big_zonal_stats_binary(vec_file, justice40_file, col_name, projected_crs="EPSG:3310"):
    """Compute the share of each polygon covered by ``justice40 == 1`` areas.

    Parameters
    ----------
    vec_file : path of the polygon (Geo)Parquet file to annotate.
    justice40_file : path of a (Geo)Parquet file with a ``justice40`` column.
    col_name : name of the column that receives the share.
    projected_crs : CRS used for the area calculations.

    Returns
    -------
    GeoDataFrame
        The polygons of ``vec_file`` reprojected to ``projected_crs`` with
        ``col_name`` holding intersection area divided by polygon area.
        Missing, empty or zero-area polygons get 0.0. Intersection areas are
        summed per flagged polygon, so overlapping flagged polygons count
        more than once.
    """
    gdf = gpd.read_parquet(vec_file)
    justice40_gdf = gpd.read_parquet(justice40_file)

    # Layers without a CRS are assumed to be EPSG:4326.
    if gdf.crs is None:
        gdf = gdf.set_crs("EPSG:4326")
    if justice40_gdf.crs is None:
        justice40_gdf = justice40_gdf.set_crs("EPSG:4326")

    # Both layers go to the same projected CRS so areas are comparable.
    gdf = gdf.to_crs(projected_crs)
    justice40_gdf = justice40_gdf.to_crs(projected_crs)

    # Only the flagged polygons contribute; query them via the spatial index
    # instead of testing every one against every polygon.
    flagged = justice40_gdf[justice40_gdf['justice40'] == 1].geometry
    flagged_index = flagged.sindex

    shares = []
    for geom in gdf.geometry:
        # A share is undefined without area; report 0.0 rather than dividing by zero.
        if geom is None or geom.is_empty or geom.area == 0:
            shares.append(0.0)
            continue
        hits = flagged.iloc[flagged_index.query(geom, predicate="intersects")]
        if hits.empty:
            shares.append(0.0)  # No intersection with justice40 == 1 polygons
            continue
        shares.append(hits.intersection(geom).area.sum() / geom.area)

    gdf[col_name] = shares
    return gdf




In [None]:
%%time
# Share of each polygon covered by Justice40 disadvantaged areas.
# NOTE(review): big_zonal_stats_binary returns its result in the projected CRS
# (EPSG:3310 by default), so the running stats file is in that CRS after this
# cell - confirm that the later steps expect it.
vec_file = './cpad-stats-temp.parquet'
justice40_path = "ca_justice40.parquet"

df = big_zonal_stats_binary(
    vec_file,
    justice40_path,
    col_name="percent_disadvantaged",
)
df.to_parquet("cpad-stats-temp.parquet")


# Fire

In [None]:
import geopandas as gpd

#get percentage of polygon with fire occurrence 
def fire_stats(file_name, fire_df, col_name):
    """Compute the share of each polygon covered by fire perimeters.

    Parameters
    ----------
    file_name : path of the polygon (Geo)Parquet file to annotate.
    fire_df : GeoDataFrame of fire perimeters. When both layers carry a CRS
        and they differ, the perimeters are reprojected to the polygons' CRS.
    col_name : name of the column that receives the share.

    Returns
    -------
    GeoDataFrame
        The polygons of ``file_name`` with ``col_name`` holding the burned
        share rounded to 3 decimals. Overlapping perimeters are unioned so no
        area is counted twice. Missing, empty or zero-area polygons get 0.0.
    """
    from shapely.ops import unary_union

    gdf = gpd.read_parquet(file_name)

    # Areas are only comparable when both layers use the same CRS.
    if gdf.crs is not None and fire_df.crs is not None and gdf.crs != fire_df.crs:
        fire_df = fire_df.to_crs(gdf.crs)

    fire_geoms = fire_df.geometry
    fire_index = fire_geoms.sindex

    shares = []
    for geom in gdf.geometry:
        # A share is undefined without area; report 0.0 rather than dividing by zero.
        if geom is None or geom.is_empty or geom.area == 0:
            shares.append(0.0)
            continue
        # Find all fires that intersect with the current protected area.
        hits = fire_geoms.iloc[fire_index.query(geom, predicate="intersects")]
        if hits.empty:
            shares.append(0.0)
            continue
        # Union first to avoid double-counting overlapping fires; this also
        # covers the single-fire case.
        burned_area = unary_union(list(hits)).intersection(geom).area
        shares.append(round(burned_area / geom.area, 3))

    gdf[col_name] = shares
    return gdf


In [None]:
# Historical fire perimeters: California rows with a non-empty year from 2003
# onwards, keeping only year and geometry.
fire_source = con.read_parquet("firep22_1.parquet").rename(year="YEAR_")
fire_20 = (
    fire_source
    .filter(_.STATE == "CA", _.year != '')
    .cast({"year": "int"})
    .filter(_.year >= 2003)
    .select("year", "geometry")
    # Keep valid geometries as they are; repair invalid ones with buffer(0).
    .mutate(geometry=ibis.ifelse(_.geometry.is_valid(), _.geometry, _.geometry.buffer(0)))
)
fire_20.execute().to_parquet("ca-fire-20yrs.parquet")

# NOTE(review): the 20/10/5-year cut-offs are spaced as 2023 minus N, which
# would put the 2-year cut-off at 2021 - confirm that 2022 is intended.
fire_10, fire_5, fire_2 = (
    fire_20.filter(_.year >= first_year) for first_year in (2013, 2018, 2022)
)

# Materialise each range as a GeoDataFrame labelled EPSG:3310.
fire_20_df, fire_10_df, fire_5_df, fire_2_df = (
    fire_expr.execute().set_crs("EPSG:3310")
    for fire_expr in (fire_20, fire_10, fire_5, fire_2)
)


In [None]:
# Prescribed burns: California rows with a non-blank year from 2003 onwards,
# keeping only year and geometry.
rxburn_source = con.read_parquet("rxburn22_1.parquet").rename(year="YEAR_")
rxburn_20 = (
    rxburn_source
    .filter(_.STATE == "CA", _.year != ' ', _.year != '')
    .cast({"year": "int"})
    .filter(_.year >= 2003)
    .select("year", "geometry")
    # Keep valid geometries as they are; repair invalid ones with buffer(0).
    .mutate(geometry=ibis.ifelse(_.geometry.is_valid(), _.geometry, _.geometry.buffer(0)))
)
rxburn_20.execute().to_parquet("ca-rxburn-20yrs.parquet")

# NOTE(review): the 20/10/5-year cut-offs are spaced as 2023 minus N, which
# would put the 2-year cut-off at 2021 - confirm that 2022 is intended.
rxburn_10, rxburn_5, rxburn_2 = (
    rxburn_20.filter(_.year >= first_year) for first_year in (2013, 2018, 2022)
)

# Materialise each range as a GeoDataFrame labelled EPSG:3310.
rxburn_20_df, rxburn_10_df, rxburn_5_df, rxburn_2_df = (
    rxburn_expr.execute().set_crs("EPSG:3310")
    for rxburn_expr in (rxburn_20, rxburn_10, rxburn_5, rxburn_2)
)

In [None]:
# Validate the polygon geometries and move them to EPSG:3310 to match the fire
# polygons.
# NOTE(review): this reads a `geom` column and treats it as EPSG:4326, while
# the zonal-stats helpers in this notebook write the column as `geometry` and
# big_zonal_stats_binary returns EPSG:3310 - confirm the state of the file.
stats_projected = (
    con.read_parquet('cpad-stats-temp.parquet')
    .mutate(geom=_.geom.convert("EPSG:4326", "EPSG:3310"))
)
ca = (
    stats_projected
    # Keep valid geometries as they are; repair invalid ones with buffer(0).
    .mutate(geometry=ibis.ifelse(_.geom.is_valid(), _.geom, _.geom.buffer(0)))
    .drop('geom')
)
gdf = ca.execute().set_crs('EPSG:3310')
gdf.to_parquet('cpad-stats-temp-EPSG3310.parquet')


In [None]:
%%time
# Add one burned-share column per fire / prescribed-burn range. Each pass
# re-reads the file and writes it back with the new column.
file_name = 'cpad-stats-temp-EPSG3310.parquet'

fire_layers = {
    "percent_fire_20yr": fire_20_df,
    "percent_fire_10yr": fire_10_df,
    "percent_fire_5yr": fire_5_df,
    "percent_fire_2yr": fire_2_df,
    "percent_rxburn_20yr": rxburn_20_df,
    "percent_rxburn_10yr": rxburn_10_df,
    "percent_rxburn_5yr": rxburn_5_df,
    "percent_rxburn_2yr": rxburn_2_df,
}

for name, df in fire_layers.items():
    fire_stats(file_name, df, col_name=name).to_parquet(file_name)

In [None]:
# Save the data back to cpad-stats-temp in EPSG:4326
# (not strictly necessary, but it lets the same code be reused).
ca = con.read_parquet(file_name).mutate(
    geometry=_.geometry.convert("EPSG:3310", "EPSG:4326")
)
gdf = ca.execute().set_crs('EPSG:4326')
gdf.to_parquet("cpad-stats-temp.parquet")



# Cleaning up + Rounding floats

In [None]:
## clean up
# Fresh DuckDB connection with the spatial extension loaded.
con = ibis.duckdb.connect(extensions=["spatial"])
# Polygon ids and geometries from the original polygon file; joined back at the end.
ca_geom = con.read_parquet("ca2024-30m.parquet").cast({"geom":"geometry"}).select("id","geom")

# Final table: cast four columns to int64, round the remaining statistics to
# 3 decimals, keep the charted columns and attach the original geometry by id.
ca = (con
 .read_parquet("cpad-stats-temp.parquet")
 .cast({
 "crop_expansion": "int64",
 "crop_reduction": "int64",
 "manageable_carbon": "int64",
 "irrecoverable_carbon": "int64"
 })
 .mutate(
 richness=_.richness.round(3),
 rsr=_.rsr.round(3),
 all_species_rwr=_.all_species_rwr.round(3),
 all_species_richness=_.all_species_richness.round(3),
 percent_disadvantaged=(_.percent_disadvantaged).round(3),
 # NOTE(review): `_.svi` assumes the SVI statistic is stored under the
 # lower-case column name `svi` - confirm against the SVI zonal-stats step.
 svi=_.svi.round(3),
 # The SVI theme columns get an `svi_` prefix here.
 svi_socioeconomic_status=_.socioeconomic_status.round(3),
 svi_household_char=_.household_char.round(3),
 svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),
 svi_housing_transit=_.housing_transit.round(3),
 human_impact=_.human_impact.round(3),
 deforest_carbon=_.deforest_carbon.round(3),
 biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),
 forest_integrity_loss=_.forest_integrity_loss.round(3),
 percent_fire_20yr = _.percent_fire_20yr.round(3),
 percent_fire_10yr = _.percent_fire_10yr.round(3),
 percent_fire_5yr = _.percent_fire_5yr.round(3),
 percent_fire_2yr = _.percent_fire_2yr.round(3),
 percent_rxburn_20yr = _.percent_rxburn_20yr.round(3),
 percent_rxburn_10yr = _.percent_rxburn_10yr.round(3),
 percent_rxburn_5yr = _.percent_rxburn_5yr.round(3),
 percent_rxburn_2yr = _.percent_rxburn_2yr.round(3),
 )
 # only grabbing columns we are making charts with 
 .select('established', 'reGAP', 'name', 'access_type', 'manager', 'manager_type', 'Easement', 'Acres', 'id', 'type','richness', 
 'rsr', 'irrecoverable_carbon', 'manageable_carbon', 'percent_fire_20yr', 'percent_fire_10yr', 'percent_fire_5yr','percent_fire_2yr',
 'percent_rxburn_20yr', 'percent_rxburn_10yr', 'percent_rxburn_5yr','percent_rxburn_2yr', 'percent_disadvantaged',
 'svi', 'svi_socioeconomic_status', 'svi_household_char', 'svi_racial_ethnic_minority',
 'svi_housing_transit', 'deforest_carbon','human_impact'
 )
 # The select above leaves out the geometry; it comes from ca_geom via the id join.
 .join(ca_geom, "id", how="inner")
 )

# Preview the first rows.
ca.head(5).execute()


# Save as PMTiles + Upload data

In [None]:
import subprocess
import os
from huggingface_hub import HfApi, login
import streamlit as st

# Log in to the Hugging Face Hub with the token kept in the Streamlit secrets.
hf_token = st.secrets["HF_TOKEN"]
login(hf_token)
# api = HfApi(add_to_git_credential=False)
api = HfApi()

def hf_upload(file, repo_id, repo_type):
    """Upload a local file to a Hugging Face repository under the same path.

    Parameters
    ----------
    file : local path; also used as the path inside the repository.
    repo_id : repository identifier.
    repo_type : repository type passed to ``upload_file`` (this notebook uses
        "dataset" and "space").
    """
    # The upload result is not used, so it is not bound to a name.
    api.upload_file(
        path_or_fileobj=file,
        path_in_repo=file,
        repo_id=repo_id,
        repo_type=repo_type,
    )
def generate_pmtiles(input_file, output_file, max_zoom=12):
    """Build a single-layer PMTiles archive from a vector file with Tippecanoe.

    Parameters
    ----------
    input_file : vector file to tile; it becomes the layer named ``layer``.
    output_file : path of the PMTiles file to write (overwritten if present).
    max_zoom : accepted for backward compatibility but not used; the command
        passes ``-zg`` and ``--extend-zooms-if-still-dropping`` instead.

    Raises
    ------
    RuntimeError
        If the ``tippecanoe`` executable is not found on PATH.

    A failing Tippecanoe run is reported with a printed message and does not
    raise.
    """
    import shutil

    # Ensure Tippecanoe is installed (portable PATH lookup, no extra process).
    if shutil.which("tippecanoe") is None:
        raise RuntimeError("Tippecanoe is not installed or not in PATH")

    # Construct the Tippecanoe command
    command = [
        "tippecanoe",
        "-o", output_file,
        "-zg",
        "--extend-zooms-if-still-dropping",
        "--force",
        "--projection", "EPSG:4326",
        "-L", "layer:" + input_file,
    ]

    # Run Tippecanoe
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running Tippecanoe: {e}")
    else:
        print(f"Successfully generated PMTiles file: {output_file}")



In [None]:
# Materialise the final table, then publish it as PMTiles and as Parquet.
gdf = ca.execute().set_crs("EPSG:4326")

geojson_path = "cpad-stats.geojson"
gdf.to_file(geojson_path)
generate_pmtiles(geojson_path, "cpad-stats.pmtiles")
hf_upload("cpad-stats.pmtiles", "boettiger-lab/ca-30x30", "dataset")

# The Parquet copy goes to both the dataset and the space repository.
gdf.to_parquet("cpad-stats.parquet")
for target_repo_type in ("dataset", "space"):
    hf_upload("cpad-stats.parquet", "boettiger-lab/ca-30x30", target_repo_type)



# Redoing fire polygons pmtiles to have each range be its own layer 

In [None]:
def generate_pmtiles(input_file1, input_file2, input_file3=None, input_file4=None, output_file=None, max_zoom=12):
    """Build a PMTiles archive with Tippecanoe, one layer per input file.

    Two call forms are supported, because this definition replaces the earlier
    single-layer ``generate_pmtiles`` that other cells still call:

    * ``generate_pmtiles(in1, in2, in3, in4, out)`` writes layers ``layer1``
      to ``layer4``; inputs left as ``None`` are skipped.
    * ``generate_pmtiles(in1, out)`` writes a single layer named ``layer``.

    Parameters
    ----------
    input_file1 .. input_file4 : vector files to tile.
    output_file : path of the PMTiles file to write (overwritten if present).
        When omitted, ``input_file2`` is taken as the output path.
    max_zoom : accepted for backward compatibility but not used; the command
        passes ``-zg`` and ``--extend-zooms-if-still-dropping`` instead.

    Raises
    ------
    TypeError
        If more than one input is given without ``output_file``.
    RuntimeError
        If the ``tippecanoe`` executable is not found on PATH.

    A failing Tippecanoe run is reported with a printed message and does not
    raise.
    """
    import shutil

    if output_file is None:
        # Two-argument form: the second positional argument is the output.
        if input_file3 is not None or input_file4 is not None:
            raise TypeError(
                "generate_pmtiles() needs output_file when more than one input layer is given"
            )
        layers = [("layer", input_file1)]
        output_file = input_file2
    else:
        inputs = (input_file1, input_file2, input_file3, input_file4)
        layers = [
            (f"layer{number}", path)
            for number, path in enumerate(inputs, start=1)
            if path is not None
        ]

    # Ensure Tippecanoe is installed (portable PATH lookup, no extra process).
    if shutil.which("tippecanoe") is None:
        raise RuntimeError("Tippecanoe is not installed or not in PATH")

    # Construct the Tippecanoe command
    command = [
        "tippecanoe",
        "-o", output_file,
        "-zg",
        "--extend-zooms-if-still-dropping",
        "--force",
        "--projection", "EPSG:4326",
    ]
    for layer_name, path in layers:
        command += ["-L", layer_name + ":" + path]

    # Run Tippecanoe
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running Tippecanoe: {e}")
    else:
        print(f"Successfully generated PMTiles file: {output_file}")


In [None]:
# Prescribed burns for the map layers: same filters as for the statistics, but
# all attribute columns are kept and the geometry is converted to EPSG:4326.
rxburn_20 = (
    con.read_parquet("rxburn22_1.parquet")
    .rename(year="YEAR_")
    .filter(_.STATE == "CA", _.year != ' ', _.year != '')
    .cast({"year": "int"})
    .filter(_.year >= 2003)
    # Keep valid geometries as they are; repair invalid ones with buffer(0).
    .mutate(geometry=ibis.ifelse(_.geometry.is_valid(), _.geometry, _.geometry.buffer(0)))
    .mutate(geometry=_.geometry.convert("EPSG:3310", "EPSG:4326"))
)

# NOTE(review): the 20/10/5-year cut-offs are spaced as 2023 minus N, which
# would put the 2-year cut-off at 2021 - confirm that 2022 is intended.
rxburn_10 = rxburn_20.filter(_.year >= 2013)
rxburn_5 = rxburn_20.filter(_.year >= 2018)
rxburn_2 = rxburn_20.filter(_.year >= 2022)

# Write each range to GeoJSON. The results are not assigned: to_file() returns
# None, and binding it to rxburn_*_df would overwrite the GeoDataFrames of the
# same names that the fire statistics use.
rxburn_20.execute().set_crs("EPSG:4326").to_file("rxburn_20.geojson")
rxburn_10.execute().set_crs("EPSG:4326").to_file("rxburn_10.geojson")
rxburn_5.execute().set_crs("EPSG:4326").to_file("rxburn_5.geojson")
rxburn_2.execute().set_crs("EPSG:4326").to_file("rxburn_2.geojson")

# One PMTiles archive with a layer per range, then upload.
generate_pmtiles("rxburn_20.geojson", "rxburn_10.geojson", "rxburn_5.geojson", "rxburn_2.geojson", "cal_rxburn_2022.pmtiles")
hf_upload("cal_rxburn_2022.pmtiles", "boettiger-lab/ca-30x30", "dataset")


In [None]:
# Fire perimeters for the map layers: same filters as for the statistics, with
# the geometry converted to EPSG:4326.
fire_20 = (
    con.read_parquet("firep22_1.parquet")
    .rename(year="YEAR_")
    .filter(_.STATE == "CA", _.year != '')
    .cast({"year": "int"})
    .filter(_.year >= 2003)
    .select("year", "geometry")
    # Keep valid geometries as they are; repair invalid ones with buffer(0).
    .mutate(geometry=ibis.ifelse(_.geometry.is_valid(), _.geometry, _.geometry.buffer(0)))
    .mutate(geometry=_.geometry.convert("EPSG:3310", "EPSG:4326"))
)

# NOTE(review): the 20/10/5-year cut-offs are spaced as 2023 minus N, which
# would put the 2-year cut-off at 2021 - confirm that 2022 is intended.
fire_10 = fire_20.filter(_.year >= 2013)
fire_5 = fire_20.filter(_.year >= 2018)
fire_2 = fire_20.filter(_.year >= 2022)

# Write each range to GeoJSON. The results are not assigned: to_file() returns
# None, and binding it to fire_*_df would overwrite the GeoDataFrames of the
# same names that the fire statistics use.
fire_20.execute().set_crs("EPSG:4326").to_file("fire_20.geojson")
fire_10.execute().set_crs("EPSG:4326").to_file("fire_10.geojson")
fire_5.execute().set_crs("EPSG:4326").to_file("fire_5.geojson")
fire_2.execute().set_crs("EPSG:4326").to_file("fire_2.geojson")

# One PMTiles archive with a layer per range, then upload.
generate_pmtiles("fire_20.geojson", "fire_10.geojson", "fire_5.geojson", "fire_2.geojson", "cal_fire_2022.pmtiles")
hf_upload("cal_fire_2022.pmtiles", "boettiger-lab/ca-30x30", "dataset")


# Renaming variables, adding new columns, etc

In [None]:
# Reload the published table, lower-case two column names, drop the fire and
# prescribed-burn ranges other than 10 years, and recode two columns as text.
dropped_ranges = [
    'percent_fire_20yr', 'percent_fire_5yr', 'percent_fire_2yr',
    'percent_rxburn_20yr', 'percent_rxburn_5yr', 'percent_rxburn_2yr',
]
ca = (
    con.read_parquet("https://huggingface.co/spaces/boettiger-lab/ca-30x30/resolve/main/cpad-stats.parquet")
    .rename(easement="Easement", acres="Acres")
    .drop(*dropped_ranges)
    .cast({"established": "str"})
    .mutate(
        easement=_.easement.substitute({"Easement": "True", "Fee": "False"}),
        established=_.established.substitute({"2023": "pre-2024"}),
    )
)

In [None]:
# Upload the local cpad-stats.parquet, as it currently exists on disk, to the space repository.
hf_upload(
    file="cpad-stats.parquet",
    repo_id="boettiger-lab/ca-30x30",
    repo_type="space",
)


In [None]:
# Materialise the renamed table, overwrite the local Parquet file and upload
# it to the space repository.
parquet_path = "cpad-stats.parquet"
gdf = ca.execute().set_crs("EPSG:4326")
gdf.to_parquet(parquet_path)
# hf_upload("cpad-stats.parquet", "boettiger-lab/ca-30x30","dataset")
hf_upload(parquet_path, "boettiger-lab/ca-30x30", "space")




In [None]:
# Rebuild the PMTiles archive from the current table and upload it.
# Uses the single-layer call form generate_pmtiles(input, output); make sure
# the definition currently bound in the session accepts it.
geojson_path = "cpad-stats.geojson"
pmtiles_path = "cpad-stats.pmtiles"
gdf.to_file(geojson_path)
generate_pmtiles(geojson_path, pmtiles_path)
hf_upload(pmtiles_path, "boettiger-lab/ca-30x30", "dataset")


In [None]:
# Also publish the Parquet file to the folium space repository.
hf_upload(
    file="cpad-stats.parquet",
    repo_id="boettiger-lab/ca-30x30-folium",
    repo_type="space",
)


# Rounding acres 

In [None]:
# The foliumap tooltip looks messy with long decimals, so round the acres
# value to 4 places.
parquet = "cpad-stats.parquet"
ca = con.read_parquet(parquet).mutate(acres=_.acres.round(4))

gdf = ca.execute().set_crs("EPSG:4326")
gdf.to_parquet("cpad-stats.parquet")
## didn't need to upload parquet since the rounding doesn't impact this?
hf_upload(
    "cpad-stats.parquet",
    "boettiger-lab/ca-30x30",
    "dataset",
)
# hf_upload("cpad-stats.parquet", "boettiger-lab/ca-30x30","space")
# hf_upload("cpad-stats.parquet", "boettiger-lab/ca-30x30-folium","space")


In [None]:
# Rebuild the PMTiles archive from the table with rounded acres and upload it.
# Uses the single-layer call form generate_pmtiles(input, output); make sure
# the definition currently bound in the session accepts it.
geojson_path = "cpad-stats.geojson"
pmtiles_path = "cpad-stats.pmtiles"
gdf.to_file(geojson_path)
generate_pmtiles(geojson_path, pmtiles_path)
hf_upload(pmtiles_path, "boettiger-lab/ca-30x30", "dataset")
