Spaces:

boettiger-lab
/

pad-us

Sleeping

App Files Files Community

cboettig committed on Mar 27, 2024

Commit

321154b

1 Parent(s): 71736e2

preprocess

Browse files

Files changed (1) hide show

preprocess.py +122 -0

preprocess.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import ibis
+from ibis import _
+import rioxarray
+from shapely.geometry import box
+import fiona
+# +
+# read crs, ideally we could do this with st_read_meta() in ibis+duckdb
+fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
+v = fiona.open(fgb)
+crs = v.crs
+# extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
+cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
+r = rioxarray.open_rasterio(cog)
+bounds = box(*r.rio.transform_bounds(crs))
+# +
+#import leafmap
+#leafmap.cog_validate(cog)
+# +
+con = ibis.duckdb.connect()
+# We could just read the flatgeobuf with read_geo.
+# it is not as fast as working with the (Geo)Parquet
+# pad = con.read_geo(fgb)
+# -
+# Unfortunately, ibis doesn't detect that this is GeoParquet.  We need a SQL escape-hatch to cast the geometry
+parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
+con = ibis.duckdb.connect()
+con.load_extension("spatial")
+con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
+pad = con.table("pad")
+pad.columns
+# +
+# Now we can do all the usual SQL queries to subset the data.  Note the `geom.within()` spatial filter!
+focal_columns = ["bucket", "FeatClass", "Mang_Name",  "Mang_Type",  "Des_Tp",
+                 "Pub_Access",   "GAP_Sts",  "IUCN_Cat",   "Unit_Nm",  "geom"]
+public = ["DIST", "LOC", "FED", "STAT", "JNT"]
+case = (
+    ibis.case()
+    .when( (_.Mang_Type.isin(public) & (_.GAP_Sts <= 2)), "public")
+    .when( (_.Mang_Type.isin(public) & (_.GAP_Sts > 2)), "mixed")
+    .when( (_.Mang_Type.isin(["PVT", "NGO"])), "private")
+    .when( (_.Mang_Type == "TRIB"), "tribal")
+    .end()
+)
+pad_labeled = (
+    pad.
+    filter((_.FeatClass.isin(["Easement", "Fee"])) | (
+           (_.FeatClass == "Proclamation") & (_.Mang_Name == "TRIB"))
+          ).
+    filter(_.Mang_Type.notin(["UNK", "TERR"])).
+    filter(_.geom.within(bounds)).
+    mutate(GAP_Sts = _.GAP_Sts.cast("int")).
+    mutate(bucket = case).
+    select(focal_columns).
+    mutate(row_n=ibis.row_number())
+)
+# -
+pad_labeled.filter(_.row_n < 10).to_pandas()
+# +
+# # %%time
+# testing -- only the lower 48 states!
+# (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
+# -
+start = 0
+end = 10000
+df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
+#from_wkb(df.geometry)
+import geopandas
+geo = geopandas.GeoDataFrame(
+    df, geometry=df.geometry, crs=crs
+)
+geo.shape
+# +
+#geo.geometry.values
+# -
+raster = (rioxarray.
+     open_rasterio('/vsicurl/'+cog, masked=True).
+     rio.clip(geo.geometry.values, crs, from_disk=True).
+     sel(band=1).drop("band")
+    )
+# +
+# https://corteva.github.io/geocube/html/examples/zonal_statistics.html
+from geocube.api.core import make_geocube
+import xarray
+out_grid = make_geocube(
+    vector_data=geo,
+    measurements=["row_n"],
+    like=raster, # ensure the data are on the same grid
+)
+# merge the two together
+out_grid["richness"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
+# -
+grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
+grid_mean = grouped_raster.mean().rename({"richness": "richness_mean"})
+zonal_stats = xarray.merge([grid_mean]).to_dataframe()
+geo = geo.merge(zonal_stats, how="left", on="row_n")
+geo.plot(column="richness_mean", legend=True)