cboettig committed on
Commit
abebbc4
·
1 Parent(s): 321154b

preprocessed

Browse files
Files changed (4) hide show
  1. color.txt +0 -50
  2. colormap.R +0 -8
  3. preprocess.py +43 -49
  4. tileserver-creator.sh +0 -19
color.txt DELETED
@@ -1,50 +0,0 @@
1
- 1 252 226 220 0
2
- 2 251 220 214 0
3
- 3 250 214 209 0
4
- 4 249 209 204 0
5
- 5 249 203 198 0
6
- 6 248 198 193 50
7
- 7 247 192 188 50
8
- 8 246 187 182 50
9
- 9 246 181 177 50
10
- 10 245 176 172 50
11
- 11 244 170 166 255
12
- 12 243 164 161 255
13
- 13 243 159 156 255
14
- 14 242 153 151 255
15
- 15 241 148 145 255
16
- 16 240 142 140 255
17
- 17 240 137 135 255
18
- 18 239 131 129 255
19
- 19 238 126 124 255
20
- 20 238 120 119 255
21
- 21 237 114 113 255
22
- 22 236 109 108 255
23
- 23 235 103 103 255
24
- 24 235 98 97 255
25
- 25 234 92 92 255
26
- 26 231 88 89 255
27
- 27 225 85 88 255
28
- 28 220 82 87 255
29
- 29 215 79 86 255
30
- 30 209 76 85 255
31
- 31 204 73 84 255
32
- 32 198 70 83 255
33
- 33 193 67 82 255
34
- 34 188 64 81 255
35
- 35 182 61 80 255
36
- 36 177 58 79 255
37
- 37 172 55 78 255
38
- 38 166 52 77 255
39
- 39 161 49 76 255
40
- 40 155 46 75 255
41
- 41 150 43 74 255
42
- 42 145 40 73 255
43
- 43 139 37 72 255
44
- 44 134 34 71 255
45
- 45 128 31 70 255
46
- 46 123 28 69 255
47
- 47 118 25 68 255
48
- 48 112 22 67 255
49
- 49 107 19 66 255
50
- 50 102 16 66 255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
colormap.R DELETED
@@ -1,8 +0,0 @@
1
-
2
- x <- grDevices::colorRampPalette(c("#fce2dc", "#ea5a5a", "#661042"), bias=1, alpha=TRUE)
3
- y <- x(50) |> grDevices::col2rgb(alpha=TRUE) |> t() |> as.data.frame()
4
- y$alpha[1:5] <- 0
5
- y$alpha[6:10] <- 50
6
-
7
- y |> write.table("color.txt", quote = FALSE, col.names = FALSE)
8
-
 
 
 
 
 
 
 
 
 
preprocess.py CHANGED
@@ -5,41 +5,33 @@ from shapely.geometry import box
5
  import fiona
6
 
7
  # +
8
- # read crs, ideally we could do this with st_read_meta() in ibis+duckdb
9
  fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
10
- v = fiona.open(fgb)
11
- crs = v.crs
 
 
 
12
 
13
  # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
14
- cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
15
  r = rioxarray.open_rasterio(cog)
16
  bounds = box(*r.rio.transform_bounds(crs))
17
 
18
-
19
  # +
20
  #import leafmap
21
  #leafmap.cog_validate(cog)
 
22
 
23
- # +
24
  con = ibis.duckdb.connect()
25
-
26
- # We could just read the flatgeobuf with read_geo.
27
- # it is not as fast as working with the (Geo)Parquet
28
  # pad = con.read_geo(fgb)
29
- # -
30
 
31
- # Unfortunately, ibis doesn't detect that this is GeoParquet. We need a SQL escape-hatch to cast the geometry
32
- parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
33
- con = ibis.duckdb.connect()
34
  con.load_extension("spatial")
35
  con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
36
  pad = con.table("pad")
37
 
38
- pad.columns
39
-
40
  # +
41
  # Now we can do all the usual SQL queries to subset the data. Note the `geom.within()` spatial filter!
42
-
43
  focal_columns = ["bucket", "FeatClass", "Mang_Name", "Mang_Type", "Des_Tp",
44
  "Pub_Access", "GAP_Sts", "IUCN_Cat", "Unit_Nm", "geom"]
45
  public = ["DIST", "LOC", "FED", "STAT", "JNT"]
@@ -66,9 +58,6 @@ pad_labeled = (
66
  mutate(row_n=ibis.row_number())
67
  )
68
 
69
- # -
70
-
71
- pad_labeled.filter(_.row_n < 10).to_pandas()
72
 
73
  # +
74
  # # %%time
@@ -76,47 +65,52 @@ pad_labeled.filter(_.row_n < 10).to_pandas()
76
  # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
77
  # -
78
 
 
79
  start = 0
80
  end = 10000
81
  df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
82
- #from_wkb(df.geometry)
83
 
84
- import geopandas
85
- geo = geopandas.GeoDataFrame(
86
- df, geometry=df.geometry, crs=crs
87
- )
88
- geo.shape
89
 
90
- # +
91
- #geo.geometry.values
92
- # -
 
93
 
94
- raster = (rioxarray.
95
- open_rasterio('/vsicurl/'+cog, masked=True).
96
- rio.clip(geo.geometry.values, crs, from_disk=True).
97
- sel(band=1).drop("band")
98
- )
99
 
100
 
101
- # +
102
  # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
103
- from geocube.api.core import make_geocube
104
- import xarray
105
- out_grid = make_geocube(
106
- vector_data=geo,
107
- measurements=["row_n"],
108
- like=raster, # ensure the data are on the same grid
109
- )
 
 
 
 
 
 
 
 
 
 
 
110
 
111
- # merge the two together
112
- out_grid["richness"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
113
- # -
114
 
115
- grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
116
- grid_mean = grouped_raster.mean().rename({"richness": "richness_mean"})
117
- zonal_stats = xarray.merge([grid_mean]).to_dataframe()
118
 
119
- geo = geo.merge(zonal_stats, how="left", on="row_n")
120
 
121
- geo.plot(column="richness_mean", legend=True)
 
 
 
 
122
 
 
 
5
  import fiona
6
 
7
  # +
 
8
  fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
9
+ parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
10
+ cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
11
+
12
+ # fiona not built with parquet support. ideally duckdb's st_read_meta would do this.
13
+ crs = fiona.open(fgb).crs
14
 
15
  # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
 
16
  r = rioxarray.open_rasterio(cog)
17
  bounds = box(*r.rio.transform_bounds(crs))
18
 
 
19
  # +
20
  #import leafmap
21
  #leafmap.cog_validate(cog)
22
+ # -
23
 
 
24
  con = ibis.duckdb.connect()
25
+ # We could just read the flatgeobuf with ibis.read_geo() but it is not as fast as working with the (Geo)Parquet
 
 
26
  # pad = con.read_geo(fgb)
 
27
 
28
+ # Currently ibis doesn't detect that this is GeoParquet. We need a SQL escape-hatch to cast the geometry
 
 
29
  con.load_extension("spatial")
30
  con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
31
  pad = con.table("pad")
32
 
 
 
33
  # +
34
  # Now we can do all the usual SQL queries to subset the data. Note the `geom.within()` spatial filter!
 
35
  focal_columns = ["bucket", "FeatClass", "Mang_Name", "Mang_Type", "Des_Tp",
36
  "Pub_Access", "GAP_Sts", "IUCN_Cat", "Unit_Nm", "geom"]
37
  public = ["DIST", "LOC", "FED", "STAT", "JNT"]
 
58
  mutate(row_n=ibis.row_number())
59
  )
60
 
 
 
 
61
 
62
  # +
63
  # # %%time
 
65
  # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
66
  # -
67
 
68
+ # We could work in chunks, possibly parallelize this....
69
  start = 0
70
  end = 10000
71
  df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
 
72
 
73
+ # Or be bold!
74
+ df = pad_labeled.to_pandas()
 
 
 
75
 
76
+ from geocube.api.core import make_geocube
77
+ import xarray
78
+ import geopandas
79
+ geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
80
 
81
+ geo.shape
 
 
 
 
82
 
83
 
84
+ def zonal_stats(cog, geo, crs, row_n = "row_n"):
85
  # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
86
+ raster = (rioxarray.
87
+ open_rasterio('/vsicurl/'+cog, masked=True).
88
+ rio.clip(geo.geometry.values, crs, from_disk=True).
89
+ sel(band=1).drop("band")
90
+ )
91
+ out_grid = make_geocube(
92
+ vector_data=geo,
93
+ measurements=[row_n],
94
+ like=raster, # ensure the data are on the same grid
95
+ )
96
+ # merge the two together
97
+ out_grid["values"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
98
+ grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
99
+ # can add other stats
100
+ grid_mean = grouped_raster.mean().rename({"values": "mean"})
101
+ zonal_stats = xarray.merge([grid_mean]).to_dataframe()
102
+ geo = geo.merge(zonal_stats, how="left", on=row_n)
103
+ return geo
104
 
 
 
 
105
 
106
+ geo = zonal_stats(cog, geo, crs)
 
 
107
 
108
+ geo.to_parquet("pad-mobi.parquet")
109
 
110
+ # Now we need to convert to PMTiles:
111
+ #
112
+ # ```
113
+ # ogr2ogr -dsco MAX_SIZE=90000000 -dsco MAX_FEATURES=50000000 -dsco MAXZOOM=10 pad-mobi.pmtiles pad-mobi.parquet
114
+ # ```
115
 
116
+ geo.plot(column="mean", legend=True)
tileserver-creator.sh DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
-
3
-
4
- #git lfs track "*.html"
5
- #git lfs track "*.png"
6
-
7
-
8
- gdal_translate -of VRT -ot Byte -scale /vsicurl/https://minio.carlboettiger.info/public-biodiversity/mobi/species-richness-all/SpeciesRichness_All.tif temp.vrt
9
-
10
- gdaldem color-relief -of GTiff temp.vrt color.txt richness.vrt -alpha
11
- gdal2tiles.py --xyz --zoom=1-10 --processes=24 richness.vrt mobi-red
12
-
13
- mc cp -r mobi-red/ nvme/shared-data/mobi-tiles/red
14
- #
15
-
16
- ogr2ogr pad-us3.pmtiles /vsicurl/https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb
17
-
18
-
19
-