preprocessed
- color.txt +0 -50
- colormap.R +0 -8
- preprocess.py +43 -49
- tileserver-creator.sh +0 -19
color.txt
DELETED
(a gdaldem color-relief table: each row is `value R G B A`; the lowest richness values are transparent)

```
@@ -1,50 +0,0 @@
-1 252 226 220 0
-2 251 220 214 0
-3 250 214 209 0
-4 249 209 204 0
-5 249 203 198 0
-6 248 198 193 50
-7 247 192 188 50
-8 246 187 182 50
-9 246 181 177 50
-10 245 176 172 50
-11 244 170 166 255
-12 243 164 161 255
-13 243 159 156 255
-14 242 153 151 255
-15 241 148 145 255
-16 240 142 140 255
-17 240 137 135 255
-18 239 131 129 255
-19 238 126 124 255
-20 238 120 119 255
-21 237 114 113 255
-22 236 109 108 255
-23 235 103 103 255
-24 235 98 97 255
-25 234 92 92 255
-26 231 88 89 255
-27 225 85 88 255
-28 220 82 87 255
-29 215 79 86 255
-30 209 76 85 255
-31 204 73 84 255
-32 198 70 83 255
-33 193 67 82 255
-34 188 64 81 255
-35 182 61 80 255
-36 177 58 79 255
-37 172 55 78 255
-38 166 52 77 255
-39 161 49 76 255
-40 155 46 75 255
-41 150 43 74 255
-42 145 40 73 255
-43 139 37 72 255
-44 134 34 71 255
-45 128 31 70 255
-46 123 28 69 255
-47 118 25 68 255
-48 112 22 67 255
-49 107 19 66 255
-50 102 16 66 255
```
colormap.R
DELETED
```
@@ -1,8 +0,0 @@
-
-x <- grDevices::colorRampPalette(c("#fce2dc", "#ea5a5a", "#661042"), bias=1, alpha=TRUE)
-y <- x(50) |> grDevices::col2rgb(alpha=TRUE) |> t() |> as.data.frame()
-y$alpha[1:5] <- 0
-y$alpha[6:10] <- 50
-
-y |> write.table("color.txt", quote = FALSE, col.names = FALSE)
-
```
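Note: the deleted R script above is what generated color.txt. For readers without R, a rough Python equivalent is sketched below; matplotlib is my assumption (not part of this repo), and its interpolation will not reproduce colorRampPalette's values exactly.

```python
# Hypothetical Python stand-in for colormap.R: a 50-step ramp from
# "#fce2dc" through "#ea5a5a" to "#661042", written as a gdaldem
# color-relief table ("value R G B A" per row).
from matplotlib.colors import LinearSegmentedColormap

ramp = LinearSegmentedColormap.from_list(
    "richness", ["#fce2dc", "#ea5a5a", "#661042"], N=50
)

with open("color.txt", "w") as f:
    for i in range(50):
        r, g, b, _a = (round(255 * c) for c in ramp(i / 49))
        # fade-in: values 1-5 fully transparent, 6-10 nearly so, the rest opaque
        alpha = 0 if i < 5 else (50 if i < 10 else 255)
        f.write(f"{i + 1} {r} {g} {b} {alpha}\n")
```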
preprocess.py
CHANGED
(note: the `# +` / `# -` comment lines in this file are jupytext cell markers, not diff artifacts)

```
@@ -5,41 +5,33 @@ from shapely.geometry import box
 import fiona
 
 # +
-# read crs, ideally we could do this with st_read_meta() in ibis+duckdb
 fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
-
-
+parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
+cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
+
+# fiona is not built with parquet support; ideally duckdb's st_read_meta() would do this
+crs = fiona.open(fgb).crs
 
 # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
-cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
 r = rioxarray.open_rasterio(cog)
 bounds = box(*r.rio.transform_bounds(crs))
 
-
 # +
 #import leafmap
 #leafmap.cog_validate(cog)
+# -
 
-# +
 con = ibis.duckdb.connect()
-
-# We could just read the flatgeobuf with read_geo.
-# it is not as fast as working with the (Geo)Parquet
+# We could just read the flatgeobuf with ibis.read_geo(), but it is not as fast as working with the (Geo)Parquet
 # pad = con.read_geo(fgb)
-# -
 
-#
-parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
-con = ibis.duckdb.connect()
+# Currently ibis doesn't detect that this is GeoParquet. We need a SQL escape hatch to cast the geometry
 con.load_extension("spatial")
 con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
 pad = con.table("pad")
 
-pad.columns
-
 # +
 # Now we can do all the usual SQL queries to subset the data. Note the `geom.within()` spatial filter!
-
 focal_columns = ["bucket", "FeatClass", "Mang_Name", "Mang_Type", "Des_Tp",
                  "Pub_Access", "GAP_Sts", "IUCN_Cat", "Unit_Nm", "geom"]
 public = ["DIST", "LOC", "FED", "STAT", "JNT"]
```
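Note: the raw_sql view is the escape hatch that makes the GeoParquet usable from ibis. As a sketch, it generalizes to a small helper; `open_geoparquet` is a hypothetical name (the SQL is copied from the hunk above), not repo code.

```python
# Sketch: the GeoParquet escape hatch from the hunk above, wrapped as a
# reusable helper (hypothetical; the SQL matches the view created above).
import ibis

def open_geoparquet(con, url, name="pad"):
    con.load_extension("spatial")
    con.raw_sql(
        f"CREATE OR REPLACE VIEW {name} AS "
        f"SELECT *, st_geomfromwkb(geometry) AS geom "
        f"FROM read_parquet('{url}')"
    )
    return con.table(name)

con = ibis.duckdb.connect()
pad = open_geoparquet(con, "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet")
```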
```
@@ -66,9 +58,6 @@ pad_labeled = (
     mutate(row_n=ibis.row_number())
 )
 
-# -
-
-pad_labeled.filter(_.row_n < 10).to_pandas()
 
 # +
 # # %%time
```
```
@@ -76,47 +65,52 @@ pad_labeled.filter(_.row_n < 10).to_pandas()
 # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
 # -
 
+# We could work in chunks, possibly parallelize this...
 start = 0
 end = 10000
 df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
-#from_wkb(df.geometry)
 
-
-geo = geopandas.GeoDataFrame(
-    df, geometry=df.geometry, crs=crs
-)
-geo.shape
+# Or be bold!
+df = pad_labeled.to_pandas()
 
-
-
-
+from geocube.api.core import make_geocube
+import xarray
+import geopandas
+geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
 
-raster = (rioxarray.
-    open_rasterio('/vsicurl/'+cog, masked=True).
-    rio.clip(geo.geometry.values, crs, from_disk=True).
-    sel(band=1).drop("band")
-)
+geo.shape
 
 
-
+def zonal_stats(cog, geo, crs, row_n = "row_n"):
     # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
-
-
-
-
-
-
-
+    raster = (rioxarray.
+        open_rasterio('/vsicurl/'+cog, masked=True).
+        rio.clip(geo.geometry.values, crs, from_disk=True).
+        sel(band=1).drop("band")
+    )
+    out_grid = make_geocube(
+        vector_data=geo,
+        measurements=[row_n],
+        like=raster, # ensure the data are on the same grid
+    )
+    # merge the two together
+    out_grid["values"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
+    grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
+    # can add other stats
+    grid_mean = grouped_raster.mean().rename({"values": "mean"})
+    zonal_stats = xarray.merge([grid_mean]).to_dataframe()
+    geo = geo.merge(zonal_stats, how="left", on=row_n)
+    return geo
 
-# merge the two together
-out_grid["richness"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
-# -
 
-
-grid_mean = grouped_raster.mean().rename({"richness": "richness_mean"})
-zonal_stats = xarray.merge([grid_mean]).to_dataframe()
+geo = zonal_stats(cog, geo, crs)
 
-geo
+geo.to_parquet("pad-mobi.parquet")
 
-
+# Now we need to convert to PMTiles:
+#
+# ```
+# ogr2ogr -dsco MAX_SIZE=90000000 -dsco MAX_FEATURES=50000000 -dsco MAXZOOM=10 pad-mobi.pmtiles pad-mobi.parquet
+# ```
 
+geo.plot(column="mean", legend=True)
```
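Note: for the "work in chunks, possibly parallelize" route mentioned in the hunk above, a minimal sketch of the chunked loop; the chunk size and the pandas concat are my choices, not the author's.

```python
# Sketch of the chunked path: pull fixed-size row ranges and concatenate.
# Assumes pad_labeled from the script above; the chunk size is arbitrary.
import pandas as pd
from ibis import _

chunk = 10_000
n_rows = int(pad_labeled.count().to_pandas())
parts = [
    pad_labeled.filter([_.row_n > start, _.row_n <= start + chunk]).to_pandas()
    for start in range(0, n_rows, chunk)
]
df = pd.concat(parts, ignore_index=True)
```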
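Note: the PMTiles conversion is left as a shell one-liner in the comments at the end of preprocess.py. If it should run from the same script, a subprocess sketch (flags copied verbatim from that comment):

```python
# Sketch: run the PMTiles conversion from Python instead of the shell.
import subprocess

subprocess.run(
    [
        "ogr2ogr",
        "-dsco", "MAX_SIZE=90000000",
        "-dsco", "MAX_FEATURES=50000000",
        "-dsco", "MAXZOOM=10",
        "pad-mobi.pmtiles",
        "pad-mobi.parquet",
    ],
    check=True,
)
```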
tileserver-creator.sh
DELETED
```
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-
-#git lfs track "*.html"
-#git lfs track "*.png"
-
-
-gdal_translate -of VRT -ot Byte -scale /vsicurl/https://minio.carlboettiger.info/public-biodiversity/mobi/species-richness-all/SpeciesRichness_All.tif temp.vrt
-
-gdaldem color-relief -of GTiff temp.vrt color.txt richness.vrt -alpha
-gdal2tiles.py --xyz --zoom=1-10 --processes=24 richness.vrt mobi-red
-
-mc cp -r mobi-red/ nvme/shared-data/mobi-tiles/red
-#
-
-ogr2ogr pad-us3.pmtiles /vsicurl/https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb
-
-
-
```
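Note: the deleted tiling pipeline could equally be driven from Python. A sketch that shells out to the same GDAL commands (commands copied from the script above; untested here):

```python
# Sketch: the deleted shell pipeline, one subprocess call per step.
import subprocess

cog = ("/vsicurl/https://minio.carlboettiger.info/public-biodiversity/"
       "mobi/species-richness-all/SpeciesRichness_All.tif")

# rescale to Byte, apply the color table, then cut XYZ tiles
subprocess.run(["gdal_translate", "-of", "VRT", "-ot", "Byte", "-scale",
                cog, "temp.vrt"], check=True)
subprocess.run(["gdaldem", "color-relief", "-of", "GTiff", "temp.vrt",
                "color.txt", "richness.vrt", "-alpha"], check=True)
subprocess.run(["gdal2tiles.py", "--xyz", "--zoom=1-10", "--processes=24",
                "richness.vrt", "mobi-red"], check=True)
```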