cboettig committed on
Commit
abebbc4
·
1 Parent(s): 321154b

preprocessed

Browse files
Files changed (4) hide show
  1. color.txt +0 -50
  2. colormap.R +0 -8
  3. preprocess.py +43 -49
  4. tileserver-creator.sh +0 -19
color.txt DELETED
@@ -1,50 +0,0 @@
1
- 1 252 226 220 0
2
- 2 251 220 214 0
3
- 3 250 214 209 0
4
- 4 249 209 204 0
5
- 5 249 203 198 0
6
- 6 248 198 193 50
7
- 7 247 192 188 50
8
- 8 246 187 182 50
9
- 9 246 181 177 50
10
- 10 245 176 172 50
11
- 11 244 170 166 255
12
- 12 243 164 161 255
13
- 13 243 159 156 255
14
- 14 242 153 151 255
15
- 15 241 148 145 255
16
- 16 240 142 140 255
17
- 17 240 137 135 255
18
- 18 239 131 129 255
19
- 19 238 126 124 255
20
- 20 238 120 119 255
21
- 21 237 114 113 255
22
- 22 236 109 108 255
23
- 23 235 103 103 255
24
- 24 235 98 97 255
25
- 25 234 92 92 255
26
- 26 231 88 89 255
27
- 27 225 85 88 255
28
- 28 220 82 87 255
29
- 29 215 79 86 255
30
- 30 209 76 85 255
31
- 31 204 73 84 255
32
- 32 198 70 83 255
33
- 33 193 67 82 255
34
- 34 188 64 81 255
35
- 35 182 61 80 255
36
- 36 177 58 79 255
37
- 37 172 55 78 255
38
- 38 166 52 77 255
39
- 39 161 49 76 255
40
- 40 155 46 75 255
41
- 41 150 43 74 255
42
- 42 145 40 73 255
43
- 43 139 37 72 255
44
- 44 134 34 71 255
45
- 45 128 31 70 255
46
- 46 123 28 69 255
47
- 47 118 25 68 255
48
- 48 112 22 67 255
49
- 49 107 19 66 255
50
- 50 102 16 66 255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
colormap.R DELETED
@@ -1,8 +0,0 @@
1
-
2
- x <- grDevices::colorRampPalette(c("#fce2dc", "#ea5a5a", "#661042"), bias=1, alpha=TRUE)
3
- y <- x(50) |> grDevices::col2rgb(alpha=TRUE) |> t() |> as.data.frame()
4
- y$alpha[1:5] <- 0
5
- y$alpha[6:10] <- 50
6
-
7
- y |> write.table("color.txt", quote = FALSE, col.names = FALSE)
8
-
 
 
 
 
 
 
 
 
 
preprocess.py CHANGED
@@ -5,41 +5,33 @@ from shapely.geometry import box
5
  import fiona
6
 
7
  # +
8
- # read crs, ideally we could do this with st_read_meta() in ibis+duckdb
9
  fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
10
- v = fiona.open(fgb)
11
- crs = v.crs
 
 
 
12
 
13
  # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
14
- cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
15
  r = rioxarray.open_rasterio(cog)
16
  bounds = box(*r.rio.transform_bounds(crs))
17
 
18
-
19
  # +
20
  #import leafmap
21
  #leafmap.cog_validate(cog)
 
22
 
23
- # +
24
  con = ibis.duckdb.connect()
25
-
26
- # We could just read the flatgeobuf with read_geo.
27
- # it is not as fast as working with the (Geo)Parquet
28
  # pad = con.read_geo(fgb)
29
- # -
30
 
31
- # Unfortunately, ibis doesn't detect that this is GeoParquet. We need a SQL escape-hatch to cast the geometry
32
- parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
33
- con = ibis.duckdb.connect()
34
  con.load_extension("spatial")
35
  con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
36
  pad = con.table("pad")
37
 
38
- pad.columns
39
-
40
  # +
41
  # Now we can do all the usual SQL queries to subset the data. Note the `geom.within()` spatial filter!
42
-
43
  focal_columns = ["bucket", "FeatClass", "Mang_Name", "Mang_Type", "Des_Tp",
44
  "Pub_Access", "GAP_Sts", "IUCN_Cat", "Unit_Nm", "geom"]
45
  public = ["DIST", "LOC", "FED", "STAT", "JNT"]
@@ -66,9 +58,6 @@ pad_labeled = (
66
  mutate(row_n=ibis.row_number())
67
  )
68
 
69
- # -
70
-
71
- pad_labeled.filter(_.row_n < 10).to_pandas()
72
 
73
  # +
74
  # # %%time
@@ -76,47 +65,52 @@ pad_labeled.filter(_.row_n < 10).to_pandas()
76
  # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
77
  # -
78
 
 
79
  start = 0
80
  end = 10000
81
  df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
82
- #from_wkb(df.geometry)
83
 
84
- import geopandas
85
- geo = geopandas.GeoDataFrame(
86
- df, geometry=df.geometry, crs=crs
87
- )
88
- geo.shape
89
 
90
- # +
91
- #geo.geometry.values
92
- # -
 
93
 
94
- raster = (rioxarray.
95
- open_rasterio('/vsicurl/'+cog, masked=True).
96
- rio.clip(geo.geometry.values, crs, from_disk=True).
97
- sel(band=1).drop("band")
98
- )
99
 
100
 
101
- # +
102
  # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
103
- from geocube.api.core import make_geocube
104
- import xarray
105
- out_grid = make_geocube(
106
- vector_data=geo,
107
- measurements=["row_n"],
108
- like=raster, # ensure the data are on the same grid
109
- )
 
 
 
 
 
 
 
 
 
 
 
110
 
111
- # merge the two together
112
- out_grid["richness"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
113
- # -
114
 
115
- grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
116
- grid_mean = grouped_raster.mean().rename({"richness": "richness_mean"})
117
- zonal_stats = xarray.merge([grid_mean]).to_dataframe()
118
 
119
- geo = geo.merge(zonal_stats, how="left", on="row_n")
120
 
121
- geo.plot(column="richness_mean", legend=True)
 
 
 
 
122
 
 
 
5
  import fiona
6
 
7
  # +
 
8
  fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
9
+ parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
10
+ cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
11
+
12
+ # fiona not built with parquet support. ideally duckdb's st_read_meta would do this.
13
+ crs = fiona.open(fgb).crs
14
 
15
  # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
 
16
  r = rioxarray.open_rasterio(cog)
17
  bounds = box(*r.rio.transform_bounds(crs))
18
 
 
19
  # +
20
  #import leafmap
21
  #leafmap.cog_validate(cog)
22
+ # -
23
 
 
24
  con = ibis.duckdb.connect()
25
+ # We could just read the flatgeobuf with ibis.read_geo() but it is not as fast as working with the (Geo)Parquet
 
 
26
  # pad = con.read_geo(fgb)
 
27
 
28
+ # Currently ibis doesn't detect that this is GeoParquet. We need a SQL escape-hatch to cast the geometry
 
 
29
  con.load_extension("spatial")
30
  con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
31
  pad = con.table("pad")
32
 
 
 
33
  # +
34
  # Now we can do all the usual SQL queries to subset the data. Note the `geom.within()` spatial filter!
 
35
  focal_columns = ["bucket", "FeatClass", "Mang_Name", "Mang_Type", "Des_Tp",
36
  "Pub_Access", "GAP_Sts", "IUCN_Cat", "Unit_Nm", "geom"]
37
  public = ["DIST", "LOC", "FED", "STAT", "JNT"]
 
58
  mutate(row_n=ibis.row_number())
59
  )
60
 
 
 
 
61
 
62
  # +
63
  # # %%time
 
65
  # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
66
  # -
67
 
68
+ # We could work in chunks, possibly parallelize this....
69
  start = 0
70
  end = 10000
71
  df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
 
72
 
73
+ # Or be bold!
74
+ df = pad_labeled.to_pandas()
 
 
 
75
 
76
+ from geocube.api.core import make_geocube
77
+ import xarray
78
+ import geopandas
79
+ geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
80
 
81
+ geo.shape
 
 
 
 
82
 
83
 
84
+ def zonal_stats(cog, geo, crs, row_n = "row_n"):
85
  # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
86
+ raster = (rioxarray.
87
+ open_rasterio('/vsicurl/'+cog, masked=True).
88
+ rio.clip(geo.geometry.values, crs, from_disk=True).
89
+ sel(band=1).drop("band")
90
+ )
91
+ out_grid = make_geocube(
92
+ vector_data=geo,
93
+ measurements=[row_n],
94
+ like=raster, # ensure the data are on the same grid
95
+ )
96
+ # merge the two together
97
+ out_grid["values"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
98
+ grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
99
+ # can add other stats
100
+ grid_mean = grouped_raster.mean().rename({"values": "mean"})
101
+ zonal_stats = xarray.merge([grid_mean]).to_dataframe()
102
+ geo = geo.merge(zonal_stats, how="left", on=row_n)
103
+ return geo
104
 
 
 
 
105
 
106
+ geo = zonal_stats(cog, geo, crs)
 
 
107
 
108
+ geo.to_parquet("pad-mobi.parquet")
109
 
110
+ # Now we need to convert to PMTiles:
111
+ #
112
+ # ```
113
+ # ogr2ogr -dsco MAX_SIZE=90000000 -dsco MAX_FEATURES=50000000 -dsco MAXZOOM=10 pad-mobi.pmtiles pad-mobi.parquet
114
+ # ```
115
 
116
+ geo.plot(column="mean", legend=True)
tileserver-creator.sh DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
-
3
-
4
- #git lfs track "*.html"
5
- #git lfs track "*.png"
6
-
7
-
8
- gdal_translate -of VRT -ot Byte -scale /vsicurl/https://minio.carlboettiger.info/public-biodiversity/mobi/species-richness-all/SpeciesRichness_All.tif temp.vrt
9
-
10
- gdaldem color-relief -of GTiff temp.vrt color.txt richness.vrt -alpha
11
- gdal2tiles.py --xyz --zoom=1-10 --processes=24 richness.vrt mobi-red
12
-
13
- mc cp -r mobi-red/ nvme/shared-data/mobi-tiles/red
14
- #
15
-
16
- ogr2ogr pad-us3.pmtiles /vsicurl/https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb
17
-
18
-
19
-