cboettig committed on
Commit
f052b1c
·
1 Parent(s): a25711e

dynamic charts

Browse files
Files changed (2) hide show
  1. app.py +35 -32
  2. preprocess.py +79 -46
app.py CHANGED
@@ -109,11 +109,11 @@ gap = {
109
  ['1', "#26633d"],
110
  ['2', "#879647"],
111
  ['3', "#BBBBBB"],
112
- ['4', "#FFFFFF"]
113
  ]
114
  }
115
 
116
- IUCN = {
117
  'property': 'IUCN_Cat',
118
  'type': 'categorical',
119
  'stops': [
@@ -125,7 +125,7 @@ IUCN = {
125
  ["V", "#9932CC"],
126
  ["VI", "#9400D3"],
127
  ["Other Conservation Area", "#DDA0DD"],
128
- ["Unassigned", "#FFFFFF"]
129
  ]
130
  }
131
 
@@ -174,7 +174,8 @@ def pad_style(paint, alpha):
174
  }
175
  }]}
176
 
177
-
 
178
 
179
  # +
180
  ## Map controls sidebar
@@ -194,7 +195,7 @@ with st.sidebar:
194
 
195
  style_options = {
196
  "GAP Status Code": gap,
197
- "IUCN Status Code": IUCN,
198
  "Manager Type": manager,
199
  "Fee/Easement": easement,
200
  "Mean Richness": richness,
@@ -240,7 +241,6 @@ with st.sidebar:
240
  hi="https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
241
  m.add_cog_layer(hi, palette="purples", name="Human Impact", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
242
 
243
-
244
  # "## Boundaries"
245
  # boundaries = st.radio("Boundaries:",
246
  # ["None",
@@ -253,8 +253,6 @@ with st.sidebar:
253
  "## Basemaps"
254
  if st.toggle("Shaded Relief Topo"):
255
  m.add_basemap("Esri.WorldShadedRelief")
256
-
257
-
258
 
259
  "## Additional elements"
260
  # Fire Polygons, USGS
@@ -275,15 +273,32 @@ with st.sidebar:
275
  "paint": {"fill-color": "#FFA500", "fill-opacity": 0.2}}]}
276
  m.add_pmtiles(usgs, name="Fire", style=combined_style, overlay=True, show=True, zoom_to_layer=False)
277
 
278
-
279
-
280
-
281
  # +
282
-
283
  # And here we go!
284
  m.to_streamlit(height=700)
285
  # -
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  st.divider()
288
 
289
 
@@ -291,56 +306,44 @@ st.divider()
291
  us_lower_48_area_m2 = 7.8e+12
292
 
293
  @st.cache_data()
294
- def summary_table():
295
- x = ibis.memtable({"bucket": ["public", "tribal", "mixed", "private"],
296
- "color": [public_color, tribal_color, mixed_color, private_color]})
297
  df = (pad_data.
298
- group_by(_.bucket).
299
  aggregate(percent_protected = 100 * _.area.sum() / us_lower_48_area_m2,
300
  mean_richness = (_.richness * _.area).sum() / _.area.sum(),
301
  mean_rsr = (_.rsr * _.area).sum() / _.area.sum()
302
  ).
303
  mutate(percent_protected = _.percent_protected.round())
304
- ).inner_join(x, "bucket")
305
  return df.to_pandas()
306
 
307
- df = summary_table()
308
- # st.table(richness_table)
309
-
310
-
311
- # +
312
- #summary_table.to_pandas()
313
 
314
- # +
315
 
316
  base = alt.Chart(df).encode(
317
  alt.Theta("percent_protected:Q").stack(True),
318
  alt.Color("color:N").scale(None).legend(None)
319
  )
320
 
321
-
322
  area_chart = (
323
  base.mark_arc(innerRadius=50, outerRadius=120) +
324
- base.mark_text(radius=165, size=20).encode(text="bucket") +
325
  base.mark_text(radius=135, size=20).encode(text="percent_protected:N")
326
  )
327
 
328
  # area_chart
329
 
330
  # +
331
-
332
  richness_chart = alt.Chart(df).mark_bar().encode(
333
- x='bucket',
334
  y='mean_richness',
335
  color=alt.Color('color').scale(None)
336
  )
337
- #richness_chart
338
 
339
 
340
  # +
341
-
342
  rsr_chart = alt.Chart(df).mark_bar().encode(
343
- x='bucket',
344
  y='mean_rsr',
345
  color=alt.Color('color').scale(None)
346
  )
@@ -355,7 +358,7 @@ rsr_chart = alt.Chart(df).mark_bar().encode(
355
  col1, col2, col3 = st.columns(3)
356
 
357
  with col1:
358
- "#### Percent of Continental US Area"
359
  st.altair_chart(area_chart, use_container_width=True)
360
 
361
  # -
 
109
  ['1', "#26633d"],
110
  ['2', "#879647"],
111
  ['3', "#BBBBBB"],
112
+ ['4', "#F8F8F8"]
113
  ]
114
  }
115
 
116
+ iucn = {
117
  'property': 'IUCN_Cat',
118
  'type': 'categorical',
119
  'stops': [
 
125
  ["V", "#9932CC"],
126
  ["VI", "#9400D3"],
127
  ["Other Conservation Area", "#DDA0DD"],
128
+ ["Unassigned", "#F8F8F8"]
129
  ]
130
  }
131
 
 
174
  }
175
  }]}
176
 
177
+ manager_colors = {"bucket": ["public", "tribal", "mixed", "private"],
178
+ "color": [public_color, tribal_color, mixed_color, private_color]}
179
 
180
  # +
181
  ## Map controls sidebar
 
195
 
196
  style_options = {
197
  "GAP Status Code": gap,
198
+ "IUCN Status Code": iucn,
199
  "Manager Type": manager,
200
  "Fee/Easement": easement,
201
  "Mean Richness": richness,
 
241
  hi="https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
242
  m.add_cog_layer(hi, palette="purples", name="Human Impact", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
243
 
 
244
  # "## Boundaries"
245
  # boundaries = st.radio("Boundaries:",
246
  # ["None",
 
253
  "## Basemaps"
254
  if st.toggle("Shaded Relief Topo"):
255
  m.add_basemap("Esri.WorldShadedRelief")
 
 
256
 
257
  "## Additional elements"
258
  # Fire Polygons, USGS
 
273
  "paint": {"fill-color": "#FFA500", "fill-opacity": 0.2}}]}
274
  m.add_pmtiles(usgs, name="Fire", style=combined_style, overlay=True, show=True, zoom_to_layer=False)
275
 
 
 
 
276
  # +
 
277
  # And here we go!
278
  m.to_streamlit(height=700)
279
  # -
280
 
281
+
282
+ select_column = {
283
+ "GAP Status Code": "GAP_Sts",
284
+ "IUCN Status Code": "IUCN_Cat",
285
+ "Manager Type": "bucket",
286
+ "Fee/Easement": "FeatClass",
287
+ "Mean Richness": "bucket",
288
+ "Mean RSR": "bucket",
289
+ "custom": "bucket"}
290
+ column = select_column[style_choice]
291
+
292
+ select_colors = {
293
+ "GAP Status Code": gap["stops"],
294
+ "IUCN Status Code": iucn["stops"],
295
+ "Manager Type": manager["stops"],
296
+ "Fee/Easement": easement["stops"],
297
+ "Mean Richness": manager["stops"],
298
+ "Mean RSR": manager["stops"],
299
+ "custom": manager["stops"]}
300
+ colors = ibis.memtable(select_colors[style_choice], columns = [column, "color"]).to_pandas()
301
+
302
  st.divider()
303
 
304
 
 
306
  us_lower_48_area_m2 = 7.8e+12
307
 
308
  @st.cache_data()
309
+ def summary_table(column = column, colors = colors):
 
 
310
  df = (pad_data.
311
+ group_by(_[column]).
312
  aggregate(percent_protected = 100 * _.area.sum() / us_lower_48_area_m2,
313
  mean_richness = (_.richness * _.area).sum() / _.area.sum(),
314
  mean_rsr = (_.rsr * _.area).sum() / _.area.sum()
315
  ).
316
  mutate(percent_protected = _.percent_protected.round())
317
+ ).inner_join(colors, column)
318
  return df.to_pandas()
319
 
320
+ df = summary_table(column, colors)
 
 
 
 
 
321
 
 
322
 
323
  base = alt.Chart(df).encode(
324
  alt.Theta("percent_protected:Q").stack(True),
325
  alt.Color("color:N").scale(None).legend(None)
326
  )
327
 
 
328
  area_chart = (
329
  base.mark_arc(innerRadius=50, outerRadius=120) +
330
+ base.mark_text(radius=165, size=20).encode(text=column) +
331
  base.mark_text(radius=135, size=20).encode(text="percent_protected:N")
332
  )
333
 
334
  # area_chart
335
 
336
  # +
 
337
  richness_chart = alt.Chart(df).mark_bar().encode(
338
+ x=column,
339
  y='mean_richness',
340
  color=alt.Color('color').scale(None)
341
  )
 
342
 
343
 
344
  # +
 
345
  rsr_chart = alt.Chart(df).mark_bar().encode(
346
+ x=column,
347
  y='mean_rsr',
348
  color=alt.Color('color').scale(None)
349
  )
 
358
  col1, col2, col3 = st.columns(3)
359
 
360
  with col1:
361
+ f"#### Percent of Continental US Area"
362
  st.altair_chart(area_chart, use_container_width=True)
363
 
364
  # -
preprocess.py CHANGED
@@ -1,12 +1,17 @@
 
1
  import ibis
2
  from ibis import _
3
- import rioxarray
4
  import xarray
5
  from shapely.geometry import box
6
  from geocube.api.core import make_geocube
7
  import geopandas
8
  import fiona
9
 
 
 
 
 
 
10
 
11
  # +
12
  fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
@@ -65,14 +70,24 @@ pad_labeled = (
65
  )
66
 
67
 
 
 
 
 
68
  # -
69
 
 
 
 
 
 
70
 
71
  def zonal_stats(cog, geo, band_name = "mean", row_n = "row_n"):
72
  # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
73
  raster = (rioxarray.
74
- open_rasterio('/vsicurl/'+cog, masked=True).
75
- rio.clip(geo.geometry.values, geo.crs, from_disk=True).
 
76
  sel(band=1).drop_vars("band")
77
  )
78
  out_grid = make_geocube(
@@ -90,71 +105,89 @@ def zonal_stats(cog, geo, band_name = "mean", row_n = "row_n"):
90
  return geo
91
 
92
 
93
- total_features = pad_labeled.count().to_pandas()
 
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
- # +
97
- # # %%time
98
- # testing -- only the lower 48 states!
99
- # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
100
 
101
- # +
102
 
103
- def piecewise_zonal(cog, tbl, crs, band_name = "mean", row_n = "row_n", dirname = "pad_parquet"):
104
- total_features = tbl.count().to_pandas()
105
- n = 10000
106
- steps = range(0, total_features, 10000)
107
- parts = [*[i for i in steps], total_features]
108
- for i in range(0,len(steps)):
109
- begin = parts[i]
110
- end = parts[i+1] - 1
111
- df = tbl.filter([_[row_n] > begin, _[row_n] <= end]).to_pandas()
112
- geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
113
- geo = zonal_stats(cog, geo, band_name, row_n)
114
- geo.to_parquet(f"{dirname}/part_{i}.parquet")
115
-
116
 
 
 
 
 
 
117
 
118
- # -
119
 
 
120
  # %%time
121
- cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
122
- piecewise_zonal(cog, pad_labeled, crs, "richness")
 
123
 
124
- # Or be bold!
125
- df = pad_labeled.to_pandas()
126
- geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
 
 
 
127
 
128
  # +
129
- import numpy as np
130
 
131
- def piecewise_zonal2(cog, geo, band_name = "mean", n = 10000, row_n = "row_n", dirname = "pad_parquet2"):
132
- total = len(geo)
133
- for i in range(0,total,n):
134
- end = np.min([i + n,total])
135
- geo_slice = geo.iloc[i:end]
136
- geo_slice = zonal_stats(cog, geo_slice, band_name, row_n)
137
- geo_slice.to_parquet(f"{dirname}/part_{i}.parquet")
138
-
139
 
140
 
141
- # -
 
 
 
 
 
142
 
 
143
  # %%time
144
- piecewise_zonal2(cog, geo, "richness") # 6 min
145
-
146
-
147
- import geopandas
148
- gdf = geopandas.read_parquet("pad_parquet2")
149
 
150
- gdf.columns
 
 
 
 
 
151
 
 
152
  # %%time
153
- human_impacts_2021 = "https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
154
- gdf = piecewise_zonal2(human_impacts_2021, gdf, "human_impacts_2021")
155
 
 
 
156
 
 
 
 
 
 
 
 
157
 
 
 
158
 
159
 
160
 
 
1
+ # +
2
  import ibis
3
  from ibis import _
 
4
  import xarray
5
  from shapely.geometry import box
6
  from geocube.api.core import make_geocube
7
  import geopandas
8
  import fiona
9
 
10
+ import multiprocessing.popen_spawn_posix
11
+ from dask.distributed import Client, LocalCluster, Lock
12
+ import rioxarray
13
+
14
+
15
 
16
  # +
17
  fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
 
70
  )
71
 
72
 
73
+ # +
74
+ # # %%time
75
+ # smoke test -- only the lower 48 states!
76
+ # (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
77
  # -
78
 
79
+ # Or be bold!
80
+ df = pad_labeled.to_pandas()
81
+ geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
82
+ geo.to_parquet("pad-filtered.parquet")
83
+
84
 
85
  def zonal_stats(cog, geo, band_name = "mean", row_n = "row_n"):
86
  # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
87
  raster = (rioxarray.
88
+ open_rasterio('/vsicurl/'+cog, masked=True, chunks=True, lock=False).
89
+ rio.clip_box(*geo.total_bounds, crs=geo.crs).
90
+ rio.clip(geo.geometry.values, crs=geo.crs, from_disk=True).
91
  sel(band=1).drop_vars("band")
92
  )
93
  out_grid = make_geocube(
 
105
  return geo
106
 
107
 
108
+ import numpy as np
109
+ # consider doing multiple cogs per slice
110
+ def piecewise_zonal2(cog, geo, band_name = "mean", dirname = "pad_parquet", n = 10000, row_n = "row_n"):
111
+ total = len(geo)
112
+ for i in range(0,total,n):
113
+ k = i // n
114
+ path = f"{dirname}/part_{k}.parquet"
115
+ print(f"processing {path}")
116
+ end = np.min([i + n,total])
117
+ geo_slice = geo.iloc[i:end]
118
+ geo_slice = zonal_stats(cog, geo_slice, band_name, row_n)
119
+ geo_slice.to_parquet(path)
120
 
121
 
122
+ # %%time
123
+ piecewise_zonal2(cog, geo, "richness", dirname = "pad_mobi", n = 50000) # 6 min
 
 
124
 
 
125
 
126
+ # # Manual approach
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
+ # +
129
+ import geopandas
130
+ import multiprocessing.popen_spawn_posix
131
+ from dask.distributed import Client, LocalCluster, Lock
132
+ import rioxarray
133
 
134
+ geo = geopandas.read_parquet("pad_mobi") # ~ 4.8 GB RAM
135
 
136
+ # +
137
  # %%time
138
+ band_name = "human_impact"
139
+ row_n = "row_n"
140
+ cog = "https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
141
 
142
+ raster = (rioxarray.
143
+ open_rasterio('/vsicurl/'+cog, masked=True, chunks=True, lock=False).
144
+ rio.clip_box(*geo.total_bounds, crs=geo.crs).
145
+ rio.clip(geo.geometry.values, geo.crs, from_disk=True).
146
+ sel(band=1).drop_vars("band")
147
+ )
148
 
149
  # +
150
+ # %%time
151
 
152
+ band_name = "human_impact"
153
+ row_n = "row_n"
154
+ cog = "https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
 
 
 
 
 
155
 
156
 
157
+ with LocalCluster() as cluster, Client(cluster) as client:
158
+ raster = (rioxarray.
159
+ open_rasterio('/vsicurl/'+cog, masked=True, chunks=True, lock=False).
160
+ rio.clip(geo.geometry.values, geo.crs, from_disk=True).
161
+ sel(band=1).drop_vars("band")
162
+ )
163
 
164
+ # +
165
  # %%time
 
 
 
 
 
166
 
167
+ out_grid = make_geocube(
168
+ vector_data=geo,
169
+ measurements=['row_n'],
170
+ like=raster, # ensure the data are on the same grid
171
+ )
172
+ # ~ +1 Gb, 1.2s
173
 
174
+ # +
175
  # %%time
176
+ # 100 ~ 30s, 1000 ~ 30s
 
177
 
178
+ out_grid["values"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
179
+ grouped_raster = out_grid.drop_vars("spatial_ref").groupby(out_grid.row_n) # ~ +3 Gb
180
 
181
+ # +
182
+ # %%time
183
+ grid_mean = grouped_raster.mean().rename({"values": band_name})
184
+ zonal_stats = xarray.merge([grid_mean]).to_dataframe()
185
+ geo = geo.merge(zonal_stats, how="left", on=row_n)
186
+ geo.to_parquet("test.parquet")
187
+ len(geo)
188
 
189
+ # 1.2 s
190
+ # -
191
 
192
 
193