dynamic charts

Files changed:
- app.py: +35 -32
- preprocess.py: +79 -46
app.py (CHANGED)
@@ -109,11 +109,11 @@ gap = {
         ['1', "#26633d"],
         ['2', "#879647"],
         ['3', "#BBBBBB"],
-        ['4', "#
+        ['4', "#F8F8F8"]
     ]
 }

-
+iucn = {
     'property': 'IUCN_Cat',
     'type': 'categorical',
     'stops': [
@@ -125,7 +125,7 @@ IUCN = {
         ["V", "#9932CC"],
         ["VI", "#9400D3"],
         ["Other Conservation Area", "#DDA0DD"],
-        ["Unassigned", "#
+        ["Unassigned", "#F8F8F8"]
     ]
 }

@@ -174,7 +174,8 @@ def pad_style(paint, alpha):
     }
 }]}

-
+manager_colors = {"bucket": ["public", "tribal", "mixed", "private"],
+                  "color": [public_color, tribal_color, mixed_color, private_color]}

 # +
 ## Map controls sidebar
@@ -194,7 +195,7 @@ with st.sidebar:

     style_options = {
         "GAP Status Code": gap,
-        "IUCN Status Code":
+        "IUCN Status Code": iucn,
         "Manager Type": manager,
         "Fee/Easement": easement,
         "Mean Richness": richness,
@@ -240,7 +241,6 @@ with st.sidebar:
     hi="https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
     m.add_cog_layer(hi, palette="purples", name="Human Impact", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)

-
     # "## Boundaries"
     # boundaries = st.radio("Boundaries:",
     #                       ["None",
@@ -253,8 +253,6 @@ with st.sidebar:
     "## Basemaps"
     if st.toggle("Shaded Relief Topo"):
         m.add_basemap("Esri.WorldShadedRelief")
-
-

     "## Additional elements"
     # Fire Polygons, USGS
@@ -275,15 +273,32 @@ with st.sidebar:
         "paint": {"fill-color": "#FFA500", "fill-opacity": 0.2}}]}
     m.add_pmtiles(usgs, name="Fire", style=combined_style, overlay=True, show=True, zoom_to_layer=False)

-
-
-
 # +
-
 # And here we go!
 m.to_streamlit(height=700)
 # -

+
+select_column = {
+    "GAP Status Code": "GAP_Sts",
+    "IUCN Status Code": "IUCN_Cat",
+    "Manager Type": "bucket",
+    "Fee/Easement": "FeatClass",
+    "Mean Richness": "bucket",
+    "Mean RSR": "bucket",
+    "custom": "bucket"}
+column = select_column[style_choice]
+
+select_colors = {
+    "GAP Status Code": gap["stops"],
+    "IUCN Status Code": iucn["stops"],
+    "Manager Type": manager["stops"],
+    "Fee/Easement": easement["stops"],
+    "Mean Richness": manager["stops"],
+    "Mean RSR": manager["stops"],
+    "custom": manager["stops"]}
+colors = ibis.memtable(select_colors[style_choice], columns = [column, "color"]).to_pandas()
+
 st.divider()

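The hunk above wires the sidebar's style choice to both a grouping column and a matching color lookup. A minimal, stand-alone sketch of that lookup, with a hard-coded style_choice and trimmed-down dictionaries standing in for the full select_column / select_colors in app.py:

import ibis

style_choice = "GAP Status Code"   # normally set by the sidebar control

# trimmed stand-ins for the select_column / select_colors dicts above
select_column = {"GAP Status Code": "GAP_Sts",
                 "IUCN Status Code": "IUCN_Cat"}
select_colors = {"GAP Status Code": [['1', "#26633d"], ['2', "#879647"],
                                     ['3', "#BBBBBB"], ['4', "#F8F8F8"]],
                 "IUCN Status Code": [["V", "#9932CC"], ["Unassigned", "#F8F8F8"]]}

column = select_column[style_choice]                    # e.g. "GAP_Sts"
colors = ibis.memtable(select_colors[style_choice],     # color stops -> small lookup table
                       columns=[column, "color"]).to_pandas()
print(colors)   # two columns: the grouping column and its display color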
@@ -291,56 +306,44 @@ st.divider()
 us_lower_48_area_m2 = 7.8e+12

 @st.cache_data()
-def summary_table():
-    x = ibis.memtable({"bucket": ["public", "tribal", "mixed", "private"],
-                       "color": [public_color, tribal_color, mixed_color, private_color]})
+def summary_table(column = column, colors = colors):
     df = (pad_data.
-          group_by(_
+          group_by(_[column]).
           aggregate(percent_protected = 100 * _.area.sum() / us_lower_48_area_m2,
                     mean_richness = (_.richness * _.area).sum() / _.area.sum(),
                     mean_rsr = (_.rsr * _.area).sum() / _.area.sum()
          ).
          mutate(percent_protected = _.percent_protected.round())
-         ).inner_join(
+         ).inner_join(colors, column)
     return df.to_pandas()

-df = summary_table()
-# st.table(richness_table)
-
-
-# +
-#summary_table.to_pandas()
+df = summary_table(column, colors)

-# +

 base = alt.Chart(df).encode(
     alt.Theta("percent_protected:Q").stack(True),
     alt.Color("color:N").scale(None).legend(None)
 )

-
 area_chart = (
     base.mark_arc(innerRadius=50, outerRadius=120) +
-    base.mark_text(radius=165, size=20).encode(text=
+    base.mark_text(radius=165, size=20).encode(text=column) +
     base.mark_text(radius=135, size=20).encode(text="percent_protected:N")
 )

 # area_chart

 # +
-
 richness_chart = alt.Chart(df).mark_bar().encode(
-    x=
+    x=column,
     y='mean_richness',
     color=alt.Color('color').scale(None)
 )
-#richness_chart


 # +
-
 rsr_chart = alt.Chart(df).mark_bar().encode(
-    x=
+    x=column,
     y='mean_rsr',
     color=alt.Color('color').scale(None)
 )
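To make the rewritten summary_table concrete, here is a self-contained sketch of the same ibis pattern: group by the chosen column, compute area-weighted means, and inner-join the color lookup. The pad_data rows, areas, and join table below are made up; only the expression structure mirrors the app.

import ibis
from ibis import _

# hypothetical stand-in for pad_data (area in m^2)
pad_data = ibis.memtable({
    "bucket":   ["public", "public", "private", "tribal"],
    "area":     [4.0e11, 1.0e11, 2.0e11, 5.0e10],
    "richness": [12.0, 20.0, 8.0, 15.0],
    "rsr":      [0.30, 0.50, 0.20, 0.40],
})
colors = ibis.memtable({"bucket": ["public", "tribal", "private"],
                        "color":  ["#26633d", "#879647", "#BBBBBB"]})  # placeholder hex values

us_lower_48_area_m2 = 7.8e+12
column = "bucket"

df = (pad_data
      .group_by(_[column])
      .aggregate(percent_protected = 100 * _.area.sum() / us_lower_48_area_m2,
                 mean_richness = (_.richness * _.area).sum() / _.area.sum(),  # area-weighted mean
                 mean_rsr = (_.rsr * _.area).sum() / _.area.sum())
      .mutate(percent_protected = _.percent_protected.round())
      .inner_join(colors, column)   # attach one display color per group
      .to_pandas())
print(df)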
@@ -355,7 +358,7 @@ rsr_chart = alt.Chart(df).mark_bar().encode(
 col1, col2, col3 = st.columns(3)

 with col1:
-    "#### Percent of Continental US Area"
+    f"#### Percent of Continental US Area"
     st.altair_chart(area_chart, use_container_width=True)

 # -
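The chart hunks above layer an arc mark with two text rings and take the slice colors straight from the joined hex column. A stand-alone sketch of that layering, with a hypothetical data frame in place of the summary table:

import altair as alt
import pandas as pd

df = pd.DataFrame({"bucket": ["public", "tribal", "mixed", "private"],   # made-up values
                   "percent_protected": [24.0, 2.0, 1.0, 4.0],
                   "color": ["#26633d", "#879647", "#BBBBBB", "#F8F8F8"]})
column = "bucket"

base = alt.Chart(df).encode(
    alt.Theta("percent_protected:Q").stack(True),
    alt.Color("color:N").scale(None).legend(None)   # scale(None): use the hex codes as-is
)

area_chart = (
    base.mark_arc(innerRadius=50, outerRadius=120) +             # the donut
    base.mark_text(radius=165, size=20).encode(text=column) +    # outer ring: group labels
    base.mark_text(radius=135, size=20).encode(text="percent_protected:N")  # inner ring: values
)
area_chart.save("donut.html")   # inside the app this becomes st.altair_chart(area_chart, ...)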
preprocess.py (CHANGED)
@@ -1,12 +1,17 @@
+# +
 import ibis
 from ibis import _
-import rioxarray
 import xarray
 from shapely.geometry import box
 from geocube.api.core import make_geocube
 import geopandas
 import fiona

+import multiprocessing.popen_spawn_posix
+from dask.distributed import Client, LocalCluster, Lock
+import rioxarray
+
+

 # +
 fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
@@ -65,14 +70,24 @@ pad_labeled = (
 )


+# +
+# # %%time
+# smoke test -- only the lower 48 states!
+# (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
 # -

+# Or be bold!
+df = pad_labeled.to_pandas()
+geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
+geo.to_parquet("pad-filtered.parquet")
+

 def zonal_stats(cog, geo, band_name = "mean", row_n = "row_n"):
     # https://corteva.github.io/geocube/html/examples/zonal_statistics.html
     raster = (rioxarray.
-              open_rasterio('/vsicurl/'+cog, masked=True).
-              rio.
+              open_rasterio('/vsicurl/'+cog, masked=True, chunks=True, lock=False).
+              rio.clip_box(*geo.total_bounds, crs=geo.crs).
+              rio.clip(geo.geometry.values, crs=geo.crs, from_disk=True).
               sel(band=1).drop_vars("band")
     )
     out_grid = make_geocube(
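The new zonal_stats opens the COG lazily (chunks=True) and clips twice: clip_box does a coarse window read limited to the vector layer's bounding box, then clip applies the exact geometry mask streamed from disk. A minimal sketch of that read pattern against the human-footprint COG referenced in app.py; the small AOI polygon and its CRS are hypothetical, not part of the preprocessing pipeline:

import geopandas
import rioxarray
from shapely.geometry import box

cog = "https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"

# hypothetical area of interest; in preprocess.py this is the PAD-US GeoDataFrame
geo = geopandas.GeoDataFrame({"row_n": [0]},
                             geometry=[box(-122.5, 37.0, -122.0, 37.5)],
                             crs="EPSG:4326")

raster = (rioxarray.
          open_rasterio('/vsicurl/' + cog, masked=True, chunks=True, lock=False).
          rio.clip_box(*geo.total_bounds, crs=geo.crs).               # coarse window read
          rio.clip(geo.geometry.values, crs=geo.crs, from_disk=True). # exact mask
          sel(band=1).drop_vars("band"))
print(raster.shape)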
@@ -90,71 +105,89 @@ def zonal_stats(cog, geo, band_name = "mean", row_n = "row_n"):
     return geo


-
-
-#
-
-# testing -- only the lower 48 states!
-# (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
-
-# +
-
-
-total_features = tbl.count().to_pandas()
-n = 10000
-steps = range(0, total_features, 10000)
-parts = [*[i for i in steps], total_features]
-for i in range(0,len(steps)):
-    begin = parts[i]
-    end = parts[i+1] - 1
-    df = tbl.filter([_[row_n] > begin, _[row_n] <= end]).to_pandas()
-    geo = geopandas.GeoDataFrame(df, geometry=df.geometry, crs=crs)
-    geo = zonal_stats(cog, geo, band_name, row_n)
-    geo.to_parquet(f"{dirname}/part_{i}.parquet")
-
-
-#
+import numpy as np
+# consider doing multiple cogs per slice
+def piecewise_zonal2(cog, geo, band_name = "mean", dirname = "pad_parquet", n = 10000, row_n = "row_n"):
+    total = len(geo)
+    for i in range(0,total,n):
+        k = i // n
+        path = f"{dirname}/part_{k}.parquet"
+        print(f"processing {path}")
+        end = np.min([i + n,total])
+        geo_slice = geo.iloc[i:end]
+        geo_slice = zonal_stats(cog, geo_slice, band_name, row_n)
+        geo_slice.to_parquet(path)


 # %%time
-
-
-
-
-
+piecewise_zonal2(cog, geo, "richness", dirname = "pad_mobi", n = 50000) # 6 min


-# +
-
-
-
-        end = np.min([i + n,total])
-        geo_slice = geo.iloc[i:end]
-        geo_slice = zonal_stats(cog, geo_slice, band_name, row_n)
-        geo_slice.to_parquet(f"{dirname}/part_{i}.parquet")
-
-
-
-# %%time
-piecewise_zonal2(cog, geo, "richness") # 6 min
-
-
-import geopandas
-gdf = geopandas.read_parquet("pad_parquet2")
-
-
-# %%time
-
-gdf = piecewise_zonal2(human_impacts_2021, gdf, "human_impacts_2021")
+# # Manual approach
+
+# +
+import geopandas
+import multiprocessing.popen_spawn_posix
+from dask.distributed import Client, LocalCluster, Lock
+import rioxarray
+
+geo = geopandas.read_parquet("pad_mobi") # ~ 4.8 GB RAM
+
+# +
+# %%time
+band_name = "human_impact"
+row_n = "row_n"
+cog = "https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
+
+raster = (rioxarray.
+          open_rasterio('/vsicurl/'+cog, masked=True, chunks=True, lock=False).
+          rio.clip_box(*geo.total_bounds, crs=geo.crs).
+          rio.clip(geo.geometry.values, geo.crs, from_disk=True).
+          sel(band=1).drop_vars("band")
+)
+
+# +
+# %%time
+
+band_name = "human_impact"
+row_n = "row_n"
+cog = "https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
+
+
+with LocalCluster() as cluster, Client(cluster) as client:
+    raster = (rioxarray.
+              open_rasterio('/vsicurl/'+cog, masked=True, chunks=True, lock=False).
+              rio.clip(geo.geometry.values, geo.crs, from_disk=True).
+              sel(band=1).drop_vars("band")
+    )
+
+# +
+# %%time
+
+out_grid = make_geocube(
+    vector_data=geo,
+    measurements=['row_n'],
+    like=raster, # ensure the data are on the same grid
+)
+# ~ +1 Gb, 1.2s
+
+# +
+# %%time
+# 100 ~ 30s, 1000 ~ 30s
+
+out_grid["values"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
+grouped_raster = out_grid.drop_vars("spatial_ref").groupby(out_grid.row_n) # ~ +3 Gb
+
+# +
+# %%time
+grid_mean = grouped_raster.mean().rename({"values": band_name})
+zonal_stats = xarray.merge([grid_mean]).to_dataframe()
+geo = geo.merge(zonal_stats, how="left", on=row_n)
+geo.to_parquet("test.parquet")
+len(geo)
+
+# 1.2 s
+# -

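piecewise_zonal2 processes the large GeoDataFrame in windows of n rows and writes one parquet part per window, so a failed or rerun slice does not redo the whole table. The slicing arithmetic on its own, sketched with pandas standing in for the GeoDataFrame; write_parts is a hypothetical, trimmed version of piecewise_zonal2 without the zonal_stats call:

import os
import numpy as np
import pandas as pd

def write_parts(df, dirname="parts_demo", n=3):
    os.makedirs(dirname, exist_ok=True)
    total = len(df)
    for i in range(0, total, n):
        k = i // n                      # part index: 0, 1, 2, ...
        end = np.min([i + n, total])    # last window may be shorter than n
        df.iloc[i:end].to_parquet(f"{dirname}/part_{k}.parquet")

write_parts(pd.DataFrame({"row_n": range(10)}))
print(sorted(os.listdir("parts_demo")))   # part_0.parquet ... part_3.parquet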