Spaces:
Running
Running
Commit
·
593d846
1
Parent(s):
56784d5
wip - charts
Browse files
Added charts grouped by status, fixed non-conserved data (needed a unique id), fixed chatbot for fire risk.
Still to do: 1) SE Great Basin is missing for non-conserved areas; 2) the 30x30 status chart doesn't update dynamically; and 3) ecoregion colors still need to be implemented in the labels for the 30x30 status charts.
- app/app.py +25 -118
- app/system_prompt.txt +9 -1
- app/utils.py +273 -42
- app/variables.py +8 -6
- preprocess/preprocess.ipynb +8 -23
app/app.py
CHANGED
@@ -26,10 +26,9 @@ if "mydata" not in set(current_tables):
|
|
26 |
tbl = con.read_parquet(ca_parquet)
|
27 |
con.create_table("mydata", tbl)
|
28 |
|
29 |
-
|
30 |
ca = con.table("mydata")
|
31 |
|
32 |
-
|
33 |
for key in [
|
34 |
'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
|
35 |
'fire', 'rxburn', 'disadvantaged_communities',
|
@@ -37,6 +36,13 @@ for key in [
|
|
37 |
if key not in st.session_state:
|
38 |
st.session_state[key] = False
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
|
41 |
|
42 |
#customizing style with CSS
|
@@ -128,8 +134,6 @@ m = leafmap.Map(style="positron")
|
|
128 |
#############
|
129 |
|
130 |
|
131 |
-
|
132 |
-
|
133 |
##### Chatbot stuff
|
134 |
|
135 |
|
@@ -160,98 +164,22 @@ prompt = ChatPromptTemplate.from_messages([
|
|
160 |
structured_llm = llm.with_structured_output(SQLResponse)
|
161 |
few_shot_structured_llm = prompt | structured_llm
|
162 |
|
163 |
-
# @st.cache_data(ttl=600) # Cache expires every 10 minutes
|
164 |
-
def run_sql(query,color_choice):
|
165 |
-
"""
|
166 |
-
Filter data based on an LLM-generated SQL query and return matching IDs.
|
167 |
-
|
168 |
-
Args:
|
169 |
-
query (str): The natural language query to filter the data.
|
170 |
-
color_choice (str): The column used for plotting.
|
171 |
-
"""
|
172 |
-
output = few_shot_structured_llm.invoke(query)
|
173 |
-
sql_query = output.sql_query
|
174 |
-
explanation =output.explanation
|
175 |
-
|
176 |
-
if not sql_query: # if the chatbot can't generate a SQL query.
|
177 |
-
st.success(explanation)
|
178 |
-
return pd.DataFrame({'id' : []})
|
179 |
-
|
180 |
-
result = ca.sql(sql_query).execute()
|
181 |
-
if result.empty :
|
182 |
-
explanation = "This query did not return any results. Please try again with a different query."
|
183 |
-
st.warning(explanation, icon="⚠️")
|
184 |
-
st.caption("SQL Query:")
|
185 |
-
st.code(sql_query,language = "sql")
|
186 |
-
if 'geom' in result.columns:
|
187 |
-
return result.drop('geom',axis = 1)
|
188 |
-
else:
|
189 |
-
return result
|
190 |
-
|
191 |
-
elif ("id" and "geom" in result.columns):
|
192 |
-
style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
|
193 |
-
legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
|
194 |
-
position = 'bottom-left'
|
195 |
-
fontsize = 15
|
196 |
-
bg_color = 'white'
|
197 |
-
|
198 |
-
# shorten legend for ecoregions
|
199 |
-
if color_choice == "Ecoregion":
|
200 |
-
legend_d = {key.replace("Northern California", "NorCal"): value for key, value in legend_d.items()}
|
201 |
-
legend_d = {key.replace("Southern California", "SoCal"): value for key, value in legend_d.items()}
|
202 |
-
legend_d = {key.replace("Southeastern", "SE."): value for key, value in legend_d.items()}
|
203 |
-
legend_d = {key.replace("and", "&"): value for key, value in legend_d.items()}
|
204 |
-
legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
|
205 |
-
legend_d = {key.replace("Northwestern", "NW."): value for key, value in legend_d.items()}
|
206 |
-
bg_color = 'rgba(255, 255, 255, 0.6)'
|
207 |
-
fontsize = 12
|
208 |
-
|
209 |
-
m.add_legend(legend_dict = legend_d, position = position, bg_color = bg_color, fontsize = fontsize)
|
210 |
-
m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
|
211 |
-
m.fit_bounds(result.total_bounds.tolist())
|
212 |
-
result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
|
213 |
-
else:
|
214 |
-
|
215 |
-
st.write(result) # if we aren't mapping, just print out the data
|
216 |
-
|
217 |
-
with st.popover("Explanation"):
|
218 |
-
st.write(explanation)
|
219 |
-
st.caption("SQL Query:")
|
220 |
-
st.code(sql_query,language = "sql")
|
221 |
-
|
222 |
-
return result
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for printed table
|
227 |
-
filters = [_.id.isin(ids)]
|
228 |
-
combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
|
229 |
-
df = get_summary(ca, combined_filter, [column], colors) # df used for charts
|
230 |
-
return df
|
231 |
-
|
232 |
-
|
233 |
chatbot_toggles = {key: False for key in [
|
234 |
'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
|
235 |
'fire', 'rxburn', 'disadvantaged_communities',
|
236 |
'svi',
|
237 |
]}
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
#############
|
242 |
|
243 |
-
|
244 |
filters = {}
|
245 |
|
246 |
with st.sidebar:
|
247 |
-
|
248 |
-
color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
|
249 |
colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
|
250 |
alpha = 0.8
|
251 |
st.divider()
|
252 |
|
253 |
|
254 |
-
|
255 |
##### Chatbot
|
256 |
with st.container():
|
257 |
|
@@ -271,8 +199,8 @@ with st.container():
|
|
271 |
'''
|
272 |
Exploratory data queries:
|
273 |
- What is a GAP code?
|
|
|
274 |
- What is the total acreage of areas designated as easements?
|
275 |
-
- Which GAP code has been impacted the most by fire?
|
276 |
- Who manages the land with the highest amount of irrecoverable carbon and highest social vulnerability index?
|
277 |
'''
|
278 |
|
@@ -332,7 +260,6 @@ with st.sidebar:
|
|
332 |
|
333 |
|
334 |
# People Section
|
335 |
-
|
336 |
with st.expander("👤 People"):
|
337 |
a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
|
338 |
show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
|
@@ -361,15 +288,11 @@ with st.sidebar:
|
|
361 |
|
362 |
st.divider()
|
363 |
st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
|
364 |
-
for col,val in style_options.items():
|
365 |
-
for name in val['stops'][0]:
|
366 |
-
key = val['property']+str(name)
|
367 |
-
st.session_state[key] = default_gap.get(name, True)
|
368 |
|
369 |
for label in style_options: # get selected filters (based on the buttons selected)
|
370 |
with st.expander(label):
|
371 |
if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
|
372 |
-
opts = getButtons(style_options, label,
|
373 |
else: # other buttons are not on by default.
|
374 |
opts = getButtons(style_options, label)
|
375 |
filters.update(opts)
|
@@ -383,42 +306,25 @@ with st.sidebar:
|
|
383 |
filter_vals = []
|
384 |
|
385 |
st.divider()
|
|
|
386 |
st.markdown("""
|
387 |
<p class="medium-font-sidebar">
|
388 |
<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;' aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
|
389 |
-
""", unsafe_allow_html=True)
|
390 |
|
391 |
# Display CA 30x30 Data
|
392 |
if 'out' not in locals():
|
393 |
style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
|
394 |
-
|
395 |
-
position =
|
396 |
-
fontsize = 15
|
397 |
-
bg_color = 'white'
|
398 |
-
|
399 |
-
# shorten legend for ecoregions
|
400 |
-
if color_choice == "Ecoregion":
|
401 |
-
legend_d = {key.replace("Northern California", "NorCal"): value for key, value in legend_d.items()}
|
402 |
-
legend_d = {key.replace("Southern California", "SoCal"): value for key, value in legend_d.items()}
|
403 |
-
legend_d = {key.replace("Southeastern", "SE."): value for key, value in legend_d.items()}
|
404 |
-
legend_d = {key.replace("and", "&"): value for key, value in legend_d.items()}
|
405 |
-
legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
|
406 |
-
legend_d = {key.replace("Northwestern", "NW."): value for key, value in legend_d.items()}
|
407 |
-
bg_color = 'rgba(255, 255, 255, 0.6)'
|
408 |
-
fontsize = 12
|
409 |
-
|
410 |
-
|
411 |
-
m.add_legend(legend_dict = legend_d, position = position, bg_color = bg_color, fontsize = fontsize)
|
412 |
m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
|
413 |
|
414 |
-
|
415 |
-
|
416 |
column = select_column[color_choice]
|
417 |
|
418 |
select_colors = {
|
419 |
-
"Year": year["stops"],
|
420 |
-
"GAP Code": gap["stops"],
|
421 |
"30x30 Status": status["stops"],
|
|
|
|
|
422 |
"Ecoregion": ecoregion["stops"],
|
423 |
"Manager Type": manager["stops"],
|
424 |
"Easement": easement["stops"],
|
@@ -435,12 +341,12 @@ colors = (
|
|
435 |
# get summary tables used for charts + printed table
|
436 |
# df - charts; df_tab - printed table (omits colors)
|
437 |
if 'out' not in locals():
|
438 |
-
df,df_tab,df_percent = summary_table(ca, column,
|
439 |
-
total_percent = df_percent.
|
440 |
|
441 |
else:
|
442 |
df = summary_table_sql(ca, column, colors, ids)
|
443 |
-
total_percent = df.
|
444 |
|
445 |
|
446 |
# charts displayed based on color_by variable
|
@@ -453,7 +359,6 @@ rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)"
|
|
453 |
justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
|
454 |
svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
|
455 |
|
456 |
-
|
457 |
main = st.container()
|
458 |
|
459 |
with main:
|
@@ -471,7 +376,11 @@ with main:
|
|
471 |
|
472 |
st.markdown(f"{total_percent}% CA Covered", help = "Updates based on displayed data")
|
473 |
st.altair_chart(area_plot(df, column), use_container_width=True)
|
474 |
-
|
|
|
|
|
|
|
|
|
475 |
if show_richness:
|
476 |
st.altair_chart(richness_chart, use_container_width=True)
|
477 |
|
@@ -496,8 +405,6 @@ with main:
|
|
496 |
if show_rxburn:
|
497 |
st.altair_chart(rx_10_chart, use_container_width=True)
|
498 |
|
499 |
-
|
500 |
-
|
501 |
|
502 |
st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
|
503 |
|
|
|
26 |
tbl = con.read_parquet(ca_parquet)
|
27 |
con.create_table("mydata", tbl)
|
28 |
|
|
|
29 |
ca = con.table("mydata")
|
30 |
|
31 |
+
# session state for syncing app
|
32 |
for key in [
|
33 |
'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
|
34 |
'fire', 'rxburn', 'disadvantaged_communities',
|
|
|
36 |
if key not in st.session_state:
|
37 |
st.session_state[key] = False
|
38 |
|
39 |
+
for col,val in style_options.items():
|
40 |
+
for name in val['stops']:
|
41 |
+
key = val['property']+str(name[0])
|
42 |
+
if key not in st.session_state:
|
43 |
+
st.session_state[key] = default_boxes.get(name[0], True)
|
44 |
+
|
45 |
+
|
46 |
st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
|
47 |
|
48 |
#customizing style with CSS
|
|
|
134 |
#############
|
135 |
|
136 |
|
|
|
|
|
137 |
##### Chatbot stuff
|
138 |
|
139 |
|
|
|
164 |
structured_llm = llm.with_structured_output(SQLResponse)
|
165 |
few_shot_structured_llm = prompt | structured_llm
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
chatbot_toggles = {key: False for key in [
|
168 |
'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
|
169 |
'fire', 'rxburn', 'disadvantaged_communities',
|
170 |
'svi',
|
171 |
]}
|
|
|
|
|
|
|
172 |
#############
|
173 |
|
|
|
174 |
filters = {}
|
175 |
|
176 |
with st.sidebar:
|
177 |
+
color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
|
|
|
178 |
colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
|
179 |
alpha = 0.8
|
180 |
st.divider()
|
181 |
|
182 |
|
|
|
183 |
##### Chatbot
|
184 |
with st.container():
|
185 |
|
|
|
199 |
'''
|
200 |
Exploratory data queries:
|
201 |
- What is a GAP code?
|
202 |
+
- What percentage of 30x30 conserved land has been impacted by wildfire?
|
203 |
- What is the total acreage of areas designated as easements?
|
|
|
204 |
- Who manages the land with the highest amount of irrecoverable carbon and highest social vulnerability index?
|
205 |
'''
|
206 |
|
|
|
260 |
|
261 |
|
262 |
# People Section
|
|
|
263 |
with st.expander("👤 People"):
|
264 |
a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
|
265 |
show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
|
|
|
288 |
|
289 |
st.divider()
|
290 |
st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
|
|
|
|
|
|
|
|
|
291 |
|
292 |
for label in style_options: # get selected filters (based on the buttons selected)
|
293 |
with st.expander(label):
|
294 |
if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
|
295 |
+
opts = getButtons(style_options, label, default_boxes)
|
296 |
else: # other buttons are not on by default.
|
297 |
opts = getButtons(style_options, label)
|
298 |
filters.update(opts)
|
|
|
306 |
filter_vals = []
|
307 |
|
308 |
st.divider()
|
309 |
+
# adding github logo
|
310 |
st.markdown("""
|
311 |
<p class="medium-font-sidebar">
|
312 |
<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;' aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
|
313 |
+
""", unsafe_allow_html=True)
|
314 |
|
315 |
# Display CA 30x30 Data
|
316 |
if 'out' not in locals():
|
317 |
style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
|
318 |
+
legend, position, bg_color, fontsize = getLegend(style_options, color_choice)
|
319 |
+
m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
|
321 |
|
|
|
|
|
322 |
column = select_column[color_choice]
|
323 |
|
324 |
select_colors = {
|
|
|
|
|
325 |
"30x30 Status": status["stops"],
|
326 |
+
"GAP Code": gap["stops"],
|
327 |
+
"Year": year["stops"],
|
328 |
"Ecoregion": ecoregion["stops"],
|
329 |
"Manager Type": manager["stops"],
|
330 |
"Easement": easement["stops"],
|
|
|
341 |
# get summary tables used for charts + printed table
|
342 |
# df - charts; df_tab - printed table (omits colors)
|
343 |
if 'out' not in locals():
|
344 |
+
df, df_tab, df_percent, df_bar_30x30 = summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals)
|
345 |
+
total_percent = df_percent.percent_CA.sum().round(2)
|
346 |
|
347 |
else:
|
348 |
df = summary_table_sql(ca, column, colors, ids)
|
349 |
+
total_percent = df.percent_CA.sum().round(2)
|
350 |
|
351 |
|
352 |
# charts displayed based on color_by variable
|
|
|
359 |
justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
|
360 |
svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
|
361 |
|
|
|
362 |
main = st.container()
|
363 |
|
364 |
with main:
|
|
|
376 |
|
377 |
st.markdown(f"{total_percent}% CA Covered", help = "Updates based on displayed data")
|
378 |
st.altair_chart(area_plot(df, column), use_container_width=True)
|
379 |
+
|
380 |
+
if 'df_bar_30x30' in locals(): #if we use chatbot, we won't have these graphs.
|
381 |
+
if column not in ["status", "gap_code"]:
|
382 |
+
st.altair_chart(stacked_bar(df_bar_30x30, column,'percent_group','status', color_choice + ' by 30x30 Status'), use_container_width=True)
|
383 |
+
|
384 |
if show_richness:
|
385 |
st.altair_chart(richness_chart, use_container_width=True)
|
386 |
|
|
|
405 |
if show_rxburn:
|
406 |
st.altair_chart(rx_10_chart, use_container_width=True)
|
407 |
|
|
|
|
|
408 |
|
409 |
st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
|
410 |
|
app/system_prompt.txt
CHANGED
@@ -147,10 +147,18 @@ sql_query:
|
|
147 |
## Example:
|
148 |
example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
|
149 |
sql_query:
|
150 |
-
SELECT "id", "geom", "name", "acres", "
|
151 |
FROM mydata
|
152 |
WHERE "percent_disadvantaged" > 0
|
153 |
AND "percent_rxburn_10yr" >= 0.3;
|
154 |
|
155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
Question: {input}
|
|
|
147 |
## Example:
|
148 |
example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
|
149 |
sql_query:
|
150 |
+
SELECT "id", "geom", "name", "acres", "rxburn", "percent_disadvantaged"
|
151 |
FROM mydata
|
152 |
WHERE "percent_disadvantaged" > 0
|
153 |
AND "percent_rxburn_10yr" >= 0.3;
|
154 |
|
155 |
|
156 |
+
|
157 |
+
## Example:
|
158 |
+
example_user: "What percentage of 30x30 conserved land has been impacted by wildfire?"
|
159 |
+
sql_query:
|
160 |
+
SELECT SUM("fire" * "acres") / SUM("acres") * 100 AS percent_fire
|
161 |
+
FROM mydata
|
162 |
+
WHERE "status" = '30x30-conserved';
|
163 |
+
|
164 |
Question: {input}
|
app/utils.py
CHANGED
@@ -18,11 +18,22 @@ from itertools import chain
|
|
18 |
|
19 |
from variables import *
|
20 |
|
21 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
df = ca.filter(combined_filter)
|
23 |
df = (df
|
24 |
.group_by(*column) # unpack the list for grouping
|
25 |
-
.aggregate(
|
|
|
26 |
mean_richness = (_.richness * _.acres).sum() / _.acres.sum(),
|
27 |
mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
|
28 |
mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
|
@@ -32,16 +43,21 @@ def get_summary(ca, combined_filter, column, colors=None): #summary stats, based
|
|
32 |
mean_disadvantaged = (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
|
33 |
mean_svi = (_.svi * _.acres).sum() / _.acres.sum(),
|
34 |
)
|
35 |
-
.mutate(
|
|
|
36 |
)
|
|
|
|
|
|
|
37 |
if colors is not None and not colors.empty: #only the df will have colors, df_tab doesn't since we are printing it.
|
38 |
-
df = df.inner_join(colors, column)
|
39 |
df = df.cast({col: "string" for col in column})
|
40 |
df = df.to_pandas()
|
41 |
return df
|
|
|
42 |
|
43 |
-
|
44 |
-
|
45 |
filters = []
|
46 |
if filter_cols and filter_vals: #if a filter is selected, add to list of filters
|
47 |
for filter_col, filter_val in zip(filter_cols, filter_vals):
|
@@ -51,42 +67,153 @@ def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): #
|
|
51 |
filters.append(getattr(_, filter_col) == filter_val[0])
|
52 |
if column not in filter_cols: #show color_by column in table by adding it as a filter (if it's not already a filter)
|
53 |
filter_cols.append(column)
|
54 |
-
filters.append(getattr(_, column).isin(colorby_vals[column]))
|
55 |
combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
if column == "status": #need to include non-conserved in summary stats
|
60 |
-
combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','non-conserved']))
|
61 |
|
62 |
-
df = get_summary(ca, combined_filter, [column], colors) # df used for charts
|
63 |
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
|
70 |
-
def area_plot(df, column):
|
71 |
base = alt.Chart(df).encode(
|
72 |
-
alt.Theta("
|
73 |
)
|
74 |
-
pie = (
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
)
|
79 |
-
text = (
|
80 |
-
|
81 |
-
|
|
|
82 |
)
|
83 |
-
plot = pie
|
84 |
return plot.properties(width="container", height=290)
|
85 |
|
86 |
|
|
|
87 |
def bar_chart(df, x, y, title): #display summary stats for color_by column
|
88 |
#axis label angles / chart size
|
89 |
-
if x
|
90 |
angle = 270
|
91 |
height = 373
|
92 |
elif x == 'ecoregion': # make labels vertical and figure taller
|
@@ -97,34 +224,71 @@ def bar_chart(df, x, y, title): #display summary stats for color_by column
|
|
97 |
height = 310
|
98 |
|
99 |
# order of bars
|
|
|
|
|
100 |
if x == "established": # order labels in chronological order, not alphabetic.
|
101 |
sort = '-x'
|
102 |
elif x == "access_type": #order based on levels of openness
|
103 |
sort=['Open', 'Restricted', 'No Public', "Unknown"]
|
104 |
elif x == "manager_type":
|
105 |
sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
# modify label names in bar chart to fit in frame
|
110 |
label_transform = f"datum.{x}" # default; no change
|
111 |
if x == "access_type":
|
112 |
label_transform = f"replace(datum.{x}, ' Access', '')" #omit 'access' from access_type
|
113 |
elif x == "ecoregion":
|
114 |
-
label_transform =
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
x_title = next(key for key, value in select_column.items() if value == x)
|
117 |
chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
|
118 |
-
|
119 |
).encode(
|
120 |
x=alt.X("label:N",
|
121 |
axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
|
122 |
-
sort=sort),
|
123 |
-
y=alt.Y(y, axis=alt.Axis()),
|
124 |
color=alt.Color('color').scale(None),
|
125 |
-
).
|
|
|
|
|
|
|
126 |
return chart
|
127 |
|
|
|
|
|
128 |
def sync_checkboxes(source):
|
129 |
# gap 1 and gap 2 on -> 30x30-conserved on
|
130 |
if source in ["gap_code1", "gap_code2"]:
|
@@ -138,35 +302,31 @@ def sync_checkboxes(source):
|
|
138 |
# other-conserved on <-> gap 3 on
|
139 |
elif source == "gap_code3":
|
140 |
st.session_state["statusother-conserved"] = st.session_state.gap_code3
|
141 |
-
rerun_needed = True
|
142 |
elif source == "statusother-conserved":
|
143 |
if "gap_code3" in st.session_state and st.session_state["statusother-conserved"] != st.session_state.gap_code3:
|
144 |
st.session_state.gap_code3 = st.session_state["statusother-conserved"]
|
145 |
-
rerun_needed = True # Ensure UI updates
|
146 |
|
147 |
# unknown on <-> gap 4 on
|
148 |
elif source == "gap_code4":
|
149 |
st.session_state.statusunknown = st.session_state.gap_code4
|
150 |
-
|
151 |
elif source == "statusunknown":
|
152 |
if "gap_code4" in st.session_state and st.session_state.statusunknown != st.session_state.gap_code4:
|
153 |
st.session_state.gap_code4 = st.session_state.statusunknown
|
154 |
-
rerun_needed = True
|
155 |
|
156 |
# non-conserved on <-> gap 0
|
157 |
elif source == "gap_code0":
|
158 |
st.session_state['statusnon-conserved'] = st.session_state.gap_code0
|
159 |
-
|
160 |
elif source == "statusnon-conserved":
|
161 |
if "gap_code0" in st.session_state and st.session_state['statusnon-conserved'] != st.session_state.gap_code0:
|
162 |
st.session_state.gap_code0 = st.session_state['statusnon-conserved']
|
163 |
-
rerun_needed = True
|
164 |
|
165 |
|
166 |
-
def getButtons(style_options, style_choice,
|
167 |
column = style_options[style_choice]['property']
|
168 |
opts = [style[0] for style in style_options[style_choice]['stops']]
|
169 |
-
|
170 |
buttons = {}
|
171 |
for name in opts:
|
172 |
key = column + str(name)
|
@@ -185,15 +345,32 @@ def getColorVals(style_options, style_choice):
|
|
185 |
return d
|
186 |
|
187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
|
189 |
filters = []
|
190 |
for col, val in zip(filter_cols, filter_vals):
|
191 |
filters.append(["match", ["get", col], val, True, False])
|
192 |
combined_filters = ["all"] + filters
|
193 |
-
|
194 |
if "non-conserved" in list(chain.from_iterable(filter_vals)):
|
195 |
combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
|
196 |
-
|
197 |
style = {
|
198 |
"version": 8,
|
199 |
"sources": {
|
@@ -244,3 +421,57 @@ def get_pmtiles_style_llm(paint, ids):
|
|
244 |
]
|
245 |
}
|
246 |
return style
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
from variables import *
|
20 |
|
21 |
+
def colorTable(select_colors,color_choice,column):
|
22 |
+
colors = (ibis
|
23 |
+
.memtable(select_colors[color_choice], columns=[column, "color"])
|
24 |
+
.to_pandas()
|
25 |
+
)
|
26 |
+
return colors
|
27 |
+
|
28 |
+
def get_summary(ca, combined_filter, column, main_group, colors=None):
|
29 |
+
df = ca.filter(combined_filter)
|
30 |
+
#total acres for each group
|
31 |
+
group_totals = df.group_by(main_group).aggregate(total_acres=_.acres.sum())
|
32 |
df = ca.filter(combined_filter)
|
33 |
df = (df
|
34 |
.group_by(*column) # unpack the list for grouping
|
35 |
+
.aggregate(percent_CA=100 * _.acres.sum() / ca_area_acres,
|
36 |
+
acres = _.acres.sum(),
|
37 |
mean_richness = (_.richness * _.acres).sum() / _.acres.sum(),
|
38 |
mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
|
39 |
mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
|
|
|
43 |
mean_disadvantaged = (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
|
44 |
mean_svi = (_.svi * _.acres).sum() / _.acres.sum(),
|
45 |
)
|
46 |
+
.mutate(percent_CA=_.percent_CA.round(1),
|
47 |
+
acres=_.acres.round(1))
|
48 |
)
|
49 |
+
|
50 |
+
df = df.inner_join(group_totals, main_group)
|
51 |
+
df = df.mutate(percent_group=(100 * _.acres / _.total_acres).round(1))
|
52 |
if colors is not None and not colors.empty: #only the df will have colors, df_tab doesn't since we are printing it.
|
53 |
+
df = df.inner_join(colors, column[-1])
|
54 |
df = df.cast({col: "string" for col in column})
|
55 |
df = df.to_pandas()
|
56 |
return df
|
57 |
+
|
58 |
|
59 |
+
def summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals): # get df for charts + df_tab for printed table
|
60 |
+
colors = colorTable(select_colors,color_choice,column)
|
61 |
filters = []
|
62 |
if filter_cols and filter_vals: #if a filter is selected, add to list of filters
|
63 |
for filter_col, filter_val in zip(filter_cols, filter_vals):
|
|
|
67 |
filters.append(getattr(_, filter_col) == filter_val[0])
|
68 |
if column not in filter_cols: #show color_by column in table by adding it as a filter (if it's not already a filter)
|
69 |
filter_cols.append(column)
|
70 |
+
filters.append(getattr(_, column).isin(colorby_vals[column]))
|
71 |
combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
|
72 |
+
|
73 |
+
df_percent = get_summary(ca, combined_filter, [column],column, colors) # df used for percentage, excludes non-conserved.
|
74 |
+
df_tab = get_summary(ca, combined_filter, filter_cols, column, colors = None) #df used for printed table
|
75 |
+
if column == "status": #need to include non-conserved in summary stats
|
76 |
+
combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','unknown','non-conserved']))
|
77 |
+
df = get_summary(ca, combined_filter, [column], column, colors) # df used for charts
|
78 |
|
79 |
+
df_bar_30x30 = None # no stacked charts if we have status/gap_code
|
80 |
+
if column not in ["status","gap_code"]: # df for stacked 30x30 status bar chart
|
81 |
+
colors = colorTable(select_colors,"30x30 Status",'status')
|
82 |
+
combined_filter_status = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','unknown','non-conserved']))
|
83 |
+
df_bar_30x30 = get_summary(ca, combined_filter_status, [column, 'status'], column, colors) # df used for charts
|
84 |
+
return df, df_tab, df_percent, df_bar_30x30
|
85 |
|
|
|
|
|
86 |
|
|
|
87 |
|
88 |
+
def get_hex(df, color,sort_order):
|
89 |
+
return list(df.drop_duplicates(subset=color, keep="first")
|
90 |
+
.set_index(color)
|
91 |
+
.reindex(sort_order)
|
92 |
+
.dropna()["color"])
|
93 |
+
|
94 |
|
95 |
+
def stacked_bar(df, x, y, color, title):
    """Build a stacked Altair bar chart of `y` per `x`, with segments split by `color`.

    Args:
        df: pandas DataFrame containing the columns named by `x` and `color`,
            the field referenced by `y`, a "color" column of hex codes, and the
            tooltip columns "percent_CA", "percent_group" and "acres".
        x: Column shown on the x-axis; also selects bar order, label angle,
            chart height and the y-axis title.
        y: Altair field/shorthand for the y-axis (axis domain is fixed to 0-100).
        color: Column whose values define the stacked segments and the legend.
        title: Chart title.

    Returns:
        The configured Altair chart.
    """
    # bar order
    bar_orders = {
        "established": '-x',  # order labels in chronological order, not alphabetic.
        "access_type": ['Open', 'Restricted', 'No Public', "Unknown"],  # levels of openness
        "manager_type": ["Federal", "Tribal", "State", "Special District", "County", "City",
                         "HOA", "Joint", "Non Profit", "Private", "Unknown"],
        "status": ["30x30-conserved", "other-conserved", "unknown", "non-conserved"],
        # these are the *shortened* labels produced by the label transform below
        "ecoregion": ['SE. Great Basin', 'Mojave Desert', 'Sonoran Desert', 'Sierra Nevada',
                      'SoCal Mountains & Valleys', 'Mono',
                      'Central CA Coast', 'Klamath Mountains', 'NorCal Coast', 'NorCal Coast Ranges',
                      'NW. Basin & Range', 'Colorado Desert', 'Central Valley Coast Ranges', 'SoCal Coast',
                      'Sierra Nevada Foothills', 'Southern Cascades', 'Modoc Plateau',
                      'Great Valley (North)', 'NorCal Interior Coast Ranges',
                      'Great Valley (South)'],
    }
    sort = bar_orders.get(x, 'x')

    # label angle / chart height
    if x == "manager_type":  # labels are too long, making vertical
        angle = 270
        height = 373
    elif x == 'ecoregion':  # make labels vertical and figure taller
        angle = 270
        height = 430
    else:  # other labels are horizontal
        angle = 0
        height = 310

    y_titles = {
        'ecoregion': 'Ecoregion (%)',
        'established': 'Year (%)',
        'manager_type': 'Manager Type (%)',
        'easement': 'Easement (%)',
        'access_type': 'Access (%)'
    }
    ytitle = y_titles.get(x, y)  # Default to `y` if not in the dictionary

    # stacked bar order
    status_order = ['30x30-conserved', 'other-conserved', 'unknown', 'non-conserved']
    # Derive the legend domain and its colors from the same rows so they stay
    # aligned. Truncating the status list to the number of colors found would
    # mislabel the segments whenever a middle category is absent from `df`.
    palette = (df[[color, 'color']]
               .drop_duplicates(subset=color, keep="first")
               .set_index(color)
               .reindex(status_order)
               .dropna())
    sort_order = list(palette.index)
    color_hex = list(palette["color"])

    # categories outside the domain are stacked last
    rank = {name: position for position, name in enumerate(sort_order)}
    # assign() returns a new frame, so the caller's DataFrame is not modified
    df = df.assign(stack_order=df[color].apply(lambda val: rank.get(val, len(rank))))

    if x == "ecoregion":
        label_transform = (
            "replace("
            "replace("
            "replace("
            "replace("
            "replace("
            "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
            "'Southern California', 'SoCal'),"
            "'Southeastern', 'SE.'),"
            "'Northwestern', 'NW.'),"
            "'and', '&'),"
            "'California', 'CA')"
        )
    else:
        label_transform = f"datum.{x}"  # Default label transformation

    chart = alt.Chart(df).mark_bar().transform_calculate(
        label=label_transform
    ).encode(
        x=alt.X("label:N", sort=sort, title=None,
                axis=alt.Axis(labelLimit=150, labelAngle=angle)),  # Shorten axis labels
        y=alt.Y(y, title=ytitle).scale(domain=(0, 100)),
        color=alt.Color(
            color,
            sort=sort_order,  # Controls legend order
            scale=alt.Scale(domain=sort_order, range=color_hex)
        ),
        order=alt.Order(
            "stack_order:Q",
            sort="ascending"
        ),
        tooltip=[
            alt.Tooltip("label", type="nominal"),  # Use transformed label
            alt.Tooltip("percent_CA", type="quantitative", format=",.2f"),
            alt.Tooltip("percent_group", type="quantitative", format=",.2f"),
            alt.Tooltip("acres", type="quantitative", format=",.0f"),
        ]
    ).configure_legend(
        direction='horizontal',
        orient='top',
        columns=3,
        title=None,
        labelOffset=2,
        offset=10
    ).properties(
        width="container", height=height, title=title
    ).configure_title(fontSize=18, align="center", anchor='middle', offset=10)
    return chart
|
186 |
|
187 |
|
188 |
+
def area_plot(df, column):  # Percent protected pie chart
    """Build a pie (donut) chart whose slices are sized by the "percent_CA" column.

    Args:
        df: Data for the chart; must provide "percent_CA", "acres", "color"
            and the column named by `column`.
        column: Name of the category column shown in the tooltip.

    Returns:
        The Altair chart, sized to its container width with a height of 290.
    """
    base = alt.Chart(df).encode(
        alt.Theta("percent_CA:Q").stack(True),
    )
    pie = (
        base
        .mark_arc(innerRadius=40, outerRadius=100, stroke="black", strokeWidth=0.5)
        .encode(
            # "color" already holds the final color values, so no scale or legend
            alt.Color("color:N").scale(None).legend(None),
            tooltip=[
                alt.Tooltip(column, type="nominal"),
                alt.Tooltip("percent_CA", type="quantitative", format=",.2f"),
                alt.Tooltip("acres", type="quantitative", format=",.0f"),
            ]
        )
    )
    # The slice-label text layer was built but never added to the plot, so it
    # has been dropped.
    return pie.properties(width="container", height=290)
|
211 |
|
212 |
|
213 |
+
|
214 |
def bar_chart(df, x, y, title): #display summary stats for color_by column
|
215 |
#axis label angles / chart size
|
216 |
+
if x == "manager_type": #labels are too long, making vertical
|
217 |
angle = 270
|
218 |
height = 373
|
219 |
elif x == 'ecoregion': # make labels vertical and figure taller
|
|
|
224 |
height = 310
|
225 |
|
226 |
# order of bars
|
227 |
+
sort = 'x'
|
228 |
+
lineBreak = ''
|
229 |
if x == "established": # order labels in chronological order, not alphabetic.
|
230 |
sort = '-x'
|
231 |
elif x == "access_type": #order based on levels of openness
|
232 |
sort=['Open', 'Restricted', 'No Public', "Unknown"]
|
233 |
elif x == "manager_type":
|
234 |
sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
|
235 |
+
elif x == "ecoregion":
|
236 |
+
sort = ['SE. Great Basin','Mojave Desert','Sonoran Desert','Sierra Nevada','SoCal Mountains & Valleys','Mono',
|
237 |
+
'Central CA Coast','Klamath Mountains','NorCal Coast','NorCal Coast Ranges',
|
238 |
+
'NW. Basin & Range','Colorado Desert','Central Valley Coast Ranges','SoCal Coast',
|
239 |
+
'Sierra Nevada Foothills','Southern Cascades','Modoc Plateau','Great Valley (North)','NorCal Interior Coast Ranges',
|
240 |
+
'Great Valley (South)']
|
241 |
+
elif x == "status":
|
242 |
+
sort = ["30x30-conserved","other-conserved","unknown","non-conserved"]
|
243 |
+
lineBreak = '-'
|
244 |
|
245 |
# modify label names in bar chart to fit in frame
|
246 |
label_transform = f"datum.{x}" # default; no change
|
247 |
if x == "access_type":
|
248 |
label_transform = f"replace(datum.{x}, ' Access', '')" #omit 'access' from access_type
|
249 |
elif x == "ecoregion":
|
250 |
+
label_transform = (
|
251 |
+
"replace("
|
252 |
+
"replace("
|
253 |
+
"replace("
|
254 |
+
"replace("
|
255 |
+
"replace("
|
256 |
+
"replace(datum.ecoregion, 'Northern California', 'NorCal'),"
|
257 |
+
"'Southern California', 'SoCal'),"
|
258 |
+
"'Southeastern', 'SE.'),"
|
259 |
+
"'Northwestern', 'NW.'),"
|
260 |
+
"'and', '&'),"
|
261 |
+
"'California', 'CA')"
|
262 |
+
)
|
263 |
+
y_titles = {
|
264 |
+
'mean_richness': 'Richness (Mean)',
|
265 |
+
'mean_rsr': 'Range-Size Rarity (Mean)',
|
266 |
+
'mean_irrecoverable_carbon': 'Irrecoverable Carbon (Mean)',
|
267 |
+
'mean_manageable_carbon': 'Manageable Carbon (Mean)',
|
268 |
+
'mean_disadvantaged': 'Disadvantaged (Mean)',
|
269 |
+
'mean_svi': 'SVI (Mean)',
|
270 |
+
'mean_fire': 'Fire (Mean)',
|
271 |
+
'mean_rxburn': 'Rx Fire (Mean)'
|
272 |
+
}
|
273 |
+
ytitle = y_titles.get(y, y) # Default to `y` if not in the dictionary
|
274 |
+
|
275 |
x_title = next(key for key, value in select_column.items() if value == x)
|
276 |
chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
|
277 |
+
label=label_transform
|
278 |
).encode(
|
279 |
x=alt.X("label:N",
|
280 |
axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
|
281 |
+
sort=sort),
|
282 |
+
y=alt.Y(y, axis=alt.Axis(title = ytitle)),
|
283 |
color=alt.Color('color').scale(None),
|
284 |
+
).configure(lineBreak = lineBreak)
|
285 |
+
|
286 |
+
chart = chart.properties(width="container", height=height, title = title
|
287 |
+
).configure_title(fontSize=18, align = "center",anchor='middle')
|
288 |
return chart
|
289 |
|
290 |
+
|
291 |
+
|
292 |
def sync_checkboxes(source):
|
293 |
# gap 1 and gap 2 on -> 30x30-conserved on
|
294 |
if source in ["gap_code1", "gap_code2"]:
|
|
|
302 |
# other-conserved on <-> gap 3 on
|
303 |
elif source == "gap_code3":
|
304 |
st.session_state["statusother-conserved"] = st.session_state.gap_code3
|
|
|
305 |
elif source == "statusother-conserved":
|
306 |
if "gap_code3" in st.session_state and st.session_state["statusother-conserved"] != st.session_state.gap_code3:
|
307 |
st.session_state.gap_code3 = st.session_state["statusother-conserved"]
|
|
|
308 |
|
309 |
# unknown on <-> gap 4 on
|
310 |
elif source == "gap_code4":
|
311 |
st.session_state.statusunknown = st.session_state.gap_code4
|
312 |
+
|
313 |
elif source == "statusunknown":
|
314 |
if "gap_code4" in st.session_state and st.session_state.statusunknown != st.session_state.gap_code4:
|
315 |
st.session_state.gap_code4 = st.session_state.statusunknown
|
|
|
316 |
|
317 |
# non-conserved on <-> gap 0
|
318 |
elif source == "gap_code0":
|
319 |
st.session_state['statusnon-conserved'] = st.session_state.gap_code0
|
320 |
+
|
321 |
elif source == "statusnon-conserved":
|
322 |
if "gap_code0" in st.session_state and st.session_state['statusnon-conserved'] != st.session_state.gap_code0:
|
323 |
st.session_state.gap_code0 = st.session_state['statusnon-conserved']
|
|
|
324 |
|
325 |
|
326 |
+
def getButtons(style_options, style_choice, default_boxes=None):
|
327 |
column = style_options[style_choice]['property']
|
328 |
opts = [style[0] for style in style_options[style_choice]['stops']]
|
329 |
+
default_boxes = default_boxes or {}
|
330 |
buttons = {}
|
331 |
for name in opts:
|
332 |
key = column + str(name)
|
|
|
345 |
return d
|
346 |
|
347 |
|
348 |
+
def getLegend(style_options, color_choice):
    """Assemble the legend entries and display settings for a color scheme.

    Args:
        style_options: Mapping of scheme name to a style dict whose 'stops'
            entry is a sequence of (category, color) pairs.
        color_choice: Key of `style_options` to build the legend for.

    Returns:
        tuple: (legend, position, bg_color, fontsize), where `legend` maps each
        category to its color.
    """
    legend = {}
    for category, hex_code in style_options[color_choice]['stops']:
        legend[category] = hex_code

    if color_choice != "Ecoregion":
        return legend, 'bottom-left', 'white', 15

    # shorten legend for ecoregions; the substitutions are applied one after
    # another, in this order
    abbreviations = (
        ("Northern California", "NorCal"),
        ("Southern California", "SoCal"),
        ("Southeastern", "SE."),
        ("and", "&"),
        ("California", "CA"),
        ("Northwestern", "NW."),
    )
    for long_form, short_form in abbreviations:
        legend = {name.replace(long_form, short_form): shade for name, shade in legend.items()}

    # ecoregion legend gets a semi-transparent background and smaller font
    return legend, 'bottom-left', 'rgba(255, 255, 255, 0.6)', 12
|
364 |
+
|
365 |
+
|
366 |
+
|
367 |
def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
|
368 |
filters = []
|
369 |
for col, val in zip(filter_cols, filter_vals):
|
370 |
filters.append(["match", ["get", col], val, True, False])
|
371 |
combined_filters = ["all"] + filters
|
|
|
372 |
if "non-conserved" in list(chain.from_iterable(filter_vals)):
|
373 |
combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
|
|
|
374 |
style = {
|
375 |
"version": 8,
|
376 |
"sources": {
|
|
|
421 |
]
|
422 |
}
|
423 |
return style
|
424 |
+
|
425 |
+
def run_sql(query, color_choice):
    """
    Filter data based on an LLM-generated SQL query and display the outcome.

    Args:
        query (str): The natural language query to filter the data.
        color_choice (str): Key of `style_options` used for the map style and legend.

    Returns:
        pandas.DataFrame: The query result without its 'geom' column, or a
        frame with an empty 'id' column when no SQL could be generated.
    """
    output = few_shot_structured_llm.invoke(query)
    sql_query = output.sql_query
    explanation = output.explanation

    if not sql_query:  # if the chatbot can't generate a SQL query.
        st.success(explanation)
        return pd.DataFrame({'id': []})

    # NOTE(review): this SQL text comes from the LLM and is executed as-is;
    # confirm the connection cannot modify data.
    result = ca.sql(sql_query).execute()
    has_geom = 'geom' in result.columns

    if result.empty:
        explanation = "This query did not return any results. Please try again with a different query."
        st.warning(explanation, icon="⚠️")
        st.caption("SQL Query:")
        st.code(sql_query, language="sql")
        return result.drop('geom', axis=1) if has_geom else result

    # Mapping needs BOTH columns. The earlier `"id" and "geom" in result.columns`
    # only tested for 'geom', so a result without 'id' raised a KeyError.
    if has_geom and 'id' in result.columns:
        style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
        legend, position, bg_color, fontsize = getLegend(style_options, color_choice)

        m.add_legend(legend_dict=legend, position=position, bg_color=bg_color, fontsize=fontsize)
        m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
        m.fit_bounds(result.total_bounds.tolist())
        result = result.drop('geom', axis=1)  # printing to streamlit so I need to drop geom
    else:
        # if we aren't mapping, just print out the data (without geometry)
        if has_geom:
            result = result.drop('geom', axis=1)
        st.write(result)

    with st.popover("Explanation"):
        st.write(explanation)
        st.caption("SQL Query:")
        st.code(sql_query, language="sql")

    return result
|
469 |
+
|
470 |
+
|
471 |
+
|
472 |
+
def summary_table_sql(ca, column, colors, ids):  # get df for charts
    """Summarize the rows whose `id` is in `ids`, grouped by `column`.

    Args:
        ca: Table passed through to `get_summary`.
        column: Column to group the summary by.
        colors: Color lookup passed through to `get_summary`.
        ids: Collection of `id` values to keep.

    Returns:
        Whatever `get_summary` returns for the filtered rows (used for charts).
    """
    id_filter = _.id.isin(ids)
    # Argument order follows the other get_summary calls in this file:
    # (table, filter, group columns, color-by column, colors). Previously
    # `colors` was passed in the color-by column slot.
    # NOTE(review): get_summary's definition is not visible here; confirm its signature.
    return get_summary(ca, id_filter, [column], column, colors)
|
477 |
+
|
app/variables.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
# urls for main layer
|
2 |
-
ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/
|
3 |
-
ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/
|
4 |
-
|
5 |
|
6 |
ca_area_acres = 1.014e8 #acres
|
|
|
7 |
style_choice = "GAP Status Code"
|
8 |
|
9 |
# urls for additional data layers
|
@@ -38,7 +38,7 @@ svi_color = "#1bc7c3" #cyan
|
|
38 |
white = "#FFFFFF"
|
39 |
|
40 |
# gap codes 3 and 4 are off by default.
|
41 |
-
|
42 |
0: False,
|
43 |
3: False,
|
44 |
4: False,
|
@@ -119,6 +119,8 @@ status = {
|
|
119 |
['other-conserved', "#b6ce7a"],
|
120 |
['unknown', "#e5efdb"],
|
121 |
['non-conserved', "#e1e1e1"]
|
|
|
|
|
122 |
],
|
123 |
}
|
124 |
|
@@ -153,9 +155,9 @@ ecoregion = {
|
|
153 |
}
|
154 |
|
155 |
style_options = {
|
156 |
-
"Year": year,
|
157 |
"30x30 Status": status,
|
158 |
"GAP Code": gap,
|
|
|
159 |
"Ecoregion": ecoregion,
|
160 |
"Manager Type": manager,
|
161 |
"Easement": easement,
|
@@ -262,9 +264,9 @@ svi_style = {
|
|
262 |
|
263 |
|
264 |
select_column = {
|
265 |
-
"Year": "established",
|
266 |
"30x30 Status": "status",
|
267 |
"GAP Code": "gap_code",
|
|
|
268 |
"Ecoregion": "ecoregion",
|
269 |
"Manager Type": "manager_type",
|
270 |
"Easement": "easement",
|
|
|
1 |
# urls for main layer
|
2 |
+
ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/65eb463312262e50d51f5c07bfad7568152803b0/ca-30x30.parquet"
|
3 |
+
ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/fc4dc523c27a8133452bb7596e3e520fda24eccd/ca-30x30.pmtiles"
|
|
|
4 |
|
5 |
ca_area_acres = 1.014e8 #acres
|
6 |
+
# ca_area_acres = 103179953.76086558
|
7 |
style_choice = "GAP Status Code"
|
8 |
|
9 |
# urls for additional data layers
|
|
|
38 |
white = "#FFFFFF"
|
39 |
|
40 |
# gap codes 0, 3 and 4 are off by default.
|
41 |
+
default_boxes = {
|
42 |
0: False,
|
43 |
3: False,
|
44 |
4: False,
|
|
|
119 |
['other-conserved', "#b6ce7a"],
|
120 |
['unknown', "#e5efdb"],
|
121 |
['non-conserved', "#e1e1e1"]
|
122 |
+
# ['non-conserved', white]
|
123 |
+
|
124 |
],
|
125 |
}
|
126 |
|
|
|
155 |
}
|
156 |
|
157 |
style_options = {
|
|
|
158 |
"30x30 Status": status,
|
159 |
"GAP Code": gap,
|
160 |
+
"Year": year,
|
161 |
"Ecoregion": ecoregion,
|
162 |
"Manager Type": manager,
|
163 |
"Easement": easement,
|
|
|
264 |
|
265 |
|
266 |
select_column = {
|
|
|
267 |
"30x30 Status": "status",
|
268 |
"GAP Code": "gap_code",
|
269 |
+
"Year": "established",
|
270 |
"Ecoregion": "ecoregion",
|
271 |
"Manager Type": "manager_type",
|
272 |
"Easement": "easement",
|
preprocess/preprocess.ipynb
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
-
"execution_count":
|
14 |
"id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
|
15 |
"metadata": {
|
16 |
"editable": true,
|
@@ -182,25 +182,10 @@
|
|
182 |
},
|
183 |
{
|
184 |
"cell_type": "code",
|
185 |
-
"execution_count":
|
186 |
"id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
|
187 |
"metadata": {},
|
188 |
-
"outputs": [
|
189 |
-
{
|
190 |
-
"data": {
|
191 |
-
"application/vnd.jupyter.widget-view+json": {
|
192 |
-
"model_id": "3c217929b7744164a99f6e2314366359",
|
193 |
-
"version_major": 2,
|
194 |
-
"version_minor": 0
|
195 |
-
},
|
196 |
-
"text/plain": [
|
197 |
-
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
198 |
-
]
|
199 |
-
},
|
200 |
-
"metadata": {},
|
201 |
-
"output_type": "display_data"
|
202 |
-
}
|
203 |
-
],
|
204 |
"source": [
|
205 |
"con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
|
206 |
"\n",
|
@@ -210,7 +195,7 @@
|
|
210 |
"con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
|
211 |
"con.create_table(\"non\", non, overwrite = True)\n",
|
212 |
"\n",
|
213 |
-
"#
|
214 |
"con.con.execute('''\n",
|
215 |
"CREATE TABLE non_conserved_eco AS\n",
|
216 |
"SELECT \n",
|
@@ -223,12 +208,12 @@
|
|
223 |
"WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
|
224 |
"''')\n",
|
225 |
"\n",
|
226 |
-
"\n",
|
227 |
"# save to parquet file so we don't have to run this again\n",
|
228 |
"non_eco = (con.table(\"non_conserved_eco\")\n",
|
229 |
" .drop('geom')\n",
|
230 |
" .rename(geom = \"geom_1\")\n",
|
231 |
-
" .mutate(geom = ST_MakeValid(_.geom))
|
|
|
232 |
" )\n",
|
233 |
"\n",
|
234 |
"non_conserved_eco = non_eco.execute()\n",
|
@@ -256,7 +241,7 @@
|
|
256 |
" .cast({\"geom\": \"geometry\"})\n",
|
257 |
" .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
|
258 |
" access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
|
259 |
-
"
|
260 |
" status = ibis.literal(\"non-conserved\"),\n",
|
261 |
" acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
|
262 |
" )\n",
|
@@ -466,7 +451,7 @@
|
|
466 |
},
|
467 |
{
|
468 |
"cell_type": "code",
|
469 |
-
"execution_count":
|
470 |
"id": "aade11d9-87b9-403d-bad1-3069663807a9",
|
471 |
"metadata": {},
|
472 |
"outputs": [],
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
+
"execution_count": null,
|
14 |
"id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
|
15 |
"metadata": {
|
16 |
"editable": true,
|
|
|
182 |
},
|
183 |
{
|
184 |
"cell_type": "code",
|
185 |
+
"execution_count": null,
|
186 |
"id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
|
187 |
"metadata": {},
|
188 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
"source": [
|
190 |
"con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
|
191 |
"\n",
|
|
|
195 |
"con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
|
196 |
"con.create_table(\"non\", non, overwrite = True)\n",
|
197 |
"\n",
|
198 |
+
"#split up the non-conserved areas by ecoregions\n",
|
199 |
"con.con.execute('''\n",
|
200 |
"CREATE TABLE non_conserved_eco AS\n",
|
201 |
"SELECT \n",
|
|
|
208 |
"WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
|
209 |
"''')\n",
|
210 |
"\n",
|
|
|
211 |
"# save to parquet file so we don't have to run this again\n",
|
212 |
"non_eco = (con.table(\"non_conserved_eco\")\n",
|
213 |
" .drop('geom')\n",
|
214 |
" .rename(geom = \"geom_1\")\n",
|
215 |
+
" .mutate(geom = ST_MakeValid(_.geom))\n",
|
216 |
+
" .mutate(id=ibis.row_number().over())\n",
|
217 |
" )\n",
|
218 |
"\n",
|
219 |
"non_conserved_eco = non_eco.execute()\n",
|
|
|
241 |
" .cast({\"geom\": \"geometry\"})\n",
|
242 |
" .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
|
243 |
" access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
|
244 |
+
" easement = ibis.null(), type = ibis.literal(\"Land\"),\n",
|
245 |
" status = ibis.literal(\"non-conserved\"),\n",
|
246 |
" acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
|
247 |
" )\n",
|
|
|
451 |
},
|
452 |
{
|
453 |
"cell_type": "code",
|
454 |
+
"execution_count": null,
|
455 |
"id": "aade11d9-87b9-403d-bad1-3069663807a9",
|
456 |
"metadata": {},
|
457 |
"outputs": [],
|