cassiebuhler committed on
Commit
593d846
·
1 Parent(s): 56784d5

wip - charts

Browse files

Added charts grouped by status, fixed the non-conserved data (it needed a unique ID), and fixed the chatbot for fire risk.

Still to fix: 1) SE Great Basin is missing for non-conserved areas, 2) the 30x30 status chart doesn't update dynamically, and 3) ecoregion colors need to be implemented in the labels for the 30x30 status charts.

Files changed (5) hide show
  1. app/app.py +25 -118
  2. app/system_prompt.txt +9 -1
  3. app/utils.py +273 -42
  4. app/variables.py +8 -6
  5. preprocess/preprocess.ipynb +8 -23
app/app.py CHANGED
@@ -26,10 +26,9 @@ if "mydata" not in set(current_tables):
26
  tbl = con.read_parquet(ca_parquet)
27
  con.create_table("mydata", tbl)
28
 
29
-
30
  ca = con.table("mydata")
31
 
32
-
33
  for key in [
34
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
35
  'fire', 'rxburn', 'disadvantaged_communities',
@@ -37,6 +36,13 @@ for key in [
37
  if key not in st.session_state:
38
  st.session_state[key] = False
39
 
 
 
 
 
 
 
 
40
  st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
41
 
42
  #customizing style with CSS
@@ -128,8 +134,6 @@ m = leafmap.Map(style="positron")
128
  #############
129
 
130
 
131
-
132
-
133
  ##### Chatbot stuff
134
 
135
 
@@ -160,98 +164,22 @@ prompt = ChatPromptTemplate.from_messages([
160
  structured_llm = llm.with_structured_output(SQLResponse)
161
  few_shot_structured_llm = prompt | structured_llm
162
 
163
- # @st.cache_data(ttl=600) # Cache expires every 10 minutes
164
- def run_sql(query,color_choice):
165
- """
166
- Filter data based on an LLM-generated SQL query and return matching IDs.
167
-
168
- Args:
169
- query (str): The natural language query to filter the data.
170
- color_choice (str): The column used for plotting.
171
- """
172
- output = few_shot_structured_llm.invoke(query)
173
- sql_query = output.sql_query
174
- explanation =output.explanation
175
-
176
- if not sql_query: # if the chatbot can't generate a SQL query.
177
- st.success(explanation)
178
- return pd.DataFrame({'id' : []})
179
-
180
- result = ca.sql(sql_query).execute()
181
- if result.empty :
182
- explanation = "This query did not return any results. Please try again with a different query."
183
- st.warning(explanation, icon="⚠️")
184
- st.caption("SQL Query:")
185
- st.code(sql_query,language = "sql")
186
- if 'geom' in result.columns:
187
- return result.drop('geom',axis = 1)
188
- else:
189
- return result
190
-
191
- elif ("id" and "geom" in result.columns):
192
- style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
193
- legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
194
- position = 'bottom-left'
195
- fontsize = 15
196
- bg_color = 'white'
197
-
198
- # shorten legend for ecoregions
199
- if color_choice == "Ecoregion":
200
- legend_d = {key.replace("Northern California", "NorCal"): value for key, value in legend_d.items()}
201
- legend_d = {key.replace("Southern California", "SoCal"): value for key, value in legend_d.items()}
202
- legend_d = {key.replace("Southeastern", "SE."): value for key, value in legend_d.items()}
203
- legend_d = {key.replace("and", "&"): value for key, value in legend_d.items()}
204
- legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
205
- legend_d = {key.replace("Northwestern", "NW."): value for key, value in legend_d.items()}
206
- bg_color = 'rgba(255, 255, 255, 0.6)'
207
- fontsize = 12
208
-
209
- m.add_legend(legend_dict = legend_d, position = position, bg_color = bg_color, fontsize = fontsize)
210
- m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
211
- m.fit_bounds(result.total_bounds.tolist())
212
- result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
213
- else:
214
-
215
- st.write(result) # if we aren't mapping, just print out the data
216
-
217
- with st.popover("Explanation"):
218
- st.write(explanation)
219
- st.caption("SQL Query:")
220
- st.code(sql_query,language = "sql")
221
-
222
- return result
223
-
224
-
225
-
226
- def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for printed table
227
- filters = [_.id.isin(ids)]
228
- combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
229
- df = get_summary(ca, combined_filter, [column], colors) # df used for charts
230
- return df
231
-
232
-
233
  chatbot_toggles = {key: False for key in [
234
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
235
  'fire', 'rxburn', 'disadvantaged_communities',
236
  'svi',
237
  ]}
238
-
239
-
240
-
241
  #############
242
 
243
-
244
  filters = {}
245
 
246
  with st.sidebar:
247
-
248
- color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
249
  colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
250
  alpha = 0.8
251
  st.divider()
252
 
253
 
254
-
255
  ##### Chatbot
256
  with st.container():
257
 
@@ -271,8 +199,8 @@ with st.container():
271
  '''
272
  Exploratory data queries:
273
  - What is a GAP code?
 
274
  - What is the total acreage of areas designated as easements?
275
- - Which GAP code has been impacted the most by fire?
276
  - Who manages the land with the highest amount of irrecoverable carbon and highest social vulnerability index?
277
  '''
278
 
@@ -332,7 +260,6 @@ with st.sidebar:
332
 
333
 
334
  # People Section
335
-
336
  with st.expander("👤 People"):
337
  a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
338
  show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
@@ -361,15 +288,11 @@ with st.sidebar:
361
 
362
  st.divider()
363
  st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
364
- for col,val in style_options.items():
365
- for name in val['stops'][0]:
366
- key = val['property']+str(name)
367
- st.session_state[key] = default_gap.get(name, True)
368
 
369
  for label in style_options: # get selected filters (based on the buttons selected)
370
  with st.expander(label):
371
  if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
372
- opts = getButtons(style_options, label, default_gap)
373
  else: # other buttons are not on by default.
374
  opts = getButtons(style_options, label)
375
  filters.update(opts)
@@ -383,42 +306,25 @@ with st.sidebar:
383
  filter_vals = []
384
 
385
  st.divider()
 
386
  st.markdown("""
387
  <p class="medium-font-sidebar">
388
  <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;' aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
389
- """, unsafe_allow_html=True)# adding github logo
390
 
391
  # Display CA 30x30 Data
392
  if 'out' not in locals():
393
  style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
394
- legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
395
- position = 'bottom-left'
396
- fontsize = 15
397
- bg_color = 'white'
398
-
399
- # shorten legend for ecoregions
400
- if color_choice == "Ecoregion":
401
- legend_d = {key.replace("Northern California", "NorCal"): value for key, value in legend_d.items()}
402
- legend_d = {key.replace("Southern California", "SoCal"): value for key, value in legend_d.items()}
403
- legend_d = {key.replace("Southeastern", "SE."): value for key, value in legend_d.items()}
404
- legend_d = {key.replace("and", "&"): value for key, value in legend_d.items()}
405
- legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
406
- legend_d = {key.replace("Northwestern", "NW."): value for key, value in legend_d.items()}
407
- bg_color = 'rgba(255, 255, 255, 0.6)'
408
- fontsize = 12
409
-
410
-
411
- m.add_legend(legend_dict = legend_d, position = position, bg_color = bg_color, fontsize = fontsize)
412
  m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
413
 
414
-
415
-
416
  column = select_column[color_choice]
417
 
418
  select_colors = {
419
- "Year": year["stops"],
420
- "GAP Code": gap["stops"],
421
  "30x30 Status": status["stops"],
 
 
422
  "Ecoregion": ecoregion["stops"],
423
  "Manager Type": manager["stops"],
424
  "Easement": easement["stops"],
@@ -435,12 +341,12 @@ colors = (
435
  # get summary tables used for charts + printed table
436
  # df - charts; df_tab - printed table (omits colors)
437
  if 'out' not in locals():
438
- df,df_tab,df_percent = summary_table(ca, column, colors, filter_cols, filter_vals, colorby_vals)
439
- total_percent = df_percent.percent_protected.sum().round(2)
440
 
441
  else:
442
  df = summary_table_sql(ca, column, colors, ids)
443
- total_percent = df.percent_protected.sum().round(2)
444
 
445
 
446
  # charts displayed based on color_by variable
@@ -453,7 +359,6 @@ rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)"
453
  justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
454
  svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
455
 
456
-
457
  main = st.container()
458
 
459
  with main:
@@ -471,7 +376,11 @@ with main:
471
 
472
  st.markdown(f"{total_percent}% CA Covered", help = "Updates based on displayed data")
473
  st.altair_chart(area_plot(df, column), use_container_width=True)
474
-
 
 
 
 
475
  if show_richness:
476
  st.altair_chart(richness_chart, use_container_width=True)
477
 
@@ -496,8 +405,6 @@ with main:
496
  if show_rxburn:
497
  st.altair_chart(rx_10_chart, use_container_width=True)
498
 
499
-
500
-
501
 
502
  st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
503
 
 
26
  tbl = con.read_parquet(ca_parquet)
27
  con.create_table("mydata", tbl)
28
 
 
29
  ca = con.table("mydata")
30
 
31
+ # session state for syncing app
32
  for key in [
33
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
34
  'fire', 'rxburn', 'disadvantaged_communities',
 
36
  if key not in st.session_state:
37
  st.session_state[key] = False
38
 
39
+ for col,val in style_options.items():
40
+ for name in val['stops']:
41
+ key = val['property']+str(name[0])
42
+ if key not in st.session_state:
43
+ st.session_state[key] = default_boxes.get(name[0], True)
44
+
45
+
46
  st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
47
 
48
  #customizing style with CSS
 
134
  #############
135
 
136
 
 
 
137
  ##### Chatbot stuff
138
 
139
 
 
164
  structured_llm = llm.with_structured_output(SQLResponse)
165
  few_shot_structured_llm = prompt | structured_llm
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  chatbot_toggles = {key: False for key in [
168
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
169
  'fire', 'rxburn', 'disadvantaged_communities',
170
  'svi',
171
  ]}
 
 
 
172
  #############
173
 
 
174
  filters = {}
175
 
176
  with st.sidebar:
177
+ color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
 
178
  colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
179
  alpha = 0.8
180
  st.divider()
181
 
182
 
 
183
  ##### Chatbot
184
  with st.container():
185
 
 
199
  '''
200
  Exploratory data queries:
201
  - What is a GAP code?
202
+ - What percentage of 30x30 conserved land has been impacted by wildfire?
203
  - What is the total acreage of areas designated as easements?
 
204
  - Who manages the land with the highest amount of irrecoverable carbon and highest social vulnerability index?
205
  '''
206
 
 
260
 
261
 
262
  # People Section
 
263
  with st.expander("👤 People"):
264
  a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
265
  show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
 
288
 
289
  st.divider()
290
  st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
 
 
 
 
291
 
292
  for label in style_options: # get selected filters (based on the buttons selected)
293
  with st.expander(label):
294
  if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
295
+ opts = getButtons(style_options, label, default_boxes)
296
  else: # other buttons are not on by default.
297
  opts = getButtons(style_options, label)
298
  filters.update(opts)
 
306
  filter_vals = []
307
 
308
  st.divider()
309
+ # adding github logo
310
  st.markdown("""
311
  <p class="medium-font-sidebar">
312
  <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;' aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
313
+ """, unsafe_allow_html=True)
314
 
315
  # Display CA 30x30 Data
316
  if 'out' not in locals():
317
  style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
318
+ legend, position, bg_color, fontsize = getLegend(style_options, color_choice)
319
+ m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
321
 
 
 
322
  column = select_column[color_choice]
323
 
324
  select_colors = {
 
 
325
  "30x30 Status": status["stops"],
326
+ "GAP Code": gap["stops"],
327
+ "Year": year["stops"],
328
  "Ecoregion": ecoregion["stops"],
329
  "Manager Type": manager["stops"],
330
  "Easement": easement["stops"],
 
341
  # get summary tables used for charts + printed table
342
  # df - charts; df_tab - printed table (omits colors)
343
  if 'out' not in locals():
344
+ df, df_tab, df_percent, df_bar_30x30 = summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals)
345
+ total_percent = df_percent.percent_CA.sum().round(2)
346
 
347
  else:
348
  df = summary_table_sql(ca, column, colors, ids)
349
+ total_percent = df.percent_CA.sum().round(2)
350
 
351
 
352
  # charts displayed based on color_by variable
 
359
  justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
360
  svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
361
 
 
362
  main = st.container()
363
 
364
  with main:
 
376
 
377
  st.markdown(f"{total_percent}% CA Covered", help = "Updates based on displayed data")
378
  st.altair_chart(area_plot(df, column), use_container_width=True)
379
+
380
+ if 'df_bar_30x30' in locals(): #if we use chatbot, we won't have these graphs.
381
+ if column not in ["status", "gap_code"]:
382
+ st.altair_chart(stacked_bar(df_bar_30x30, column,'percent_group','status', color_choice + ' by 30x30 Status'), use_container_width=True)
383
+
384
  if show_richness:
385
  st.altair_chart(richness_chart, use_container_width=True)
386
 
 
405
  if show_rxburn:
406
  st.altair_chart(rx_10_chart, use_container_width=True)
407
 
 
 
408
 
409
  st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
410
 
app/system_prompt.txt CHANGED
@@ -147,10 +147,18 @@ sql_query:
147
  ## Example:
148
  example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
149
  sql_query:
150
- SELECT "id", "geom", "name", "acres", "percent_rxburn_10yr", "percent_disadvantaged"
151
  FROM mydata
152
  WHERE "percent_disadvantaged" > 0
153
  AND "percent_rxburn_10yr" >= 0.3;
154
 
155
 
 
 
 
 
 
 
 
 
156
  Question: {input}
 
147
  ## Example:
148
  example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
149
  sql_query:
150
+ SELECT "id", "geom", "name", "acres", "rxburn", "percent_disadvantaged"
151
  FROM mydata
152
  WHERE "percent_disadvantaged" > 0
153
  AND "percent_rxburn_10yr" >= 0.3;
154
 
155
 
156
+
157
+ ## Example:
158
+ example_user: "What percentage of 30x30 conserved land has been impacted by wildfire?"
159
+ sql_query:
160
+ SELECT SUM("fire" * "acres") / SUM("acres") * 100 AS percent_fire
161
+ FROM mydata
162
+ WHERE "status" = '30x30-conserved';
163
+
164
  Question: {input}
app/utils.py CHANGED
@@ -18,11 +18,22 @@ from itertools import chain
18
 
19
  from variables import *
20
 
21
- def get_summary(ca, combined_filter, column, colors=None): #summary stats, based on filtered data
 
 
 
 
 
 
 
 
 
 
22
  df = ca.filter(combined_filter)
23
  df = (df
24
  .group_by(*column) # unpack the list for grouping
25
- .aggregate(percent_protected=100 * _.acres.sum() / ca_area_acres,
 
26
  mean_richness = (_.richness * _.acres).sum() / _.acres.sum(),
27
  mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
28
  mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
@@ -32,16 +43,21 @@ def get_summary(ca, combined_filter, column, colors=None): #summary stats, based
32
  mean_disadvantaged = (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
33
  mean_svi = (_.svi * _.acres).sum() / _.acres.sum(),
34
  )
35
- .mutate(percent_protected=_.percent_protected.round(1))
 
36
  )
 
 
 
37
  if colors is not None and not colors.empty: #only the df will have colors, df_tab doesn't since we are printing it.
38
- df = df.inner_join(colors, column)
39
  df = df.cast({col: "string" for col in column})
40
  df = df.to_pandas()
41
  return df
 
42
 
43
-
44
- def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): # get df for charts + df_tab for printed table
45
  filters = []
46
  if filter_cols and filter_vals: #if a filter is selected, add to list of filters
47
  for filter_col, filter_val in zip(filter_cols, filter_vals):
@@ -51,42 +67,153 @@ def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): #
51
  filters.append(getattr(_, filter_col) == filter_val[0])
52
  if column not in filter_cols: #show color_by column in table by adding it as a filter (if it's not already a filter)
53
  filter_cols.append(column)
54
- filters.append(getattr(_, column).isin(colorby_vals[column]))
55
  combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
 
 
 
 
 
 
56
 
57
- df_percent = get_summary(ca, combined_filter, [column], colors) # df used for charts
 
 
 
 
 
58
 
59
- if column == "status": #need to include non-conserved in summary stats
60
- combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','non-conserved']))
61
 
62
- df = get_summary(ca, combined_filter, [column], colors) # df used for charts
63
 
64
- df_tab = get_summary(ca, combined_filter, filter_cols, colors = None) #df used for printed table
 
 
 
 
 
65
 
66
- return df, df_tab, df_percent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
 
70
- def area_plot(df, column): #percent protected pie chart
71
  base = alt.Chart(df).encode(
72
- alt.Theta("percent_protected:Q").stack(True),
73
  )
74
- pie = ( base
75
- .mark_arc(innerRadius= 40, outerRadius=100, stroke = 'black', strokeWidth = .5)
76
- .encode(alt.Color("color:N").scale(None).legend(None),
77
- tooltip=['percent_protected', column])
 
 
 
 
 
 
 
78
  )
79
- text = ( base
80
- .mark_text(radius=80, size=14, color="white")
81
- .encode(text = column + ":N")
 
82
  )
83
- plot = pie # pie + text
84
  return plot.properties(width="container", height=290)
85
 
86
 
 
87
  def bar_chart(df, x, y, title): #display summary stats for color_by column
88
  #axis label angles / chart size
89
- if x in ["manager_type",'status']: #labels are too long, making vertical
90
  angle = 270
91
  height = 373
92
  elif x == 'ecoregion': # make labels vertical and figure taller
@@ -97,34 +224,71 @@ def bar_chart(df, x, y, title): #display summary stats for color_by column
97
  height = 310
98
 
99
  # order of bars
 
 
100
  if x == "established": # order labels in chronological order, not alphabetic.
101
  sort = '-x'
102
  elif x == "access_type": #order based on levels of openness
103
  sort=['Open', 'Restricted', 'No Public', "Unknown"]
104
  elif x == "manager_type":
105
  sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
106
- else:
107
- sort = 'x'
 
 
 
 
 
 
 
108
 
109
  # modify label names in bar chart to fit in frame
110
  label_transform = f"datum.{x}" # default; no change
111
  if x == "access_type":
112
  label_transform = f"replace(datum.{x}, ' Access', '')" #omit 'access' from access_type
113
  elif x == "ecoregion":
114
- label_transform = f"replace(datum.{x}, 'California', 'CA')" # Replace "California" with "CA"
115
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  x_title = next(key for key, value in select_column.items() if value == x)
117
  chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
118
- label=label_transform
119
  ).encode(
120
  x=alt.X("label:N",
121
  axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
122
- sort=sort),
123
- y=alt.Y(y, axis=alt.Axis()),
124
  color=alt.Color('color').scale(None),
125
- ).properties(width="container", height=height, title = title)
 
 
 
126
  return chart
127
 
 
 
128
  def sync_checkboxes(source):
129
  # gap 1 and gap 2 on -> 30x30-conserved on
130
  if source in ["gap_code1", "gap_code2"]:
@@ -138,35 +302,31 @@ def sync_checkboxes(source):
138
  # other-conserved on <-> gap 3 on
139
  elif source == "gap_code3":
140
  st.session_state["statusother-conserved"] = st.session_state.gap_code3
141
- rerun_needed = True
142
  elif source == "statusother-conserved":
143
  if "gap_code3" in st.session_state and st.session_state["statusother-conserved"] != st.session_state.gap_code3:
144
  st.session_state.gap_code3 = st.session_state["statusother-conserved"]
145
- rerun_needed = True # Ensure UI updates
146
 
147
  # unknown on <-> gap 4 on
148
  elif source == "gap_code4":
149
  st.session_state.statusunknown = st.session_state.gap_code4
150
- rerun_needed = True
151
  elif source == "statusunknown":
152
  if "gap_code4" in st.session_state and st.session_state.statusunknown != st.session_state.gap_code4:
153
  st.session_state.gap_code4 = st.session_state.statusunknown
154
- rerun_needed = True
155
 
156
  # non-conserved on <-> gap 0
157
  elif source == "gap_code0":
158
  st.session_state['statusnon-conserved'] = st.session_state.gap_code0
159
- rerun_needed = True
160
  elif source == "statusnon-conserved":
161
  if "gap_code0" in st.session_state and st.session_state['statusnon-conserved'] != st.session_state.gap_code0:
162
  st.session_state.gap_code0 = st.session_state['statusnon-conserved']
163
- rerun_needed = True
164
 
165
 
166
- def getButtons(style_options, style_choice, default_gap=None):
167
  column = style_options[style_choice]['property']
168
  opts = [style[0] for style in style_options[style_choice]['stops']]
169
- default_gap = default_gap or {}
170
  buttons = {}
171
  for name in opts:
172
  key = column + str(name)
@@ -185,15 +345,32 @@ def getColorVals(style_options, style_choice):
185
  return d
186
 
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
189
  filters = []
190
  for col, val in zip(filter_cols, filter_vals):
191
  filters.append(["match", ["get", col], val, True, False])
192
  combined_filters = ["all"] + filters
193
-
194
  if "non-conserved" in list(chain.from_iterable(filter_vals)):
195
  combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
196
-
197
  style = {
198
  "version": 8,
199
  "sources": {
@@ -244,3 +421,57 @@ def get_pmtiles_style_llm(paint, ids):
244
  ]
245
  }
246
  return style
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  from variables import *
20
 
21
def colorTable(select_colors,color_choice,column):
    """Return a pandas lookup table pairing each category with its color.

    Args:
        select_colors: mapping from a color-by choice to the rows used to
            build the table (presumably [category, hex color] pairs --
            confirm against variables.py).
        color_choice: key into ``select_colors``.
        column: name given to the first (category) column; the second
            column is always named "color".

    Returns:
        pandas.DataFrame with the columns ``column`` and "color".
    """
    palette_rows = select_colors[color_choice]
    lookup = ibis.memtable(palette_rows, columns=[column, "color"])
    return lookup.to_pandas()
27
+
28
+ def get_summary(ca, combined_filter, column, main_group, colors=None):
29
+ df = ca.filter(combined_filter)
30
+ #total acres for each group
31
+ group_totals = df.group_by(main_group).aggregate(total_acres=_.acres.sum())
32
  df = ca.filter(combined_filter)
33
  df = (df
34
  .group_by(*column) # unpack the list for grouping
35
+ .aggregate(percent_CA=100 * _.acres.sum() / ca_area_acres,
36
+ acres = _.acres.sum(),
37
  mean_richness = (_.richness * _.acres).sum() / _.acres.sum(),
38
  mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
39
  mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
 
43
  mean_disadvantaged = (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
44
  mean_svi = (_.svi * _.acres).sum() / _.acres.sum(),
45
  )
46
+ .mutate(percent_CA=_.percent_CA.round(1),
47
+ acres=_.acres.round(1))
48
  )
49
+
50
+ df = df.inner_join(group_totals, main_group)
51
+ df = df.mutate(percent_group=(100 * _.acres / _.total_acres).round(1))
52
  if colors is not None and not colors.empty: #only the df will have colors, df_tab doesn't since we are printing it.
53
+ df = df.inner_join(colors, column[-1])
54
  df = df.cast({col: "string" for col in column})
55
  df = df.to_pandas()
56
  return df
57
+
58
 
59
+ def summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals): # get df for charts + df_tab for printed table
60
+ colors = colorTable(select_colors,color_choice,column)
61
  filters = []
62
  if filter_cols and filter_vals: #if a filter is selected, add to list of filters
63
  for filter_col, filter_val in zip(filter_cols, filter_vals):
 
67
  filters.append(getattr(_, filter_col) == filter_val[0])
68
  if column not in filter_cols: #show color_by column in table by adding it as a filter (if it's not already a filter)
69
  filter_cols.append(column)
70
+ filters.append(getattr(_, column).isin(colorby_vals[column]))
71
  combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
72
+
73
+ df_percent = get_summary(ca, combined_filter, [column],column, colors) # df used for percentage, excludes non-conserved.
74
+ df_tab = get_summary(ca, combined_filter, filter_cols, column, colors = None) #df used for printed table
75
+ if column == "status": #need to include non-conserved in summary stats
76
+ combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','unknown','non-conserved']))
77
+ df = get_summary(ca, combined_filter, [column], column, colors) # df used for charts
78
 
79
+ df_bar_30x30 = None # no stacked charts if we have status/gap_code
80
+ if column not in ["status","gap_code"]: # df for stacked 30x30 status bar chart
81
+ colors = colorTable(select_colors,"30x30 Status",'status')
82
+ combined_filter_status = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','unknown','non-conserved']))
83
+ df_bar_30x30 = get_summary(ca, combined_filter_status, [column, 'status'], column, colors) # df used for charts
84
+ return df, df_tab, df_percent, df_bar_30x30
85
 
 
 
86
 
 
87
 
88
def get_hex(df, color,sort_order):
    """List the hex colors of the categories in ``sort_order`` found in ``df``.

    Args:
        df: pandas DataFrame containing the category column named by
            ``color`` and a "color" column of hex strings.
        color: name of the category column.
        sort_order: categories in the order the colors should be returned.

    Returns:
        list of "color" values, one per category of ``sort_order`` that has a
        row in ``df``; categories without a row are left out.
    """
    # One row per category (the first occurrence wins).
    one_per_category = df.drop_duplicates(subset=color, keep="first")
    # Reindexing puts the rows in sort_order; absent categories become NaN rows.
    ordered = one_per_category.set_index(color).reindex(sort_order)
    return list(ordered.dropna()["color"])
93
+
94
 
95
def stacked_bar(df, x, y, color, title):
    """Build a stacked Altair bar chart of ``y`` per ``x``, stacked by ``color``.

    Args:
        df: pandas DataFrame with the ``x``, ``y`` and ``color`` columns, a
            "color" column of hex strings, and the "percent_CA",
            "percent_group" and "acres" columns shown in the tooltip.
        x: column drawn on the x axis (e.g. "ecoregion", "manager_type").
        y: column drawn on the y axis; the axis is fixed to 0-100.
        color: column whose values form the stacked segments (the 30x30 status).
        title: chart title.

    Returns:
        A configured ``alt.Chart``. ``df`` itself is not modified.
    """
    # bar order along the x axis; anything not listed is sorted alphabetically
    x_sorts = {
        "established": '-x',  # chronological order, not alphabetic
        "access_type": ['Open', 'Restricted', 'No Public', "Unknown"],  # levels of openness
        "manager_type": ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"],
        "status": ["30x30-conserved","other-conserved","unknown","non-conserved"],
        # these are the shortened labels produced by label_transform below
        "ecoregion": ['SE. Great Basin','Mojave Desert','Sonoran Desert','Sierra Nevada','SoCal Mountains & Valleys','Mono',
                      'Central CA Coast','Klamath Mountains','NorCal Coast','NorCal Coast Ranges',
                      'NW. Basin & Range','Colorado Desert','Central Valley Coast Ranges','SoCal Coast',
                      'Sierra Nevada Foothills','Southern Cascades','Modoc Plateau','Great Valley (North)','NorCal Interior Coast Ranges',
                      'Great Valley (South)'],
    }
    sort = x_sorts.get(x, 'x')

    # label angle / chart height
    if x == "manager_type":  # labels are too long, making vertical
        angle, height = 270, 373
    elif x == 'ecoregion':  # make labels vertical and figure taller
        angle, height = 270, 430
    else:  # other labels are horizontal
        angle, height = 0, 310

    y_titles = {
        'ecoregion': 'Ecoregion (%)',
        'established': 'Year (%)',
        'manager_type': 'Manager Type (%)',
        'easement': 'Easement (%)',
        'access_type': 'Access (%)'
    }
    ytitle = y_titles.get(x, y)  # Default to `y` if not in the dictionary

    # stacked bar order: keep only the statuses that actually occur, in the
    # canonical order. Slicing the canonical list to len(color_hex) (as before)
    # pairs colors with the wrong statuses whenever a middle status is absent.
    status_order = ['30x30-conserved', 'other-conserved', 'unknown', 'non-conserved']
    present = set(df[color])
    sort_order = [status for status in status_order if status in present]
    # assumes every present status has a non-null "color", so that color_hex
    # lines up one-to-one with sort_order -- TODO confirm
    color_hex = get_hex(df[[color, 'color']], color, sort_order)

    # rank used to stack the segments; unexpected values go last.
    # assign() returns a new frame so the caller's DataFrame is left untouched.
    rank = {status: position for position, status in enumerate(sort_order)}
    df = df.assign(
        stack_order=df[color].map(lambda val: rank.get(val, len(sort_order)))
    )

    if x == "ecoregion":
        # Vega expression that shortens the long ecoregion names
        label_transform = (
            "replace("
            "replace("
            "replace("
            "replace("
            "replace("
            "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
            "'Southern California', 'SoCal'),"
            "'Southeastern', 'SE.'),"
            "'Northwestern', 'NW.'),"
            "'and', '&'),"
            "'California', 'CA')"
        )
    else:
        label_transform = f"datum.{x}"  # Default label transformation

    chart = alt.Chart(df).mark_bar().transform_calculate(
        label=label_transform
    ).encode(
        x=alt.X("label:N", sort = sort, title=None, axis=alt.Axis(labelLimit=150, labelAngle=angle)),  # Shorten axis labels
        y=alt.Y(y, title=ytitle).scale(domain=(0,100)),
        color=alt.Color(
            color,
            sort=sort_order,  # Controls legend order
            scale=alt.Scale(domain=sort_order, range=color_hex)
        ),
        order=alt.Order(
            "stack_order:Q",
            sort="ascending"
        ),
        tooltip=[
            alt.Tooltip("label", type="nominal"),  # Use transformed label
            alt.Tooltip("percent_CA", type="quantitative", format=",.2f"),
            alt.Tooltip("percent_group", type="quantitative", format=",.2f"),
            alt.Tooltip("acres", type="quantitative", format=",.0f"),
        ]
    ).configure_legend(
        direction = 'horizontal',
        orient='top',
        columns = 3,
        title = None,
        labelOffset = 2,
        offset = 10
    ).properties(width="container", height=height, title=title
    ).configure_title(fontSize=18, align = "center",anchor='middle',offset = 10)
    return chart
186
 
187
 
188
def area_plot(df, column): # Percent protected pie chart
    """Build a donut chart of each category's share of California's area.

    Args:
        df: pandas DataFrame with "percent_CA", "acres", a "color" column of
            hex strings, and the category column named by ``column``.
        column: category column shown in the tooltip.

    Returns:
        An ``alt.Chart`` sized to its container, 290 px tall.
    """
    base = alt.Chart(df).encode(
        alt.Theta("percent_CA:Q").stack(True),
    )
    # The on-slice text labels were built but never added to the returned
    # chart, so that unused layer has been removed; only the arcs are drawn.
    pie = (
        base
        .mark_arc(innerRadius=40, outerRadius=100, stroke="black", strokeWidth=0.5)
        .encode(
            # scale(None): use the hex strings in "color" as-is
            alt.Color("color:N").scale(None).legend(None),
            tooltip=[
                alt.Tooltip(column, type="nominal"),
                alt.Tooltip("percent_CA", type="quantitative", format=",.2f"),
                alt.Tooltip("acres", type="quantitative", format=",.0f"),
            ]
        )
    )
    return pie.properties(width="container", height=290)
211
 
212
 
213
+
214
  def bar_chart(df, x, y, title): #display summary stats for color_by column
215
  #axis label angles / chart size
216
+ if x == "manager_type": #labels are too long, making vertical
217
  angle = 270
218
  height = 373
219
  elif x == 'ecoregion': # make labels vertical and figure taller
 
224
  height = 310
225
 
226
  # order of bars
227
+ sort = 'x'
228
+ lineBreak = ''
229
  if x == "established": # order labels in chronological order, not alphabetic.
230
  sort = '-x'
231
  elif x == "access_type": #order based on levels of openness
232
  sort=['Open', 'Restricted', 'No Public', "Unknown"]
233
  elif x == "manager_type":
234
  sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
235
+ elif x == "ecoregion":
236
+ sort = ['SE. Great Basin','Mojave Desert','Sonoran Desert','Sierra Nevada','SoCal Mountains & Valleys','Mono',
237
+ 'Central CA Coast','Klamath Mountains','NorCal Coast','NorCal Coast Ranges',
238
+ 'NW. Basin & Range','Colorado Desert','Central Valley Coast Ranges','SoCal Coast',
239
+ 'Sierra Nevada Foothills','Southern Cascades','Modoc Plateau','Great Valley (North)','NorCal Interior Coast Ranges',
240
+ 'Great Valley (South)']
241
+ elif x == "status":
242
+ sort = ["30x30-conserved","other-conserved","unknown","non-conserved"]
243
+ lineBreak = '-'
244
 
245
  # modify label names in bar chart to fit in frame
246
  label_transform = f"datum.{x}" # default; no change
247
  if x == "access_type":
248
  label_transform = f"replace(datum.{x}, ' Access', '')" #omit 'access' from access_type
249
  elif x == "ecoregion":
250
+ label_transform = (
251
+ "replace("
252
+ "replace("
253
+ "replace("
254
+ "replace("
255
+ "replace("
256
+ "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
257
+ "'Southern California', 'SoCal'),"
258
+ "'Southeastern', 'SE.'),"
259
+ "'Northwestern', 'NW.'),"
260
+ "'and', '&'),"
261
+ "'California', 'CA')"
262
+ )
263
+ y_titles = {
264
+ 'mean_richness': 'Richness (Mean)',
265
+ 'mean_rsr': 'Range-Size Rarity (Mean)',
266
+ 'mean_irrecoverable_carbon': 'Irrecoverable Carbon (Mean)',
267
+ 'mean_manageable_carbon': 'Manageable Carbon (Mean)',
268
+ 'mean_disadvantaged': 'Disadvantaged (Mean)',
269
+ 'mean_svi': 'SVI (Mean)',
270
+ 'mean_fire': 'Fire (Mean)',
271
+ 'mean_rxburn': 'Rx Fire (Mean)'
272
+ }
273
+ ytitle = y_titles.get(y, y) # Default to `y` if not in the dictionary
274
+
275
  x_title = next(key for key, value in select_column.items() if value == x)
276
  chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
277
+ label=label_transform
278
  ).encode(
279
  x=alt.X("label:N",
280
  axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
281
+ sort=sort),
282
+ y=alt.Y(y, axis=alt.Axis(title = ytitle)),
283
  color=alt.Color('color').scale(None),
284
+ ).configure(lineBreak = lineBreak)
285
+
286
+ chart = chart.properties(width="container", height=height, title = title
287
+ ).configure_title(fontSize=18, align = "center",anchor='middle')
288
  return chart
289
 
290
+
291
+
292
  def sync_checkboxes(source):
293
  # gap 1 and gap 2 on -> 30x30-conserved on
294
  if source in ["gap_code1", "gap_code2"]:
 
302
  # other-conserved on <-> gap 3 on
303
  elif source == "gap_code3":
304
  st.session_state["statusother-conserved"] = st.session_state.gap_code3
 
305
  elif source == "statusother-conserved":
306
  if "gap_code3" in st.session_state and st.session_state["statusother-conserved"] != st.session_state.gap_code3:
307
  st.session_state.gap_code3 = st.session_state["statusother-conserved"]
 
308
 
309
  # unknown on <-> gap 4 on
310
  elif source == "gap_code4":
311
  st.session_state.statusunknown = st.session_state.gap_code4
312
+
313
  elif source == "statusunknown":
314
  if "gap_code4" in st.session_state and st.session_state.statusunknown != st.session_state.gap_code4:
315
  st.session_state.gap_code4 = st.session_state.statusunknown
 
316
 
317
  # non-conserved on <-> gap 0
318
  elif source == "gap_code0":
319
  st.session_state['statusnon-conserved'] = st.session_state.gap_code0
320
+
321
  elif source == "statusnon-conserved":
322
  if "gap_code0" in st.session_state and st.session_state['statusnon-conserved'] != st.session_state.gap_code0:
323
  st.session_state.gap_code0 = st.session_state['statusnon-conserved']
 
324
 
325
 
326
+ def getButtons(style_options, style_choice, default_boxes=None):
327
  column = style_options[style_choice]['property']
328
  opts = [style[0] for style in style_options[style_choice]['stops']]
329
+ default_boxes = default_boxes or {}
330
  buttons = {}
331
  for name in opts:
332
  key = column + str(name)
 
345
  return d
346
 
347
 
348
def getLegend(style_options, color_choice):
    """Build the map legend and its display settings for a color-by choice.

    Args:
        style_options: mapping from a color-by choice to a style dict whose
            'stops' entry is a sequence of (category, color) pairs.
        color_choice: key into ``style_options``.

    Returns:
        Tuple ``(legend, position, bg_color, fontsize)`` where ``legend`` maps
        each category label to its color. For "Ecoregion" the labels are
        shortened and a smaller font on a translucent background is used.
    """
    legend = {cat: color for cat, color in style_options[color_choice]['stops']}
    position = 'bottom-left'
    fontsize = 15
    bg_color = 'white'
    # shorten legend for ecoregions
    if color_choice == "Ecoregion":
        # Applied in this order: the two-word region names must be replaced
        # before the bare "California" rule runs.
        abbreviations = (
            ("Northern California", "NorCal"),
            ("Southern California", "SoCal"),
            ("Southeastern", "SE."),
            ("and", "&"),
            ("California", "CA"),
            ("Northwestern", "NW."),
        )
        for long_form, short_form in abbreviations:
            legend = {key.replace(long_form, short_form): value for key, value in legend.items()}
        bg_color = 'rgba(255, 255, 255, 0.6)'
        fontsize = 12
    return legend, position, bg_color, fontsize
364
+
365
+
366
+
367
  def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
368
  filters = []
369
  for col, val in zip(filter_cols, filter_vals):
370
  filters.append(["match", ["get", col], val, True, False])
371
  combined_filters = ["all"] + filters
 
372
  if "non-conserved" in list(chain.from_iterable(filter_vals)):
373
  combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
 
374
  style = {
375
  "version": 8,
376
  "sources": {
 
421
  ]
422
  }
423
  return style
424
+
425
def run_sql(query,color_choice):
    """
    Filter data based on an LLM-generated SQL query and return the matching rows.

    Args:
        query (str): The natural language query to filter the data.
        color_choice (str): The column used for plotting.

    Returns:
        The query result with any 'geom' column dropped, or an empty DataFrame
        with a single 'id' column when no SQL query could be generated.

    NOTE(review): relies on module-level names not defined in this function
    (few_shot_structured_llm, ca, m, alpha, ca_pmtiles, style_options) --
    confirm they are in scope in this module.
    """
    output = few_shot_structured_llm.invoke(query)
    sql_query = output.sql_query
    explanation = output.explanation

    if not sql_query:  # if the chatbot can't generate a SQL query.
        st.success(explanation)
        return pd.DataFrame({'id' : []})

    result = ca.sql(sql_query).execute()
    has_geom = 'geom' in result.columns

    if result.empty:
        explanation = "This query did not return any results. Please try again with a different query."
        st.warning(explanation, icon="⚠️")
        st.caption("SQL Query:")
        st.code(sql_query,language = "sql")
        return result.drop('geom',axis = 1) if has_geom else result

    # Mapping needs BOTH columns. The previous test, `"id" and "geom" in ...`,
    # only checked for "geom" because a non-empty string literal is truthy.
    if has_geom and "id" in result.columns:
        style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
        legend, position, bg_color, fontsize = getLegend(style_options,color_choice)

        m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
        m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
        m.fit_bounds(result.total_bounds.tolist())
        result = result.drop('geom',axis = 1)  # printing to streamlit so I need to drop geom
    else:
        if has_geom:  # geometry without ids can't be mapped; drop it before printing
            result = result.drop('geom',axis = 1)
        st.write(result)  # if we aren't mapping, just print out the data

    with st.popover("Explanation"):
        st.write(explanation)
        st.caption("SQL Query:")
        st.code(sql_query,language = "sql")

    return result
469
+
470
+
471
+
472
def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for printed table
    """Summarize the rows whose ``id`` is in ``ids``, grouped by ``column``.

    Args:
        ca: ibis table to summarize.
        column: name of the column to group by.
        colors: color lookup table joined onto the summary (may be None).
        ids: ids of the rows to keep.

    Returns:
        The DataFrame produced by ``get_summary``.
    """
    combined_filter = _.id.isin(ids)
    # get_summary takes (ca, filter, columns, main_group, colors). The colors
    # table used to be passed in the main_group slot, which left colors unset.
    df = get_summary(ca, combined_filter, [column], column, colors)  # df used for charts
    return df
477
+
app/variables.py CHANGED
@@ -1,9 +1,9 @@
1
  # urls for main layer
2
- ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/38af68979644f52ac928c5e41c81ec4d93468eef/ca-30x30.parquet"
3
- ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/e283bb63ee76dd5acd2d187029a80ab6a011886b/ca-30x30.pmtiles"
4
-
5
 
6
  ca_area_acres = 1.014e8 #acres
 
7
  style_choice = "GAP Status Code"
8
 
9
  # urls for additional data layers
@@ -38,7 +38,7 @@ svi_color = "#1bc7c3" #cyan
38
  white = "#FFFFFF"
39
 
40
  # gap codes 3 and 4 are off by default.
41
- default_gap = {
42
  0: False,
43
  3: False,
44
  4: False,
@@ -119,6 +119,8 @@ status = {
119
  ['other-conserved', "#b6ce7a"],
120
  ['unknown', "#e5efdb"],
121
  ['non-conserved', "#e1e1e1"]
 
 
122
  ],
123
  }
124
 
@@ -153,9 +155,9 @@ ecoregion = {
153
  }
154
 
155
  style_options = {
156
- "Year": year,
157
  "30x30 Status": status,
158
  "GAP Code": gap,
 
159
  "Ecoregion": ecoregion,
160
  "Manager Type": manager,
161
  "Easement": easement,
@@ -262,9 +264,9 @@ svi_style = {
262
 
263
 
264
  select_column = {
265
- "Year": "established",
266
  "30x30 Status": "status",
267
  "GAP Code": "gap_code",
 
268
  "Ecoregion": "ecoregion",
269
  "Manager Type": "manager_type",
270
  "Easement": "easement",
 
1
  # urls for main layer
2
+ ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/65eb463312262e50d51f5c07bfad7568152803b0/ca-30x30.parquet"
3
+ ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/fc4dc523c27a8133452bb7596e3e520fda24eccd/ca-30x30.pmtiles"
 
4
 
5
  ca_area_acres = 1.014e8 #acres
6
+ # ca_area_acres = 103179953.76086558
7
  style_choice = "GAP Status Code"
8
 
9
  # urls for additional data layers
 
38
  white = "#FFFFFF"
39
 
40
  # gap codes 0, 3, and 4 are off by default.
41
+ default_boxes = {
42
  0: False,
43
  3: False,
44
  4: False,
 
119
  ['other-conserved', "#b6ce7a"],
120
  ['unknown', "#e5efdb"],
121
  ['non-conserved', "#e1e1e1"]
122
+ # ['non-conserved', white]
123
+
124
  ],
125
  }
126
 
 
155
  }
156
 
157
  style_options = {
 
158
  "30x30 Status": status,
159
  "GAP Code": gap,
160
+ "Year": year,
161
  "Ecoregion": ecoregion,
162
  "Manager Type": manager,
163
  "Easement": easement,
 
264
 
265
 
266
  select_column = {
 
267
  "30x30 Status": "status",
268
  "GAP Code": "gap_code",
269
+ "Year": "established",
270
  "Ecoregion": "ecoregion",
271
  "Manager Type": "manager_type",
272
  "Easement": "easement",
preprocess/preprocess.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": 1,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {
16
  "editable": true,
@@ -182,25 +182,10 @@
182
  },
183
  {
184
  "cell_type": "code",
185
- "execution_count": 2,
186
  "id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
187
  "metadata": {},
188
- "outputs": [
189
- {
190
- "data": {
191
- "application/vnd.jupyter.widget-view+json": {
192
- "model_id": "3c217929b7744164a99f6e2314366359",
193
- "version_major": 2,
194
- "version_minor": 0
195
- },
196
- "text/plain": [
197
- "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
198
- ]
199
- },
200
- "metadata": {},
201
- "output_type": "display_data"
202
- }
203
- ],
204
  "source": [
205
  "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
206
  "\n",
@@ -210,7 +195,7 @@
210
  "con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
211
  "con.create_table(\"non\", non, overwrite = True)\n",
212
  "\n",
213
- "# split up the non-conserved areas by ecoregions\n",
214
  "con.con.execute('''\n",
215
  "CREATE TABLE non_conserved_eco AS\n",
216
  "SELECT \n",
@@ -223,12 +208,12 @@
223
  "WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
224
  "''')\n",
225
  "\n",
226
- "\n",
227
  "# save to parquet file so we don't have to run this again\n",
228
  "non_eco = (con.table(\"non_conserved_eco\")\n",
229
  " .drop('geom')\n",
230
  " .rename(geom = \"geom_1\")\n",
231
- " .mutate(geom = ST_MakeValid(_.geom)) \n",
 
232
  " )\n",
233
  "\n",
234
  "non_conserved_eco = non_eco.execute()\n",
@@ -256,7 +241,7 @@
256
  " .cast({\"geom\": \"geometry\"})\n",
257
  " .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
258
  " access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
259
- " ecoregion = ibis.null(), easement = ibis.null(), id = 0, type = ibis.literal(\"Land\"),\n",
260
  " status = ibis.literal(\"non-conserved\"),\n",
261
  " acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
262
  " )\n",
@@ -466,7 +451,7 @@
466
  },
467
  {
468
  "cell_type": "code",
469
- "execution_count": 2,
470
  "id": "aade11d9-87b9-403d-bad1-3069663807a9",
471
  "metadata": {},
472
  "outputs": [],
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": null,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {
16
  "editable": true,
 
182
  },
183
  {
184
  "cell_type": "code",
185
+ "execution_count": null,
186
  "id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
187
  "metadata": {},
188
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  "source": [
190
  "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
191
  "\n",
 
195
  "con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
196
  "con.create_table(\"non\", non, overwrite = True)\n",
197
  "\n",
198
+ "#split up the non-conserved areas by ecoregions\n",
199
  "con.con.execute('''\n",
200
  "CREATE TABLE non_conserved_eco AS\n",
201
  "SELECT \n",
 
208
  "WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
209
  "''')\n",
210
  "\n",
 
211
  "# save to parquet file so we don't have to run this again\n",
212
  "non_eco = (con.table(\"non_conserved_eco\")\n",
213
  " .drop('geom')\n",
214
  " .rename(geom = \"geom_1\")\n",
215
+ " .mutate(geom = ST_MakeValid(_.geom))\n",
216
+ " .mutate(id=ibis.row_number().over())\n",
217
  " )\n",
218
  "\n",
219
  "non_conserved_eco = non_eco.execute()\n",
 
241
  " .cast({\"geom\": \"geometry\"})\n",
242
  " .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
243
  " access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
244
+ " easement = ibis.null(), type = ibis.literal(\"Land\"),\n",
245
  " status = ibis.literal(\"non-conserved\"),\n",
246
  " acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
247
  " )\n",
 
451
  },
452
  {
453
  "cell_type": "code",
454
+ "execution_count": null,
455
  "id": "aade11d9-87b9-403d-bad1-3069663807a9",
456
  "metadata": {},
457
  "outputs": [],