Cassie Buhler committed on
Commit
78fca1a
·
unverified ·
2 Parent(s): 1f8b4af 5516f91

Merge pull request #30 from boettiger-lab/feat/app-updates

Browse files
Files changed (6) hide show
  1. app/app.py +66 -142
  2. app/footer.md +3 -7
  3. app/system_prompt.txt +62 -67
  4. app/utils.py +28 -98
  5. app/variables.py +126 -17
  6. preprocess/preprocess.ipynb +10 -7
app/app.py CHANGED
@@ -18,36 +18,23 @@ from functools import reduce
18
  from variables import *
19
  from utils import *
20
 
21
-
22
-
23
- # Create the duckdb connection directly from the sqlalchemy engine instead.
24
- # Not as elegant as `ibis.duckdb.connect()` but shares connection with sqlalchemy.
25
- ## Create the engine
26
- #cwd = pathlib.Path.cwd()
27
- #connect_args = {'preload_extensions':['spatial']}
28
- #eng = sqlalchemy.create_engine(f"duckdb:///{cwd}/duck.db",connect_args = connect_args)
29
- #con = ibis.duckdb.from_connection(eng.raw_connection())
30
-
31
  ## Create the table from remote parquet only if it doesn't already exist on disk
32
-
33
  con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
34
  current_tables = con.list_tables()
 
35
  if "mydata" not in set(current_tables):
36
  tbl = con.read_parquet(ca_parquet)
37
  con.create_table("mydata", tbl)
 
38
  ca = con.table("mydata")
39
 
40
-
41
  for key in [
42
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
43
- 'percent_fire_10yr', 'percent_rxburn_10yr', 'percent_disadvantaged',
44
- 'svi', 'svi_socioeconomic_status', 'svi_household_char',
45
- 'svi_racial_ethnic_minority', 'svi_housing_transit',
46
- 'deforest_carbon', 'human_impact'
47
- ]:
48
  if key not in st.session_state:
49
  st.session_state[key] = False
50
-
51
 
52
  st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
53
 
@@ -108,6 +95,7 @@ st.markdown(
108
  unsafe_allow_html=True,
109
  )
110
 
 
111
  st.markdown(
112
  """
113
  <style>
@@ -154,20 +142,19 @@ with open('app/system_prompt.txt', 'r') as file:
154
  template = file.read()
155
 
156
  from langchain_openai import ChatOpenAI
157
- # os.environ["OPENAI_API_KEY"] = st.secrets["LITELLM_KEY"]
158
- # llm = ChatOpenAI(model="gorilla", temperature=0, base_url="https://llm.nrp-nautilus.io/")
159
- # llm = ChatOpenAI(model = "llama3", api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io", temperature=0)
160
-
161
- llm = ChatOpenAI(model="gpt-4", temperature=0)
162
 
163
  managers = ca.sql("SELECT DISTINCT manager FROM mydata;").execute()
164
  names = ca.sql("SELECT name FROM mydata GROUP BY name HAVING SUM(acres) >10000;").execute()
 
165
 
166
  from langchain_core.prompts import ChatPromptTemplate
167
  prompt = ChatPromptTemplate.from_messages([
168
  ("system", template),
169
  ("human", "{input}")
170
- ]).partial(dialect="duckdb", table_info = ca.schema(), managers = managers, names = names)
171
 
172
  structured_llm = llm.with_structured_output(SQLResponse)
173
  few_shot_structured_llm = prompt | structured_llm
@@ -184,12 +171,11 @@ def run_sql(query,color_choice):
184
  output = few_shot_structured_llm.invoke(query)
185
  sql_query = output.sql_query
186
  explanation =output.explanation
187
-
188
  if not sql_query: # if the chatbot can't generate a SQL query.
189
  st.success(explanation)
190
  return pd.DataFrame({'id' : []})
191
 
192
-
193
  result = ca.sql(sql_query).execute()
194
  if result.empty :
195
  explanation = "This query did not return any results. Please try again with a different query."
@@ -204,11 +190,17 @@ def run_sql(query,color_choice):
204
  elif ("id" and "geom" in result.columns):
205
  style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
206
  legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
 
 
 
 
 
207
  m.add_legend(legend_dict=legend_d, position='bottom-left')
208
  m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
209
  m.fit_bounds(result.total_bounds.tolist())
210
  result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
211
  else:
 
212
  st.write(result) # if we aren't mapping, just print out the data
213
 
214
  with st.popover("Explanation"):
@@ -229,13 +221,12 @@ def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for
229
 
230
  chatbot_toggles = {key: False for key in [
231
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
232
- 'percent_fire_10yr', 'percent_rxburn_10yr', 'percent_disadvantaged',
233
- 'svi', 'svi_socioeconomic_status', 'svi_household_char',
234
- 'svi_racial_ethnic_minority', 'svi_housing_transit',
235
- 'deforest_carbon', 'human_impact'
236
  ]}
237
 
238
 
 
239
  #############
240
 
241
 
@@ -245,7 +236,6 @@ with st.sidebar:
245
 
246
  color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
247
  colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
248
- # alpha = st.slider("transparency", 0.0, 1.0, 0.7)
249
  alpha = 0.8
250
  st.divider()
251
 
@@ -303,7 +293,6 @@ with st.container():
303
  st.stop()
304
 
305
 
306
-
307
  #### Data layers
308
  with st.sidebar:
309
  st.markdown('<p class = "medium-font-sidebar"> Data Layers:</p>', help = "Select data layers to visualize on the map. Summary charts will update based on the displayed layers.", unsafe_allow_html= True)
@@ -315,7 +304,6 @@ with st.sidebar:
315
 
316
  if show_richness:
317
  m.add_tile_layer(url_sr, name="MOBI Species Richness",opacity=a_bio)
318
-
319
  if show_rsr:
320
  m.add_tile_layer(url_rsr, name="MOBI Range-Size Rarity", opacity=a_bio)
321
 
@@ -330,72 +318,41 @@ with st.sidebar:
330
 
331
  if show_manageable_carbon:
332
  m.add_cog_layer(url_man_carbon, palette="purples", name="Manageable Carbon", opacity = a_climate, fit_bounds=False)
333
-
334
 
335
- # Justice40 Section
336
- with st.expander("🌱 Climate & Economic Justice"):
337
- a_justice = st.slider("transparency", 0.0, 1.0, 0.07, key = "social justice")
338
- show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "percent_disadvantaged", value=chatbot_toggles['percent_disadvantaged'])
339
-
340
- if show_justice40:
341
- m.add_pmtiles(url_justice40, style=justice40_style, name="Justice40", opacity=a_justice, tooltip=False, fit_bounds = False)
342
 
343
- # SVI Section
344
- with st.expander("🏡 Social Vulnerability"):
345
- a_svi = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
 
 
346
  show_sv = st.toggle("Social Vulnerability Index (SVI)", key = "svi", value=chatbot_toggles['svi'])
347
- show_sv_socio = st.toggle("Socioeconomic Status", key = "svi_socioeconomic_status", value=chatbot_toggles['svi_socioeconomic_status'])
348
- show_sv_household = st.toggle("Household Characteristics", key = "svi_household_char", value=chatbot_toggles['svi_household_char'])
349
- show_sv_minority = st.toggle("Racial & Ethnic Minority Status", key = "svi_racial_ethnic_minority", value=chatbot_toggles['svi_racial_ethnic_minority'])
350
- show_sv_housing = st.toggle("Housing Type & Transportation", key = "svi_housing_transit", value=chatbot_toggles['svi_housing_transit'])
351
 
 
 
 
352
  if show_sv:
353
- m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEMES"), opacity=a_svi, tooltip=False, fit_bounds = False)
354
-
355
- if show_sv_socio:
356
- m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME1"), opacity=a_svi, tooltip=False, fit_bounds = False)
357
-
358
- if show_sv_household:
359
- m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME2"), opacity=a_svi, tooltip=False, fit_bounds = False)
360
-
361
- if show_sv_minority:
362
- m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME3"), opacity=a_svi, tooltip=False, fit_bounds = False)
363
 
364
- if show_sv_housing:
365
- m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME4"), opacity=a_svi, tooltip=False, fit_bounds = False)
366
-
367
  # Fire Section
368
  with st.expander("🔥 Fire"):
369
- a_fire = st.slider("transparency", 0.0, 1.0, 0.15, key = "fire")
370
- show_fire_10 = st.toggle("Fires (2013-2022)", key = "percent_fire_10yr", value=chatbot_toggles['percent_fire_10yr'])
371
 
372
- show_rx_10 = st.toggle("Prescribed Burns (2013-2022)", key = "percent_rxburn_10yr", value=chatbot_toggles['percent_rxburn_10yr'])
373
 
374
 
375
- if show_fire_10:
376
- m.add_pmtiles(url_calfire, style=fire_style("layer2"), name="CALFIRE Fire Polygons (2013-2022)", opacity=a_fire, tooltip=False, fit_bounds = True)
377
 
378
- if show_rx_10:
379
- m.add_pmtiles(url_rxburn, style=rx_style("layer2"), name="CAL FIRE Prescribed Burns (2013-2022)", opacity=a_fire, tooltip=False, fit_bounds = True)
380
 
381
 
382
- # HI Section
383
- with st.expander("🚜 Human Impacts"):
384
- a_hi = st.slider("transparency", 0.0, 1.0, 0.1, key = "hi")
385
- show_carbon_lost = st.toggle("Deforested Carbon", key = "deforest_carbon", value=chatbot_toggles['deforest_carbon'])
386
- show_human_impact = st.toggle("Human Footprint", key = "human_impact", value=chatbot_toggles['human_impact'])
387
-
388
- if show_carbon_lost:
389
- m.add_tile_layer(url_loss_carbon, name="Deforested Carbon (2002-2022)", opacity = a_hi)
390
-
391
- if show_human_impact:
392
- m.add_cog_layer(url_hi, name="Human Footprint (2017-2021)", opacity = a_hi, fit_bounds=False)
393
-
394
  st.divider()
395
  st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
396
  for label in style_options: # get selected filters (based on the buttons selected)
397
  with st.expander(label):
398
- if label == "GAP Status Code": # gap code 1 and 2 are on by default
399
  opts = getButtons(style_options, label, default_gap)
400
  else: # other buttons are not on by default.
401
  opts = getButtons(style_options, label)
@@ -408,7 +365,7 @@ with st.sidebar:
408
  else:
409
  filter_cols = []
410
  filter_vals = []
411
-
412
  st.divider()
413
  st.markdown("""
414
  <p class="medium-font-sidebar">
@@ -419,16 +376,23 @@ with st.sidebar:
419
  if 'out' not in locals():
420
  style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
421
  legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
422
- m.add_legend(legend_dict = legend_d, position = 'bottom-left')
423
- m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds = True)
424
-
425
 
 
 
 
 
 
 
 
426
 
 
427
  column = select_column[color_choice]
428
 
429
  select_colors = {
430
  "Year": year["stops"],
431
- "GAP Status Code": gap["stops"],
 
 
432
  "Manager Type": manager["stops"],
433
  "Easement": easement["stops"],
434
  "Access Type": access["stops"],
@@ -440,6 +404,7 @@ colors = (
440
  .to_pandas()
441
  )
442
 
 
443
  # get summary tables used for charts + printed table
444
  # df - charts; df_tab - printed table (omits colors)
445
  if 'out' not in locals():
@@ -451,20 +416,14 @@ total_percent = df.percent_protected.sum().round(2)
451
 
452
 
453
  # charts displayed based on color_by variable
454
- richness_chart = bar_chart(df, column, 'mean_richness', "Species Richness")
455
- rsr_chart = bar_chart(df, column, 'mean_rsr', "Range-Size Rarity")
456
- irr_carbon_chart = bar_chart(df, column, 'mean_irrecoverable_carbon', "Irrecoverable Carbon")
457
- man_carbon_chart = bar_chart(df, column, 'mean_manageable_carbon', "Manageable Carbon")
458
- fire_10_chart = bar_chart(df, column, 'mean_percent_fire_10yr', "Fires (2013-2022)")
459
- rx_10_chart = bar_chart(df, column, 'mean_percent_rxburn_10yr',"Prescribed Burns (2013-2022)")
460
- justice40_chart = bar_chart(df, column, 'mean_percent_disadvantaged', "Disadvantaged Communities (Justice40)")
461
- svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index")
462
- svi_socio_chart = bar_chart(df, column, 'mean_svi_socioeconomic_status', "SVI - Socioeconomic Status")
463
- svi_house_chart = bar_chart(df, column, 'mean_svi_household_char', "SVI - Household Characteristics")
464
- svi_minority_chart = bar_chart(df, column, 'mean_svi_racial_ethnic_minority', "SVI - Racial and Ethnic Minority")
465
- svi_transit_chart = bar_chart(df, column, 'mean_svi_housing_transit', "SVI - Housing Type and Transit")
466
- carbon_loss_chart = bar_chart(df, column, 'mean_carbon_lost', "Deforested Carbon (2002-2022)")
467
- hi_chart = bar_chart(df, column, 'mean_human_impact', "Human Footprint (2017-2021)")
468
 
469
 
470
  main = st.container()
@@ -475,7 +434,7 @@ with main:
475
  with map_col:
476
  m.to_streamlit(height=650)
477
  if 'out' not in locals():
478
- st.dataframe(df_tab, use_container_width = True)
479
  else:
480
  st.dataframe(out, use_container_width = True)
481
 
@@ -486,71 +445,36 @@ with main:
486
  st.altair_chart(area_plot(df, column), use_container_width=True)
487
 
488
  if show_richness:
489
- # "Species Richness"
490
  st.altair_chart(richness_chart, use_container_width=True)
491
 
492
  if show_rsr:
493
- # "Range-Size Rarity"
494
  st.altair_chart(rsr_chart, use_container_width=True)
495
 
496
  if show_irrecoverable_carbon:
497
- # "Irrecoverable Carbon"
498
  st.altair_chart(irr_carbon_chart, use_container_width=True)
499
 
500
  if show_manageable_carbon:
501
- # "Manageable Carbon"
502
  st.altair_chart(man_carbon_chart, use_container_width=True)
503
 
504
- if show_fire_10:
505
- # "Fires (2013-2022)"
506
- st.altair_chart(fire_10_chart, use_container_width=True)
507
-
508
- if show_rx_10:
509
- # "Prescribed Burns (2013-2022)"
510
- st.altair_chart(rx_10_chart, use_container_width=True)
511
-
512
  if show_justice40:
513
- # "Disadvantaged Communities (Justice40)"
514
  st.altair_chart(justice40_chart, use_container_width=True)
515
 
516
  if show_sv:
517
- # "Social Vulnerability Index"
518
  st.altair_chart(svi_chart, use_container_width=True)
519
-
520
- if show_sv_socio:
521
- # "SVI - Socioeconomic Status"
522
- st.altair_chart(svi_socio_chart, use_container_width=True)
523
-
524
- if show_sv_household:
525
- # "SVI - Household Characteristics"
526
- st.altair_chart(svi_house_chart, use_container_width=True)
527
-
528
- if show_sv_minority:
529
- # "SVI - Racial and Ethnic Minority"
530
- st.altair_chart(svi_minority_chart, use_container_width=True)
531
-
532
- if show_sv_housing:
533
- # "SVI - Housing Type and Transit"
534
- st.altair_chart(svi_transit_chart, use_container_width=True)
535
-
536
- if show_carbon_lost:
537
- # "Deforested Carbon (2002-2022)"
538
- st.altair_chart(carbon_loss_chart, use_container_width=True)
539
-
540
- if show_human_impact:
541
- # "Human Footprint (2017-2021)"
542
- st.altair_chart(hi_chart, use_container_width=True)
543
-
544
 
 
 
 
 
 
545
 
 
546
 
547
 
548
  st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
549
 
550
  st.caption("***Under California’s 30x30 framework, only GAP codes 1 and 2 are counted toward the conservation goal.")
551
 
552
-
553
-
554
  st.divider()
555
 
556
  with open('app/footer.md', 'r') as file:
 
18
  from variables import *
19
  from utils import *
20
 
 
 
 
 
 
 
 
 
 
 
21
  ## Create the table from remote parquet only if it doesn't already exist on disk
 
22
  con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
23
  current_tables = con.list_tables()
24
+
25
  if "mydata" not in set(current_tables):
26
  tbl = con.read_parquet(ca_parquet)
27
  con.create_table("mydata", tbl)
28
+
29
  ca = con.table("mydata")
30
 
31
+
32
  for key in [
33
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
34
+ 'fire', 'rxburn', 'disadvantaged_communities',
35
+ 'svi']:
 
 
 
36
  if key not in st.session_state:
37
  st.session_state[key] = False
 
38
 
39
  st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
40
 
 
95
  unsafe_allow_html=True,
96
  )
97
 
98
+
99
  st.markdown(
100
  """
101
  <style>
 
142
  template = file.read()
143
 
144
  from langchain_openai import ChatOpenAI
145
+
146
+ llm = ChatOpenAI(model = "kosbu/Llama-3.3-70B-Instruct-AWQ", api_key="cirrus-vllm-secret-api-key", base_url = "https://llm.cirrus.carlboettiger.info/v1/", temperature=0)
147
+ # llm = ChatOpenAI(model="gpt-4", temperature=0)
 
 
148
 
149
  managers = ca.sql("SELECT DISTINCT manager FROM mydata;").execute()
150
  names = ca.sql("SELECT name FROM mydata GROUP BY name HAVING SUM(acres) >10000;").execute()
151
+ ecoregions = ca.sql("SELECT DISTINCT ecoregion FROM mydata;").execute()
152
 
153
  from langchain_core.prompts import ChatPromptTemplate
154
  prompt = ChatPromptTemplate.from_messages([
155
  ("system", template),
156
  ("human", "{input}")
157
+ ]).partial(dialect="duckdb", table_info = ca.schema(), managers = managers, names = names, ecoregions = ecoregions)
158
 
159
  structured_llm = llm.with_structured_output(SQLResponse)
160
  few_shot_structured_llm = prompt | structured_llm
 
171
  output = few_shot_structured_llm.invoke(query)
172
  sql_query = output.sql_query
173
  explanation =output.explanation
174
+
175
  if not sql_query: # if the chatbot can't generate a SQL query.
176
  st.success(explanation)
177
  return pd.DataFrame({'id' : []})
178
 
 
179
  result = ca.sql(sql_query).execute()
180
  if result.empty :
181
  explanation = "This query did not return any results. Please try again with a different query."
 
190
  elif ("id" and "geom" in result.columns):
191
  style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
192
  legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
193
+
194
+ # shorten legend for ecoregions
195
+ if color_choice == "Ecoregion":
196
+ legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
197
+
198
  m.add_legend(legend_dict=legend_d, position='bottom-left')
199
  m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
200
  m.fit_bounds(result.total_bounds.tolist())
201
  result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
202
  else:
203
+
204
  st.write(result) # if we aren't mapping, just print out the data
205
 
206
  with st.popover("Explanation"):
 
221
 
222
  chatbot_toggles = {key: False for key in [
223
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
224
+ 'fire', 'rxburn', 'disadvantaged_communities',
225
+ 'svi',
 
 
226
  ]}
227
 
228
 
229
+
230
  #############
231
 
232
 
 
236
 
237
  color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
238
  colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
 
239
  alpha = 0.8
240
  st.divider()
241
 
 
293
  st.stop()
294
 
295
 
 
296
  #### Data layers
297
  with st.sidebar:
298
  st.markdown('<p class = "medium-font-sidebar"> Data Layers:</p>', help = "Select data layers to visualize on the map. Summary charts will update based on the displayed layers.", unsafe_allow_html= True)
 
304
 
305
  if show_richness:
306
  m.add_tile_layer(url_sr, name="MOBI Species Richness",opacity=a_bio)
 
307
  if show_rsr:
308
  m.add_tile_layer(url_rsr, name="MOBI Range-Size Rarity", opacity=a_bio)
309
 
 
318
 
319
  if show_manageable_carbon:
320
  m.add_cog_layer(url_man_carbon, palette="purples", name="Manageable Carbon", opacity = a_climate, fit_bounds=False)
 
321
 
 
 
 
 
 
 
 
322
 
323
+ # People Section
324
+
325
+ with st.expander("👤 People"):
326
+ a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
327
+ show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
328
  show_sv = st.toggle("Social Vulnerability Index (SVI)", key = "svi", value=chatbot_toggles['svi'])
 
 
 
 
329
 
330
+ if show_justice40:
331
+ m.add_pmtiles(url_justice40, style=justice40_style, name="Justice40", opacity=a_people, tooltip=False, fit_bounds = False)
332
+
333
  if show_sv:
334
+ m.add_pmtiles(url_svi, style = svi_style, opacity=a_people, tooltip=False, fit_bounds = False)
 
 
 
 
 
 
 
 
 
335
 
 
 
 
336
  # Fire Section
337
  with st.expander("🔥 Fire"):
338
+ a_fire = st.slider("transparency", 0.0, 1.0, 0.15, key = "calfire")
339
+ show_fire = st.toggle("Fires (2013-2023)", key = "fire", value=chatbot_toggles['fire'])
340
 
341
+ show_rxburn = st.toggle("Prescribed Burns (2013-2023)", key = "rxburn", value=chatbot_toggles['rxburn'])
342
 
343
 
344
+ if show_fire:
345
+ m.add_pmtiles(url_calfire, style=fire_style, name="CALFIRE Fire Polygons (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
346
 
347
+ if show_rxburn:
348
+ m.add_pmtiles(url_rxburn, style=rx_style, name="CAL FIRE Prescribed Burns (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
349
 
350
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  st.divider()
352
  st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
353
  for label in style_options: # get selected filters (based on the buttons selected)
354
  with st.expander(label):
355
+ if label == "GAP Code": # gap code 1 and 2 are on by default
356
  opts = getButtons(style_options, label, default_gap)
357
  else: # other buttons are not on by default.
358
  opts = getButtons(style_options, label)
 
365
  else:
366
  filter_cols = []
367
  filter_vals = []
368
+
369
  st.divider()
370
  st.markdown("""
371
  <p class="medium-font-sidebar">
 
376
  if 'out' not in locals():
377
  style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
378
  legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
 
 
 
379
 
380
+ # shorten legend for ecoregions
381
+ if color_choice == "Ecoregion":
382
+ legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
383
+
384
+ m.add_legend(legend_dict = legend_d, position = 'bottom-left')
385
+ m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
386
+
387
 
388
+
389
  column = select_column[color_choice]
390
 
391
  select_colors = {
392
  "Year": year["stops"],
393
+ "GAP Code": gap["stops"],
394
+ "30x30 Status": status["stops"],
395
+ "Ecoregion": ecoregion["stops"],
396
  "Manager Type": manager["stops"],
397
  "Easement": easement["stops"],
398
  "Access Type": access["stops"],
 
404
  .to_pandas()
405
  )
406
 
407
+
408
  # get summary tables used for charts + printed table
409
  # df - charts; df_tab - printed table (omits colors)
410
  if 'out' not in locals():
 
416
 
417
 
418
  # charts displayed based on color_by variable
419
+ richness_chart = bar_chart(df, column, 'mean_richness', "Species Richness (2022)")
420
+ rsr_chart = bar_chart(df, column, 'mean_rsr', "Range-Size Rarity (2022)")
421
+ irr_carbon_chart = bar_chart(df, column, 'mean_irrecoverable_carbon', "Irrecoverable Carbon (2018)")
422
+ man_carbon_chart = bar_chart(df, column, 'mean_manageable_carbon', "Manageable Carbon (2018)")
423
+ fire_10_chart = bar_chart(df, column, 'mean_fire', "Fires (2013-2023)")
424
+ rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)")
425
+ justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
426
+ svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
 
 
 
 
 
 
427
 
428
 
429
  main = st.container()
 
434
  with map_col:
435
  m.to_streamlit(height=650)
436
  if 'out' not in locals():
437
+ st.dataframe(df_tab, use_container_width = True)
438
  else:
439
  st.dataframe(out, use_container_width = True)
440
 
 
445
  st.altair_chart(area_plot(df, column), use_container_width=True)
446
 
447
  if show_richness:
 
448
  st.altair_chart(richness_chart, use_container_width=True)
449
 
450
  if show_rsr:
 
451
  st.altair_chart(rsr_chart, use_container_width=True)
452
 
453
  if show_irrecoverable_carbon:
 
454
  st.altair_chart(irr_carbon_chart, use_container_width=True)
455
 
456
  if show_manageable_carbon:
 
457
  st.altair_chart(man_carbon_chart, use_container_width=True)
458
 
 
 
 
 
 
 
 
 
459
  if show_justice40:
 
460
  st.altair_chart(justice40_chart, use_container_width=True)
461
 
462
  if show_sv:
 
463
  st.altair_chart(svi_chart, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
 
465
+ if show_fire:
466
+ st.altair_chart(fire_10_chart, use_container_width=True)
467
+
468
+ if show_rxburn:
469
+ st.altair_chart(rx_10_chart, use_container_width=True)
470
 
471
+
472
 
473
 
474
  st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
475
 
476
  st.caption("***Under California’s 30x30 framework, only GAP codes 1 and 2 are counted toward the conservation goal.")
477
 
 
 
478
  st.divider()
479
 
480
  with open('app/footer.md', 'r') as file:
app/footer.md CHANGED
@@ -9,14 +9,10 @@ Data: https://huggingface.co/datasets/boettiger-lab/ca-30x30
9
 
10
  - Imperiled Species Richness and Range-Size-Rarity from NatureServe (2022). Data: https://beta.source.coop/repositories/cboettig/mobi. License: CC-BY-NC-ND
11
 
12
- - Irrecoverable Carbon from Conservation International, reprocessed to COG on https://beta.source.coop/cboettig/carbon, citation: https://doi.org/10.1038/s41893-021-00803-6, License: CC-BY-NC
13
-
14
- - Fire polygons by CAL FIRE (2022), reprocessed to PMTiles on https://beta.source.coop/cboettig/fire/. License: Public Domain
15
 
16
  - Climate and Economic Justice Screening Tool, US Council on Environmental Quality, Justice40. Archived description: https://web.archive.org/web/20250121194509/https://screeningtool.geoplatform.gov/en/methodology#3/33.47/-97.5. Data: https://beta.source.coop/repositories/cboettig/justice40/description/, License: Public Domain
17
 
18
- - CDC 2020 Social Vulnerability Index by US County. Archived description: https://web.archive.org/web/20250126095916/https://www.atsdr.cdc.gov/place-health/php/svi/index.html. Data: https://source.coop/repositories/cboettig/social-vulnerability/description. License: Public Domain
19
-
20
- - Carbon-loss by Vizzuality, on https://beta.source.coop/repositories/vizzuality/lg-land-carbon-data. Citation: https://doi.org/10.1101/2023.11.01.565036, License: CC-BY
21
 
22
- - Human Footprint by Vizzuality, on https://beta.source.coop/repositories/vizzuality/hfp-100. Citation: https://doi.org/10.3389/frsen.2023.1130896, License: Public Domain
 
9
 
10
  - Imperiled Species Richness and Range-Size-Rarity from NatureServe (2022). Data: https://beta.source.coop/repositories/cboettig/mobi. License: CC-BY-NC-ND
11
 
12
+ - Irrecoverable and Manageable Carbon from Conservation International, reprocessed to COG on https://beta.source.coop/cboettig/carbon, citation: https://doi.org/10.1038/s41893-021-00803-6, License: CC-BY-NC
 
 
13
 
14
  - Climate and Economic Justice Screening Tool, US Council on Environmental Quality, Justice40. Archived description: https://web.archive.org/web/20250121194509/https://screeningtool.geoplatform.gov/en/methodology#3/33.47/-97.5. Data: https://beta.source.coop/repositories/cboettig/justice40/description/, License: Public Domain
15
 
16
+ - CDC 2022 Social Vulnerability Index by US Census Tract. Archived description: https://web.archive.org/web/20250126095916/https://www.atsdr.cdc.gov/place-health/php/svi/index.html. Data: https://source.coop/repositories/cboettig/social-vulnerability/description. License: Public Domain
 
 
17
 
18
+ - Fire and Prescribed Fire by CAL FIRE (2023), reprocessed to PMTiles on https://beta.source.coop/cboettig/fire/. License: Public Domain
app/system_prompt.txt CHANGED
@@ -1,4 +1,4 @@
1
- You are an expert in SQL and an assistant for mapping and analyzing California land data. Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question.
2
 
3
  For example:
4
  {{
@@ -10,17 +10,50 @@ Ensure the response contains only this JSON object, with no additional text, for
10
 
11
  # Important Details
12
 
13
- - For map-related queries (e.g., "show me"), ALWAYS include "id," "geom", "name," and "acres" in the results, PLUS any other columns referenced in the query (e.g., in conditions, calculations, or subqueries). This output structure is MANDATORY for all map-related queries.
 
14
  - ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
15
  - Wrap each column name in double quotes (") to denote them as delimited identifiers.
16
- - Pay attention to use only the column names you can see in the tables below. DO NOT query for columns that do not exist.
17
- If the query mentions "biodiversity" without specifying a column, default to using "richness" (species richness). Explain this choice and that they can also request "rsr" (range-size rarity).
 
 
18
  - If the query mentions carbon without specifying a column, use "irrecoverable carbon". Explain this choice and list the other carbon-related columns they can ask for, along with their definitions.
19
- - If the query asks about the manager, use the "manager" column. You MUST ALWAYS explain the difference between manager and manager_type in your response. Clarify that "manager" refers to the name of the managing entity (e.g., an agency), while "manager_type" specifies the type of jurisdiction (e.g., Federal, State, Non Profit). Also, let the user know they can include "manager_type" in their query if they want to refine their results.
20
- - If the user's query is unclear, DO NOT make assumptions. Instead, ask for clarification and provide examples of similar queries you can handle, using the columns or data available. You MUST ONLY deliver accurate results.
21
- - If you are mapping the data, explicitly state that the data is being visualized on a map. ALWAYS include a statement encouraging the user to examine the queried data below the map, as some areas may be too small at the current zoom level.
 
22
  - Users may not be familiar with this data, so your explanation should be short, clear, and easily understandable. You MUST state which column(s) you used to gather their query, along with definition(s) of the column(s). Do NOT explain SQL commands.
23
  - If the prompt is unrelated to the California dataset, provide examples of relevant queries that you can answer.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Example Questions and How to Approach Them
26
 
@@ -36,33 +69,33 @@ example_assistant: {{"sql_query":
36
  ## Example:
37
  example_user: "Which gap code has been impacted the most by fire?"
38
  example_assistant: {{"sql_query":
39
- SELECT "reGAP", SUM("percent_fire_10yr") AS temp
40
  FROM mydata
41
- GROUP BY "reGAP"
42
- ORDER BY temp ASC
43
  LIMIT 1;
44
- "explanation":"I used the `percent_fire_10yr` column, which shows the percentage of each area burned over the past 10 years (2013–2022), summing it for each GAP code to find the one with the highest total fire impact."
45
  }}
46
 
47
  ## Example:
48
  example_user: "Who manages the land with the worst biodiversity and highest SVI?"
49
  example_assistant: {{"sql_query":
50
- SELECT manager,richness, svi
51
  FROM mydata
52
  GROUP BY "manager"
53
  ORDER BY "richness" ASC, "svi" DESC
54
  LIMIT 1;
55
  "explanation": "I identified the land manager with the worst biodiversity and highest Social Vulnerability Index (SVI) by analyzing the columns: `richness`, which measures species richness, and `svi`, which represents social vulnerability based on factors like socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation.
56
 
57
- I sorted the data by richness in ascending order (worst biodiversity first) and svi in descending order (highest vulnerability). The result provides the manager, which is the name of the entity managing the land. Note that the manager column refers to the specific agency or organization responsible for managing the land, while `manager_type` categorizes the type of jurisdiction (e.g., Federal, State, Non Profit)."
58
  }}
59
 
60
-
61
  ## Example:
62
  example_user: "Show me the biggest protected area"
63
  example_assistant: {{"sql_query":
64
  SELECT "id", "geom", "name", "acres", "manager", "manager_type", "acres"
65
  FROM mydata
 
66
  ORDER BY "acres" DESC
67
  LIMIT 1;
68
  "explanation": "I identified the biggest protected area by sorting the data in descending order based on the `acres` column, which represents the size of each area."
@@ -70,30 +103,28 @@ example_assistant: {{"sql_query":
70
  ## Example:
71
  example_user: "Show me the 50 most biodiverse areas found in disadvantaged communities."
72
  example_assistant: {{"sql_query":
73
- SELECT "id", "geom", "name", "acres", "richness", "percent_disadvantaged" FROM mydata
74
- WHERE "percent_disadvantaged" > 0
75
  ORDER BY "richness" DESC
76
  LIMIT 50;
77
- "explanation": "I used the `richness` column to measure biodiversity and the `percent_disadvantaged` column to identify areas located in disadvantaged communities. The `percent_disadvantaged` value is derived from the Justice40 initiative, which identifies communities burdened by systemic inequities and vulnerabilities across multiple domains, including climate resilience, energy access, health disparities, housing affordability, pollution exposure, transportation infrastructure, water quality, and workforce opportunities.
78
 
79
- The results are sorted in descending order by biodiversity richness (highest biodiversity first), and only areas with a `percent_disadvantaged` value greater than 0 (indicating some portion of the area overlaps with a disadvantaged community) are included."
80
  }}
81
 
82
-
83
  ## Example:
84
  example_user: "Show me federally managed gap 3 lands that are in the top 5% of biodiversity richness and have experienced forest fire over at least 50% of their area"
85
  sql_query:
86
- WITH temp_tab AS (
87
- SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "richness") AS temp
88
  FROM mydata
89
  )
90
- SELECT "id", "geom", "name", "acres","richness", "reGAP", "percent_fire_10yr"
91
  FROM mydata
92
- WHERE "reGAP" = 3
93
- AND "percent_fire_10yr" >= 0.5
94
  and "manager_type" = 'Federal'
95
- AND "richness" > (SELECT temp FROM temp_tab);
96
-
97
 
98
  ## Example:
99
  example_user: "What is the total acreage of areas designated as easements?
@@ -102,18 +133,16 @@ sql_query:
102
  FROM mydata
103
  WHERE "easement" = 'True';
104
 
105
-
106
  ## Example:
107
- example_user: "Show me land where irrecoverable carbon is in the top 10%"
108
  sql_query:
109
- WITH temp_tab AS (
110
- SELECT PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY "irrecoverable_carbon") AS temp
111
  FROM mydata
112
  )
113
- SELECT "id", "geom", "name", "acres", "irrecoverable_carbon"
114
  FROM mydata
115
- WHERE "irrecoverable_carbon" > (SELECT temp FROM temp_tab);
116
-
117
 
118
  ## Example:
119
  example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
@@ -124,38 +153,4 @@ sql_query:
124
  AND "percent_rxburn_10yr" >= 0.3;
125
 
126
 
127
- # Detailed Explanation of the Columns in the California Dataset
128
- - "established": The time range which the land was acquired, either "2024" or "pre-2024".
129
- - "reGAP": The GAP status code; corresponds to the level of protection the area has. There are 4 gap codes and are defined as the following.
130
- Status 1: Permanently protected to maintain a natural state, allowing natural disturbances or mimicking them through management.
131
- Status 2: Permanently protected but may allow some uses or management practices that degrade natural communities or suppress natural disturbances.
132
- Status 3: Permanently protected from major land cover conversion but allows some extractive uses (e.g., logging, mining) and protects federally listed species.
133
- Status 4: No protection mandates; land may be converted to unnatural habitat types or its management intent is unknown.
134
-
135
- - "name": The name of a protected area. The user may use a shortened name and/or not capitalize it. For example, "redwoods" may refer to "Redwood National Park", or "klamath" refers to "Klamath National Forest". Another example, "san diego wildlife refuge" could refer to multiple areas, so you would use "WHERE LOWER("name") LIKE '%san diego%' AND LOWER("name") LIKE '%wildlife%' AND LOWER("name") LIKE '%refuge%';" in your SQL query, to ensure that it is case-insensitive and matches any record that includes our phrases, because we don't want to overlook a match. If the name isn't capitalized, you MUST ensure the search is case-insensitive by converting "name" to lowercase.
136
- The names of the largest parks are {names}.
137
- - "access_type": Level of access to the land: "Unknown Access","Restricted Access","No Public Access" and "Open Access".
138
- - "manager": The name of land manager for the area. Also referred to as the agency name. These are the manager names: {managers}. Users might use acronyms or could omit "United States" in the agency name, make sure to use the name used in the table. Some examples: "BLM" or "Bureau of Land Management" refers to the "United States Bureau of Land Management" or "CDFW" is "California Department of Fish and Wildlife". Similar to the "name" field, you can search for managers using "LIKE" in the SQL query.
139
- - "manager_type": The jurisdiction of the land manager: "Federal","State","Non Profit","Special District","Unknown","County","City","Joint","Tribal","Private","HOA". If the user says "non-profit", do not use a hyphen in your query.
140
- - "easement": Boolean value; whether or not the land is an easement.
141
- - "acres": Land acreage; measures the size of the area.
142
- - "id": unique id for each area. This is necessary for displaying queried results on a map.
143
- - "type": Physical type of area, either "Land" or "Water".
144
- - "richness": Species richness; higher values indicate better biodiversity.
145
- - "rsr": Range-size rarity; higher values indicate better rarity metrics.
146
- - "svi": Social Vulnerability Index based on 4 themes: socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation. Higher values indicate greater vulnerability.
147
- - Themes:
148
- - "svi_socioeconomic_status": Poverty, unemployment, housing cost burden, education, and health insurance.
149
- - "svi_household_char": Age, disability, single-parent households, and language proficiency.
150
- - "svi_racial_ethnic_minority": Race and ethnicity variables.
151
- - "svi_housing_transit": Housing type, crowding, vehicles, and group quarters.
152
- - "percent_disadvantaged": Justice40-defined disadvantaged communities overburdened by climate, energy, health, housing, pollution, transportation, water, and workforce factors. Higher values indicate more disadvantage. Range is between 0 and 1.
153
- - "deforest_carbon": Carbon emissions due to deforestation.
154
- - "human_impact": A score representing the human footprint: cumulative anthropogenic impacts such as land cover change, population density, and infrastructure.
155
- - "percent_fire_10yr": The percentage of the area burned by fires from (2013-2022). Range is between 0 and 1.
156
- - "percent_rxburn_10yr": The percentage of the area affected by prescribed burns from (2013-2022). Range is between 0 and 1.
157
-
158
- Only use the following tables:
159
- {table_info}.
160
-
161
  Question: {input}
 
1
+ You are an expert in SQL and an assistant for mapping and analyzing California land data, used for California's 30x30 initiative (protecting 30% of land and coastal waters by 2030). Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question. If the question doesn't necessitate a SQL query, only output an explanation.
2
 
3
  For example:
4
  {{
 
10
 
11
  # Important Details
12
 
13
+ - For map-related queries (e.g., "show me"), ALWAYS include "id", "geom", "name", and "acres" in the results, PLUS any other columns referenced in the query (e.g., in conditions, calculations, or subqueries). All columns used in the query MUST be returned in the results. This output structure is MANDATORY for all map-related queries.
14
+ - If the user specifies "protected" land or areas, only return records where "status" is "30x30-conserved" or "other-conserved".
15
  - ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
16
  - Wrap each column name in double quotes (") to denote them as delimited identifiers.
17
+ - Wrap values that are strings in single quotes (') to distinguish them from column names.
18
+ - Pay attention to use only the column names you can see in the tables below. Your SQL queries MUST ONLY use these columns.
19
+ - ONLY write SQL queries using the records and columns that exist in the table. You are given the schema and all distinct values in this prompt.
20
+ - If the query mentions "biodiversity" without specifying a column, default to using "richness" (species richness). Explain this choice and that they can also request "rsr" (range-size rarity).
21
  - If the query mentions carbon without specifying a column, use "irrecoverable_carbon". Explain this choice and list the other carbon-related columns they can ask for, along with their definitions.
22
+ - If the query asks about the manager, use the "manager" column. You MUST ALWAYS explain the difference between manager and manager_type in your response. Clarify that "manager" refers to the name of the managing entity (e.g., an agency), while "manager_type" specifies the type of jurisdiction.
23
+ - Users might use shortened labels in their queries. For example, "redwoods" may refer to "Redwood National Park", or "klamath" refers to "Klamath National Forest".
24
+ - Do NOT overlook a match. SQL queries should be case-insensitive and match any record that includes phrases from the user. For example, "san diego wildlife refuge" could refer to multiple areas, so you would use "WHERE LOWER("name") LIKE '%san diego%' AND LOWER("name") LIKE '%wildlife%' AND LOWER("name") LIKE '%refuge%';" in your SQL query.
25
+ - Users might use acronyms or could omit "United States" in the agency name, make sure to use the name used in the table. Some examples: "BLM" or "Bureau of Land Management" refers to the "United States Bureau of Land Management" and "CDFW" is "California Department of Fish and Wildlife".
26
  - Users may not be familiar with this data, so your explanation should be short, clear, and easily understandable. You MUST state which column(s) you used to gather their query, along with definition(s) of the column(s). Do NOT explain SQL commands.
27
  - If the prompt is unrelated to the California dataset, provide examples of relevant queries that you can answer.
28
+ - If the user's query is unclear, DO NOT make assumptions. Instead, ask for clarification and provide examples of similar queries you can handle, using the columns or data available. You MUST ONLY deliver accurate results.
29
+ - Not every query will require SQL code; users may ask for more information about values and columns in the table, which you can answer based on the information in this prompt. For these cases, your "sql_query" field should be empty.
30
+
31
+ # Column Descriptions
32
+ - "established": The time range which the land was acquired, either "2024" or "pre-2024".
33
+ - "gap_code": The GAP code corresponds to the level of biodiversity protection for an area; GAP 1 has the highest protections whereas GAP 4 has the weakest. The 4 GAP codes are defined as follows.
34
+ GAP 1: Permanently protected to maintain a natural state, allowing natural disturbances or mimicking them through management.
35
+ GAP 2: Permanently protected but may allow some uses or management practices that degrade natural communities or suppress natural disturbances.
36
+ GAP 3: Permanently protected from major land cover conversion but allows some extractive uses (e.g., logging, mining) and protects federally listed species.
37
+ GAP 4: No protection mandates; land may be converted to unnatural habitat types or its management intent is unknown.
38
+ - "name": The name of the protected area. The names of the largest parks are {names}.
39
+ - "access_type": Level of access to the land: "Unknown Access","Restricted Access","No Public Access" and "Open Access".
40
+ - "manager": The land manager's name, also known as the agency name. These are the manager names: {managers}.
41
+ - "manager_type": The jurisdiction of the land manager: "Federal","State","Non Profit","Special District","Unknown","County","City","Joint","Tribal","Private","HOA". If the user says "non-profit", do not use a hyphen in your query.
42
+ - "easement": Boolean value; whether or not the land is an easement.
43
+ - "acres": Land acreage; measures the size of the area.
44
+ - "id": unique id for each area. This is necessary for displaying queried results on a map.
45
+ - "type": Physical type of area, either "Land" or "Water".
46
+ - "richness": Species richness; higher values indicate better biodiversity.
47
+ - "rsr": Range-size rarity; higher values indicate better rarity metrics.
48
+ - "svi": Social Vulnerability Index based on 4 themes: socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation. Higher values indicate greater vulnerability.
49
+ - "disadvantaged_communities": The percentage of overlap that the protected area has with a disadvantaged community. Justice40-defined disadvantaged communities overburdened by climate, energy, health, housing, pollution, transportation, water, and workforce factors. Higher values indicate more disadvantage. Range is between 0 and 1.
50
+ - "fire": The percentage of the area burned by fires from (2013-2022). Areas can burn more than once, thus the percentage can be above 1.
51
+ - "rxburn": The percentage of the area affected by prescribed burns from (2013-2022). Areas can be burned more than once.
52
+ - "status": The conservation status. GAP 1 and 2 lands have the highest biodiversity protections and count towards the 30x30 goal, thus are "30x30-conserved". GAP 3 and 4 lands are grouped into "other-conserved", as their biodiversity protections are lower. Areas that aren't protected--that is, they're not GAP 1, 2, 3, or 4--are designated "non-conserved".
53
+ - "ecoregion": Ecoregions are areas with similar ecosystems and environmental resources. The ecoregions in this table are {ecoregions}.
54
+
55
+ Only use the following table:
56
+ {table_info}.
57
 
58
  # Example Questions and How to Approach Them
59
 
 
69
  ## Example:
70
  example_user: "Which gap code has been impacted the most by fire?"
71
  example_assistant: {{"sql_query":
72
+ SELECT "gap_code", SUM("fire") AS total_fire
73
  FROM mydata
74
+ GROUP BY "gap_code"
75
+ ORDER BY total_fire DESC
76
  LIMIT 1;
77
+ "explanation":"I used the `fire` column, which shows the percentage of each area burned over the past 10 years (2013–2022), summing it for each GAP code to find the one with the highest total fire impact."
78
  }}
79
 
80
  ## Example:
81
  example_user: "Who manages the land with the worst biodiversity and highest SVI?"
82
  example_assistant: {{"sql_query":
83
+ SELECT "manager", "richness", "svi"
84
  FROM mydata
85
  GROUP BY "manager", "richness", "svi"
86
  ORDER BY "richness" ASC, "svi" DESC
87
  LIMIT 1;
88
  "explanation": "I identified the land manager with the worst biodiversity and highest Social Vulnerability Index (SVI) by analyzing the columns: `richness`, which measures species richness, and `svi`, which represents social vulnerability based on factors like socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation.
89
 
90
+ I sorted the data by richness in ascending order (worst biodiversity first) and svi in descending order (highest vulnerability). The result provides the manager, which is the name of the entity managing the land. Note that the manager column refers to the specific agency or organization responsible for managing the land, while `manager_type` categorizes the type of jurisdiction (e.g., Federal, State, Non Profit)."
91
  }}
92
 
 
93
  ## Example:
94
  example_user: "Show me the biggest protected area"
95
  example_assistant: {{"sql_query":
96
  SELECT "id", "geom", "name", "acres", "manager", "manager_type", "status"
97
  FROM mydata
98
+ WHERE "status" IN ('30x30-conserved', 'other-conserved')
99
  ORDER BY "acres" DESC
100
  LIMIT 1;
101
  "explanation": "I identified the biggest protected area by sorting the data in descending order based on the `acres` column, which represents the size of each area."
 
103
  ## Example:
104
  example_user: "Show me the 50 most biodiverse areas found in disadvantaged communities."
105
  example_assistant: {{"sql_query":
106
+ SELECT "id", "geom", "name", "acres", "richness", "disadvantaged_communities" FROM mydata
107
+ WHERE "disadvantaged_communities" > 0
108
  ORDER BY "richness" DESC
109
  LIMIT 50;
110
+ "explanation": "I used the `richness` column to measure biodiversity and the `disadvantaged_communities` column to identify areas located in disadvantaged communities. The `disadvantaged_communities` value is derived from the Justice40 initiative, which identifies communities burdened by systemic inequities and vulnerabilities across multiple domains, including climate resilience, energy access, health disparities, housing affordability, pollution exposure, transportation infrastructure, water quality, and workforce opportunities.
111
 
112
+ The results are sorted in descending order by biodiversity richness (highest biodiversity first), and only areas with a `disadvantaged_communities` value greater than 0 (indicating some portion of the area overlaps with a disadvantaged community) are included."
113
  }}
114
 
 
115
  ## Example:
116
  example_user: "Show me federally managed gap 3 lands that are in the top 5% of biodiversity richness and have experienced forest fire over at least 50% of their area"
117
  sql_query:
118
+ WITH temp AS (
119
+ SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "richness") AS richness_95_percentile
120
  FROM mydata
121
  )
122
+ SELECT "id", "geom", "name", "acres", "richness", "gap_code", "fire"
123
  FROM mydata
124
+ WHERE "gap_code" = 3
125
+ AND "fire" >= 0.5
126
  and "manager_type" = 'Federal'
127
+ AND "richness" > (SELECT richness_95_percentile FROM temp);
 
128
 
129
  ## Example:
130
  example_user: "What is the total acreage of areas designated as easements?"
 
133
  FROM mydata
134
  WHERE "easement" = 'True';
135
 
 
136
  ## Example:
137
+ example_user: "Which ecoregions are in the top 10% of range-size rarity?"
138
  sql_query:
139
+ WITH temp AS (
140
+ SELECT PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY "rsr") AS rsr_90_percentile
141
  FROM mydata
142
  )
143
+ SELECT "ecoregion"
144
  FROM mydata
145
+ WHERE "rsr" > (SELECT rsr_90_percentile FROM temp);
 
146
 
147
  ## Example:
148
  example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
 
153
  AND "rxburn" >= 0.3;
154
 
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  Question: {input}
app/utils.py CHANGED
@@ -26,16 +26,10 @@ def get_summary(ca, combined_filter, column, colors=None): #summary stats, based
26
  mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
27
  mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
28
  mean_manageable_carbon = (_.manageable_carbon * _.acres).sum() / _.acres.sum(),
29
- mean_percent_fire_10yr = (_.percent_fire_10yr *_.acres).sum()/_.acres.sum(),
30
- mean_percent_rxburn_10yr = (_.percent_rxburn_10yr *_.acres).sum()/_.acres.sum(),
31
- mean_percent_disadvantaged = (_.percent_disadvantaged * _.acres).sum() / _.acres.sum(),
32
  mean_svi = (_.svi * _.acres).sum() / _.acres.sum(),
33
- mean_svi_socioeconomic_status = (_.svi_socioeconomic_status * _.acres).sum() / _.acres.sum(),
34
- mean_svi_household_char = (_.svi_household_char * _.acres).sum() / _.acres.sum(),
35
- mean_svi_racial_ethnic_minority = (_.svi_racial_ethnic_minority * _.acres).sum() / _.acres.sum(),
36
- mean_svi_housing_transit = (_.svi_housing_transit * _.acres).sum() / _.acres.sum(),
37
- mean_carbon_lost = (_.deforest_carbon * _.acres).sum() / _.acres.sum(),
38
- mean_human_impact = (_.human_impact * _.acres).sum() / _.acres.sum(),
39
  )
40
  .mutate(percent_protected=_.percent_protected.round(1))
41
  )
@@ -58,6 +52,10 @@ def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): #
58
  filter_cols.append(column)
59
  filters.append(getattr(_, column).isin(colorby_vals[column]))
60
  combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
 
 
 
 
61
  df = get_summary(ca, combined_filter, [column], colors) # df used for charts
62
  df_tab = get_summary(ca, combined_filter, filter_cols, colors = None) #df used for printed table
63
  return df, df_tab
@@ -69,7 +67,7 @@ def area_plot(df, column): #percent protected pie chart
69
  alt.Theta("percent_protected:Q").stack(True),
70
  )
71
  pie = ( base
72
- .mark_arc(innerRadius= 40, outerRadius=100)
73
  .encode(alt.Color("color:N").scale(None).legend(None),
74
  tooltip=['percent_protected', column])
75
  )
@@ -82,11 +80,13 @@ def area_plot(df, column): #percent protected pie chart
82
 
83
 
84
  def bar_chart(df, x, y, title): #display summary stats for color_by column
85
-
86
  #axis label angles / chart size
87
- if x == "manager_type": #labels are too long, making vertical
88
  angle = 270
89
  height = 373
 
 
 
90
  else: #other labels are horizontal
91
  angle = 0
92
  height = 310
@@ -101,28 +101,26 @@ def bar_chart(df, x, y, title): #display summary stats for color_by column
101
  else:
102
  sort = 'x'
103
 
 
 
 
 
 
 
 
104
  x_title = next(key for key, value in select_column.items() if value == x)
105
- chart = alt.Chart(df).mark_bar().transform_calculate(
106
- access_label=f"replace(datum.{x}, ' Access', '')" #omit access from access_type labels so it fits in frame
107
  ).encode(
108
- x=alt.X("access_label:N",
109
- axis=alt.Axis(labelAngle=angle, title=x_title),
110
  sort=sort),
111
  y=alt.Y(y, axis=alt.Axis()),
112
- color=alt.Color('color').scale(None)
113
- ).properties(width="container", height=height, title = title
114
- )
115
- # sizing for poster
116
- # ).configure_title(
117
- # fontSize=40
118
- # ).configure_axis(
119
- # labelFontSize=24,
120
- # titleFontSize=34
121
- # )
122
  return chart
123
 
124
 
125
-
126
  def getButtons(style_options, style_choice, default_gap=None): #finding the buttons selected to use as filters
127
  column = style_options[style_choice]['property']
128
  opts = [style[0] for style in style_options[style_choice]['stops']]
@@ -137,7 +135,6 @@ def getButtons(style_options, style_choice, default_gap=None): #finding the butt
137
  return d
138
 
139
 
140
-
141
  def getColorVals(style_options, style_choice):
142
  #df_tab only includes filters selected, we need to manually add "color_by" column (if it's not already a filter).
143
  column = style_options[style_choice]['property']
@@ -147,73 +144,6 @@ def getColorVals(style_options, style_choice):
147
  return d
148
 
149
 
150
-
151
- def fire_style(layer):
152
- return {"version": 8,
153
- "sources": {
154
- "source1": {
155
- "type": "vector",
156
- "url": "pmtiles://" + url_calfire,
157
- "attribution": "CAL FIRE"
158
- }
159
- },
160
- "layers": [
161
- {
162
- "id": "fire",
163
- "source": "source1",
164
- "source-layer": layer,
165
- "type": "fill",
166
- "paint": {
167
- "fill-color": "#D22B2B",
168
- }
169
- }
170
- ]
171
- }
172
- def rx_style(layer):
173
- return{
174
- "version": 8,
175
- "sources": {
176
- "source2": {
177
- "type": "vector",
178
- "url": "pmtiles://" + url_rxburn,
179
- "attribution": "CAL FIRE"
180
- }
181
- },
182
- "layers": [
183
- {
184
- "id": "fire",
185
- "source": "source2",
186
- "source-layer": layer,
187
- # "filter": [">=", ["get", "YEAR_"], year],
188
- "type": "fill",
189
- "paint": {
190
- "fill-color": "#702963",
191
- }
192
- }
193
- ]
194
- }
195
-
196
- def get_sv_style(column):
197
- return {
198
- "layers": [
199
- {
200
- "id": "SVI",
201
- "source": column, #need different "source" for multiple pmtiles layers w/ same file
202
- "source-layer": "SVI2020_US_county",
203
- "filter": ["match", ["get", "STATE"], "California", True, False],
204
- "type": "fill",
205
- "paint": {
206
- "fill-color": [
207
- "interpolate", ["linear"], ["get", column],
208
- 0, white,
209
- 1, svi_color
210
- ]
211
- }
212
- }
213
- ]
214
- }
215
-
216
-
217
  def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
218
  filters = []
219
  for col, val in zip(filter_cols, filter_vals):
@@ -231,7 +161,7 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
231
  {
232
  "id": "ca30x30",
233
  "source": "ca",
234
- "source-layer": "layer",
235
  "type": "fill",
236
  "filter": combined_filters,
237
  "paint": {
@@ -242,6 +172,7 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
242
  ]
243
  }
244
  return style
 
245
 
246
  def get_pmtiles_style_llm(paint, ids):
247
  combined_filters = ["all", ["match", ["get", "id"], ids, True, False]]
@@ -257,13 +188,12 @@ def get_pmtiles_style_llm(paint, ids):
257
  {
258
  "id": "ca30x30",
259
  "source": "ca",
260
- "source-layer": "layer",
261
  "type": "fill",
262
  "filter": combined_filters,
263
  "paint": {
264
  "fill-color": paint,
265
  "fill-opacity": 1,
266
- # "fill-extrusion-height": 1000
267
  }
268
  }
269
  ]
 
26
  mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
27
  mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
28
  mean_manageable_carbon = (_.manageable_carbon * _.acres).sum() / _.acres.sum(),
29
+ mean_fire = (_.fire *_.acres).sum()/_.acres.sum(),
30
+ mean_rxburn = (_.rxburn *_.acres).sum()/_.acres.sum(),
31
+ mean_disadvantaged = (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
32
  mean_svi = (_.svi * _.acres).sum() / _.acres.sum(),
 
 
 
 
 
 
33
  )
34
  .mutate(percent_protected=_.percent_protected.round(1))
35
  )
 
52
  filter_cols.append(column)
53
  filters.append(getattr(_, column).isin(colorby_vals[column]))
54
  combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
55
+
56
+ if column == "status": #need to include non-conserved in summary stats
57
+ combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','non-conserved']))
58
+
59
  df = get_summary(ca, combined_filter, [column], colors) # df used for charts
60
  df_tab = get_summary(ca, combined_filter, filter_cols, colors = None) #df used for printed table
61
  return df, df_tab
 
67
  alt.Theta("percent_protected:Q").stack(True),
68
  )
69
  pie = ( base
70
+ .mark_arc(innerRadius= 40, outerRadius=100, stroke = 'black', strokeWidth = .5)
71
  .encode(alt.Color("color:N").scale(None).legend(None),
72
  tooltip=['percent_protected', column])
73
  )
 
80
 
81
 
82
  def bar_chart(df, x, y, title): #display summary stats for color_by column
 
83
  #axis label angles / chart size
84
+ if x in ["manager_type",'status']: #labels are too long, making vertical
85
  angle = 270
86
  height = 373
87
+ elif x == 'ecoregion': # make labels vertical and figure taller
88
+ angle = 270
89
+ height = 430
90
  else: #other labels are horizontal
91
  angle = 0
92
  height = 310
 
101
  else:
102
  sort = 'x'
103
 
104
+ # modify label names in bar chart to fit in frame
105
+ label_transform = f"datum.{x}" # default; no change
106
+ if x == "access_type":
107
+ label_transform = f"replace(datum.{x}, ' Access', '')" #omit 'access' from access_type
108
+ elif x == "ecoregion":
109
+ label_transform = f"replace(datum.{x}, 'California', 'CA')" # Replace "California" with "CA"
110
+
111
  x_title = next(key for key, value in select_column.items() if value == x)
112
+ chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
113
+ label=label_transform
114
  ).encode(
115
+ x=alt.X("label:N",
116
+ axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
117
  sort=sort),
118
  y=alt.Y(y, axis=alt.Axis()),
119
+ color=alt.Color('color').scale(None),
120
+ ).properties(width="container", height=height, title = title)
 
 
 
 
 
 
 
 
121
  return chart
122
 
123
 
 
124
  def getButtons(style_options, style_choice, default_gap=None): #finding the buttons selected to use as filters
125
  column = style_options[style_choice]['property']
126
  opts = [style[0] for style in style_options[style_choice]['stops']]
 
135
  return d
136
 
137
 
 
138
  def getColorVals(style_options, style_choice):
139
  #df_tab only includes filters selected, we need to manually add "color_by" column (if it's not already a filter).
140
  column = style_options[style_choice]['property']
 
144
  return d
145
 
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
148
  filters = []
149
  for col, val in zip(filter_cols, filter_vals):
 
161
  {
162
  "id": "ca30x30",
163
  "source": "ca",
164
+ "source-layer": "ca30x30",
165
  "type": "fill",
166
  "filter": combined_filters,
167
  "paint": {
 
172
  ]
173
  }
174
  return style
175
+
176
 
177
  def get_pmtiles_style_llm(paint, ids):
178
  combined_filters = ["all", ["match", ["get", "id"], ids, True, False]]
 
188
  {
189
  "id": "ca30x30",
190
  "source": "ca",
191
+ "source-layer": "ca30x30",
192
  "type": "fill",
193
  "filter": combined_filters,
194
  "paint": {
195
  "fill-color": paint,
196
  "fill-opacity": 1,
 
197
  }
198
  }
199
  ]
app/variables.py CHANGED
@@ -1,22 +1,19 @@
1
- # # urls for main layer
2
- ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/bdce1e6767e799abd0d828ebc7208537af6246df/ca-30x30.pmtiles"
3
- ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/dd69c8cbaee47ea2b64c19963177edb6635be5d9/ca-30x30.parquet"
4
 
5
  ca_area_acres = 1.014e8 #acres
6
  style_choice = "GAP Status Code"
7
 
8
-
9
  # urls for additional data layers
10
  url_sr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/species-richness-ca/{z}/{x}/{y}.png"
11
  url_rsr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/range-size-rarity/{z}/{x}/{y}.png"
12
  url_irr_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_irrecoverable_c_2018_cog.tif"
13
  url_man_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_manageable_c_2018_cog.tif"
14
- url_svi = "https://data.source.coop/cboettig/social-vulnerability/svi2020_us_county.pmtiles"
15
  url_justice40 = "https://data.source.coop/cboettig/justice40/disadvantaged-communities.pmtiles"
16
- url_loss_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/deforest-carbon-ca/{z}/{x}/{y}.png"
17
- url_hi = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_human_impact_cog.tif"
18
- url_calfire = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/cal_fire_2022.pmtiles"
19
- url_rxburn = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/cal_rxburn_2022.pmtiles"
20
 
21
  # colors for plotting
22
  private_access_color = "#DE881E" # orange
@@ -41,6 +38,7 @@ white = "#FFFFFF"
41
 
42
  # gap codes 3 and 4 are off by default.
43
  default_gap = {
 
44
  3: False,
45
  4: False,
46
  }
@@ -60,7 +58,7 @@ manager = {
60
  ['Joint', joint_color],
61
  ['Tribal', tribal_color],
62
  ['Private', private_color],
63
- ['HOA', hoa_color]
64
  ]
65
  }
66
 
@@ -69,7 +67,7 @@ easement = {
69
  'type': 'categorical',
70
  'stops': [
71
  ['True', private_access_color],
72
- ['False', public_access_color]
73
  ]
74
  }
75
 
@@ -78,7 +76,7 @@ year = {
78
  'type': 'categorical',
79
  'stops': [
80
  ['pre-2024', year2023_color],
81
- ['2024', year2024_color]
82
  ]
83
  }
84
 
@@ -89,12 +87,12 @@ access = {
89
  ['Open Access', public_access_color],
90
  ['No Public Access', private_access_color],
91
  ['Unknown Access', "#bbbbbb"],
92
- ['Restricted Access', tribal_color]
93
  ]
94
  }
95
 
96
  gap = {
97
- 'property': 'reGAP',
98
  'type': 'categorical',
99
  'stops': [
100
  [1, "#26633d"],
@@ -104,9 +102,50 @@ gap = {
104
  ]
105
  }
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  style_options = {
108
  "Year": year,
109
- "GAP Status Code": gap,
 
 
110
  "Manager Type": manager,
111
  "Easement": easement,
112
  "Access Type": access,
@@ -143,12 +182,82 @@ justice40_style = {
143
  }
144
  ]
145
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  select_column = {
148
  "Year": "established",
149
- "GAP Status Code": "reGAP",
 
 
150
  "Manager Type": "manager_type",
151
  "Easement": "easement",
152
- "Access Type": "access_type",
 
153
  }
154
 
 
1
+ # urls for main layer
2
+ ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/8d5d938c04d3206e6bfb04055b5e779c4c28222f/ca-30x30.parquet"
3
+ ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/c58913a279d13c414722c4299b0e0867e923946a/ca-30x30.pmtiles"
4
 
5
  ca_area_acres = 1.014e8 #acres
6
  style_choice = "GAP Status Code"
7
 
 
8
  # urls for additional data layers
9
  url_sr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/species-richness-ca/{z}/{x}/{y}.png"
10
  url_rsr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/range-size-rarity/{z}/{x}/{y}.png"
11
  url_irr_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_irrecoverable_c_2018_cog.tif"
12
  url_man_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_manageable_c_2018_cog.tif"
 
13
  url_justice40 = "https://data.source.coop/cboettig/justice40/disadvantaged-communities.pmtiles"
14
+ url_calfire = 'https://minio.carlboettiger.info/public-fire/calfire-2023.pmtiles'
15
+ url_rxburn = 'https://minio.carlboettiger.info/public-fire/calfire-rxburn-2023.pmtiles'
16
+ url_svi = 'https://minio.carlboettiger.info/public-data/social-vulnerability/2022/SVI2022_US_tract.pmtiles'
 
17
 
18
  # colors for plotting
19
  private_access_color = "#DE881E" # orange
 
38
 
39
  # gap codes 3 and 4 are off by default.
40
  default_gap = {
41
+ 0: False,
42
  3: False,
43
  4: False,
44
  }
 
58
  ['Joint', joint_color],
59
  ['Tribal', tribal_color],
60
  ['Private', private_color],
61
+ ['HOA', hoa_color],
62
  ]
63
  }
64
 
 
67
  'type': 'categorical',
68
  'stops': [
69
  ['True', private_access_color],
70
+ ['False', public_access_color],
71
  ]
72
  }
73
 
 
76
  'type': 'categorical',
77
  'stops': [
78
  ['pre-2024', year2023_color],
79
+ ['2024', year2024_color],
80
  ]
81
  }
82
 
 
87
  ['Open Access', public_access_color],
88
  ['No Public Access', private_access_color],
89
  ['Unknown Access', "#bbbbbb"],
90
+ ['Restricted Access', tribal_color],
91
  ]
92
  }
93
 
94
  gap = {
95
+ 'property': 'gap_code',
96
  'type': 'categorical',
97
  'stops': [
98
  [1, "#26633d"],
 
102
  ]
103
  }
104
 
105
+ status = {
106
+ 'property': 'status',
107
+ 'type': 'categorical',
108
+ 'stops': [
109
+ ['30x30-conserved', "#26633d"],
110
+ ['other-conserved', "#879647"],
111
+ ['non-conserved', white]
112
+ ]
113
+ }
114
+
115
+
116
+ ecoregion = {
117
+ 'property': 'ecoregion',
118
+ 'type': 'categorical',
119
+ 'stops': [
120
+ ['Sierra Nevada Foothills', "#1f77b4"],
121
+ ['Southern Cascades', "#ff7f0e"],
122
+ ['Southeastern Great Basin', "#2ca02c"],
123
+ ['Southern California Mountains and Valleys', "#d62728"],
124
+ ['Sonoran Desert', "#9467bd"],
125
+ ['Northwestern Basin and Range', "#8c564b"],
126
+ ['Colorado Desert', "#e377c2"],
127
+ ['Central Valley Coast Ranges', "#7f7f7f"],
128
+ ['Great Valley (South)', "#bcbd22"],
129
+ ['Sierra Nevada', "#17becf"],
130
+ ['Northern California Coast Ranges', "#aec7e8"],
131
+ ['Northern California Interior Coast Ranges', "#ffbb78"],
132
+ ['Mojave Desert', "#98df8a"],
133
+ ['Mono', "#ff9896"],
134
+ ['Southern California Coast', "#c5b0d5"],
135
+ ['Modoc Plateau', "#c49c94"],
136
+ ['Klamath Mountains', "#f7b6d2"],
137
+ ['Northern California Coast', "#c7c7c7"],
138
+ ['Great Valley (North)', "#dbdb8d"],
139
+ ['Central California Coast', "#9edae5"],
140
+ ]
141
+ }
142
+
143
+
144
  style_options = {
145
  "Year": year,
146
+ "GAP Code": gap,
147
+ "30x30 Status": status,
148
+ "Ecoregion": ecoregion,
149
  "Manager Type": manager,
150
  "Easement": easement,
151
  "Access Type": access,
 
182
  }
183
  ]
184
  }
185
+ fire_style = {"version": 8,
186
+ "sources": {
187
+ "source1": {
188
+ "type": "vector",
189
+ "url": "pmtiles://" + url_calfire,
190
+ "attribution": "CAL FIRE"
191
+ }
192
+ },
193
+ "layers": [
194
+ {
195
+ "id": "fire",
196
+ "source": "source1",
197
+ "source-layer": 'calfire2023',
198
+ "filter": [">=", ["get", "YEAR_"], 2013],
199
+
200
+ "type": "fill",
201
+ "paint": {
202
+ "fill-color": "#D22B2B",
203
+ }
204
+ }
205
+ ]
206
+ }
207
+ rx_style = {
208
+ "version": 8,
209
+ "sources": {
210
+ "source2": {
211
+ "type": "vector",
212
+ "url": "pmtiles://" + url_rxburn,
213
+ "attribution": "CAL FIRE"
214
+ }
215
+ },
216
+ "layers": [
217
+ {
218
+ "id": "rxburn",
219
+ "source": "source2",
220
+ "source-layer": 'calfirerxburn2023',
221
+ "filter": [">=", ["get", "YEAR_"], 2013],
222
+ "type": "fill",
223
+ "paint": {
224
+ "fill-color": "#702963",
225
+ }
226
+ }
227
+ ]
228
+ }
229
+
230
+
231
+ svi_style = {
232
+ "layers": [
233
+ {
234
+ "id": "svi",
235
+ "source": "svi",
236
+ "source-layer": "svi",
237
+ "filter": ["match", ["get", "ST_ABBR"], "CA", True, False],
238
+ "type": "fill",
239
+ "paint": {
240
+ "fill-color": [
241
+ "interpolate", ["linear"], ["get", "RPL_THEMES"],
242
+ 0, white,
243
+ 1, svi_color
244
+ ]
245
+ }
246
+ }
247
+ ]
248
+ }
249
+
250
+
251
+
252
 
253
  select_column = {
254
  "Year": "established",
255
+ "GAP Code": "gap_code",
256
+ "30x30 Status": "status",
257
+ "Ecoregion": "ecoregion",
258
  "Manager Type": "manager_type",
259
  "Easement": "easement",
260
+ "Access Type": "access_type"
261
+
262
  }
263
 
preprocess/preprocess.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": null,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {
16
  "editable": true,
@@ -452,6 +452,10 @@
452
  " gdf_stats = gdf_stats.reset_index() \n",
453
  "\n",
454
  "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
 
 
 
 
455
  "gdf_stats.to_parquet(ca_parquet) # save results "
456
  ]
457
  },
@@ -476,20 +480,19 @@
476
  "hf_upload('ca-30x30.parquet', ca_parquet)\n",
477
  "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
478
  "\n",
479
- "#to use PMTiles, need to convert to 4326 and geojson\n",
480
- "ca_4326 = (con\n",
481
  " .read_parquet(ca_parquet)\n",
482
- " .mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")) \n",
483
  " .filter(_.status != 'non-conserved') #omitting the non-conserved to only for pmtiles \n",
484
  " )\n",
485
  "\n",
486
  "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
487
- "ca_4326.execute().set_crs(\"epsg:4326\").to_file(path + 'ca-30x30.geojson') \n",
488
- "pmtiles = to_pmtiles(path+ 'ca-30x30.geojson', ca_pmtiles)\n",
489
  "\n",
490
  "# upload pmtiles to minio and HF\n",
491
  "hf_upload('ca-30x30.pmtiles', ca_pmtiles)\n",
492
- "s3_cp(ca_pmtiles, \"s3://public-ca30x30/ca-30x30.pmtiles\", \"minio\")\n"
493
  ]
494
  }
495
  ],
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 1,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {
16
  "editable": true,
 
452
  " gdf_stats = gdf_stats.reset_index() \n",
453
  "\n",
454
  "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
455
+ "\n",
456
+ "\n",
457
+ "# reproject to epsg:4326 since that's what pmtiles requires and we want to match that \n",
458
+ "gdf_stats = gdf_stats.to_crs(\"epsg:4326\")\n",
459
  "gdf_stats.to_parquet(ca_parquet) # save results "
460
  ]
461
  },
 
480
  "hf_upload('ca-30x30.parquet', ca_parquet)\n",
481
  "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
482
  "\n",
483
+ "#to use PMTiles, need to convert to geojson\n",
484
+ "ca_geojson = (con\n",
485
  " .read_parquet(ca_parquet)\n",
 
486
  " .filter(_.status != 'non-conserved') #omitting the non-conserved to only for pmtiles \n",
487
  " )\n",
488
  "\n",
489
  "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
490
+ "ca_geojson.execute().to_file(path + 'ca-30x30.geojson') \n",
491
+ "pmtiles = to_pmtiles(path+ 'ca-30x30.geojson', ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
492
  "\n",
493
  "# upload pmtiles to minio and HF\n",
494
  "hf_upload('ca-30x30.pmtiles', ca_pmtiles)\n",
495
+ "s3_cp(ca_pmtiles, \"s3://public-ca30x30/ca-30x30.pmtiles\", \"minio\")"
496
  ]
497
  }
498
  ],