Spaces:

boettiger-lab
/

ca-30x30

Running

App Files Files Community

Cassie Buhler committed on Feb 11

Commit

78fca1a

unverified ·

2 Parent(s): 1f8b4af 5516f91

Merge pull request #30 from boettiger-lab/feat/app-updates

Browse files

Files changed (6) hide show

app/app.py +66 -142
app/footer.md +3 -7
app/system_prompt.txt +62 -67
app/utils.py +28 -98
app/variables.py +126 -17
preprocess/preprocess.ipynb +10 -7

app/app.py CHANGED Viewed

@@ -18,36 +18,23 @@ from functools import reduce
 from variables import *
 from utils import *
-# Create the duckdb connection directly from the sqlalchemy engine instead.
-# Not as elegant as `ibis.duckdb.connect()` but shares connection with sqlalchemy.
-## Create the engine
-#cwd = pathlib.Path.cwd()
-#connect_args = {'preload_extensions':['spatial']}
-#eng = sqlalchemy.create_engine(f"duckdb:///{cwd}/duck.db",connect_args = connect_args)
-#con = ibis.duckdb.from_connection(eng.raw_connection())
 ## Create the table from remote parquet only if it doesn't already exist on disk
 con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
 current_tables = con.list_tables()
 if "mydata" not in set(current_tables):
     tbl = con.read_parquet(ca_parquet)
     con.create_table("mydata", tbl)
 ca = con.table("mydata")
 for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
-    'percent_fire_10yr', 'percent_rxburn_10yr', 'percent_disadvantaged',
-    'svi', 'svi_socioeconomic_status', 'svi_household_char',
-    'svi_racial_ethnic_minority', 'svi_housing_transit',
-    'deforest_carbon', 'human_impact'
-]:
     if key not in st.session_state:
         st.session_state[key] = False
 st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
@@ -108,6 +95,7 @@ st.markdown(
     unsafe_allow_html=True,
 )
 st.markdown(
     """
     <style>
@@ -154,20 +142,19 @@ with open('app/system_prompt.txt', 'r') as file:
     template = file.read()
 from langchain_openai import ChatOpenAI
-# os.environ["OPENAI_API_KEY"] = st.secrets["LITELLM_KEY"]
-# llm = ChatOpenAI(model="gorilla", temperature=0, base_url="https://llm.nrp-nautilus.io/")
-# llm = ChatOpenAI(model = "llama3", api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io",  temperature=0)
-llm = ChatOpenAI(model="gpt-4", temperature=0)
 managers = ca.sql("SELECT DISTINCT manager FROM mydata;").execute()
 names = ca.sql("SELECT name FROM mydata GROUP BY name HAVING SUM(acres) >10000;").execute()
 from langchain_core.prompts import ChatPromptTemplate
 prompt = ChatPromptTemplate.from_messages([
     ("system", template),
     ("human", "{input}")
-]).partial(dialect="duckdb", table_info = ca.schema(), managers = managers, names = names)
 structured_llm = llm.with_structured_output(SQLResponse)
 few_shot_structured_llm = prompt | structured_llm
@@ -184,12 +171,11 @@ def run_sql(query,color_choice):
     output = few_shot_structured_llm.invoke(query)
     sql_query = output.sql_query
     explanation =output.explanation
     if not sql_query: # if the chatbot can't generate a SQL query.
         st.success(explanation)
         return pd.DataFrame({'id' : []})
     result = ca.sql(sql_query).execute()
     if result.empty :
         explanation = "This query did not return any results. Please try again with a different query."
@@ -204,11 +190,17 @@ def run_sql(query,color_choice):
     elif ("id" and "geom" in result.columns):
         style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
         legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
         m.add_legend(legend_dict=legend_d, position='bottom-left')
         m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
         m.fit_bounds(result.total_bounds.tolist())
         result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
     else:
         st.write(result)  # if we aren't mapping, just print out the data
     with st.popover("Explanation"):
@@ -229,13 +221,12 @@ def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for
 chatbot_toggles = {key: False for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
-    'percent_fire_10yr', 'percent_rxburn_10yr', 'percent_disadvantaged',
-    'svi', 'svi_socioeconomic_status', 'svi_household_char',
-    'svi_racial_ethnic_minority', 'svi_housing_transit',
-    'deforest_carbon', 'human_impact'
 ]}
 #############
@@ -245,7 +236,6 @@ with st.sidebar:
     color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
     colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
-    # alpha = st.slider("transparency", 0.0, 1.0, 0.7)
     alpha = 0.8
     st.divider()
@@ -303,7 +293,6 @@ with st.container():
             st.stop()
 #### Data layers
 with st.sidebar:
     st.markdown('<p class = "medium-font-sidebar"> Data Layers:</p>', help = "Select data layers to visualize on the map. Summary charts will update based on the displayed layers.", unsafe_allow_html= True)
@@ -315,7 +304,6 @@ with st.sidebar:
         if show_richness:
             m.add_tile_layer(url_sr, name="MOBI Species Richness",opacity=a_bio)
         if show_rsr:
             m.add_tile_layer(url_rsr, name="MOBI Range-Size Rarity", opacity=a_bio)
@@ -330,72 +318,41 @@ with st.sidebar:
         if show_manageable_carbon:
            m.add_cog_layer(url_man_carbon, palette="purples", name="Manageable Carbon", opacity = a_climate, fit_bounds=False)
-    # Justice40 Section
-    with st.expander("🌱 Climate & Economic Justice"):
-        a_justice = st.slider("transparency", 0.0, 1.0, 0.07, key = "social justice")
-        show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "percent_disadvantaged", value=chatbot_toggles['percent_disadvantaged'])
-        if show_justice40:
-            m.add_pmtiles(url_justice40, style=justice40_style, name="Justice40", opacity=a_justice, tooltip=False, fit_bounds = False)
-    # SVI Section
-    with st.expander("🏡 Social Vulnerability"):
-        a_svi = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
         show_sv = st.toggle("Social Vulnerability Index (SVI)", key = "svi", value=chatbot_toggles['svi'])
-        show_sv_socio = st.toggle("Socioeconomic Status", key = "svi_socioeconomic_status", value=chatbot_toggles['svi_socioeconomic_status'])
-        show_sv_household = st.toggle("Household Characteristics", key = "svi_household_char", value=chatbot_toggles['svi_household_char'])
-        show_sv_minority = st.toggle("Racial & Ethnic Minority Status", key = "svi_racial_ethnic_minority", value=chatbot_toggles['svi_racial_ethnic_minority'])
-        show_sv_housing = st.toggle("Housing Type & Transportation", key = "svi_housing_transit", value=chatbot_toggles['svi_housing_transit'])
         if show_sv:
-            m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEMES"), opacity=a_svi, tooltip=False, fit_bounds = False)
-        if show_sv_socio:
-            m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME1"), opacity=a_svi, tooltip=False, fit_bounds = False)
-        if show_sv_household:
-            m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME2"), opacity=a_svi, tooltip=False, fit_bounds = False)
-        if show_sv_minority:
-            m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME3"), opacity=a_svi, tooltip=False, fit_bounds = False)
-        if show_sv_housing:
-            m.add_pmtiles(url_svi, style = get_sv_style("RPL_THEME4"), opacity=a_svi, tooltip=False, fit_bounds = False)
     # Fire Section
     with st.expander("🔥 Fire"):
-        a_fire = st.slider("transparency", 0.0, 1.0, 0.15, key = "fire")
-        show_fire_10 = st.toggle("Fires (2013-2022)", key = "percent_fire_10yr", value=chatbot_toggles['percent_fire_10yr'])
-        show_rx_10 = st.toggle("Prescribed Burns (2013-2022)", key = "percent_rxburn_10yr", value=chatbot_toggles['percent_rxburn_10yr'])
-        if show_fire_10:
-            m.add_pmtiles(url_calfire, style=fire_style("layer2"), name="CALFIRE Fire Polygons (2013-2022)", opacity=a_fire, tooltip=False, fit_bounds = True)
-        if show_rx_10:
-            m.add_pmtiles(url_rxburn, style=rx_style("layer2"), name="CAL FIRE Prescribed Burns (2013-2022)", opacity=a_fire, tooltip=False, fit_bounds = True)
-    # HI Section
-    with st.expander("🚜 Human Impacts"):
-        a_hi = st.slider("transparency", 0.0, 1.0, 0.1, key = "hi")
-        show_carbon_lost = st.toggle("Deforested Carbon", key = "deforest_carbon", value=chatbot_toggles['deforest_carbon'])
-        show_human_impact = st.toggle("Human Footprint", key = "human_impact", value=chatbot_toggles['human_impact'])
-        if show_carbon_lost:
-            m.add_tile_layer(url_loss_carbon, name="Deforested Carbon (2002-2022)", opacity = a_hi)
-        if show_human_impact:
-            m.add_cog_layer(url_hi, name="Human Footprint (2017-2021)", opacity = a_hi, fit_bounds=False)
     st.divider()
     st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
     for label in style_options: # get selected filters (based on the buttons selected)
         with st.expander(label):
-            if label == "GAP Status Code": # gap code 1 and 2 are on by default
                 opts = getButtons(style_options, label, default_gap)
             else: # other buttons are not on by default.
                 opts = getButtons(style_options, label)
@@ -408,7 +365,7 @@ with st.sidebar:
         else:
             filter_cols = []
             filter_vals = []
     st.divider()
     st.markdown("""
     <p class="medium-font-sidebar">
@@ -419,16 +376,23 @@ with st.sidebar:
 if 'out' not in locals():
     style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
     legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
-    m.add_legend(legend_dict = legend_d, position = 'bottom-left')
-    m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds = True)
 column = select_column[color_choice]
 select_colors = {
     "Year": year["stops"],
-    "GAP Status Code": gap["stops"],
     "Manager Type": manager["stops"],
     "Easement": easement["stops"],
     "Access Type": access["stops"],
@@ -440,6 +404,7 @@ colors = (
     .to_pandas()
 )
 # get summary tables used for charts + printed table
 # df - charts; df_tab - printed table (omits colors)
 if 'out' not in locals():
@@ -451,20 +416,14 @@ total_percent = df.percent_protected.sum().round(2)
 # charts displayed based on color_by variable
-richness_chart = bar_chart(df, column, 'mean_richness', "Species Richness")
-rsr_chart = bar_chart(df, column, 'mean_rsr', "Range-Size Rarity")
-irr_carbon_chart = bar_chart(df, column, 'mean_irrecoverable_carbon', "Irrecoverable Carbon")
-man_carbon_chart = bar_chart(df, column, 'mean_manageable_carbon', "Manageable Carbon")
-fire_10_chart = bar_chart(df, column, 'mean_percent_fire_10yr', "Fires (2013-2022)")
-rx_10_chart = bar_chart(df, column, 'mean_percent_rxburn_10yr',"Prescribed Burns (2013-2022)")
-justice40_chart = bar_chart(df, column, 'mean_percent_disadvantaged', "Disadvantaged Communities (Justice40)")
-svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index")
-svi_socio_chart = bar_chart(df, column, 'mean_svi_socioeconomic_status', "SVI - Socioeconomic Status")
-svi_house_chart = bar_chart(df, column, 'mean_svi_household_char', "SVI - Household Characteristics")
-svi_minority_chart = bar_chart(df, column, 'mean_svi_racial_ethnic_minority', "SVI - Racial and Ethnic Minority")
-svi_transit_chart = bar_chart(df, column, 'mean_svi_housing_transit', "SVI - Housing Type and Transit")
-carbon_loss_chart = bar_chart(df, column, 'mean_carbon_lost', "Deforested Carbon (2002-2022)")
-hi_chart = bar_chart(df, column, 'mean_human_impact', "Human Footprint (2017-2021)")
 main = st.container()
@@ -475,7 +434,7 @@ with main:
     with map_col:
         m.to_streamlit(height=650)
         if 'out' not in locals():
-            st.dataframe(df_tab, use_container_width = True)
         else:
             st.dataframe(out, use_container_width = True)
@@ -486,71 +445,36 @@ with main:
             st.altair_chart(area_plot(df, column), use_container_width=True)
             if show_richness:
-                # "Species Richness"
                 st.altair_chart(richness_chart, use_container_width=True)
             if show_rsr:
-                # "Range-Size Rarity"
                 st.altair_chart(rsr_chart, use_container_width=True)
             if show_irrecoverable_carbon:
-                # "Irrecoverable Carbon"
                 st.altair_chart(irr_carbon_chart, use_container_width=True)
             if show_manageable_carbon:
-                # "Manageable Carbon"
                 st.altair_chart(man_carbon_chart, use_container_width=True)
-            if show_fire_10:
-                # "Fires (2013-2022)"
-                st.altair_chart(fire_10_chart, use_container_width=True)
-            if show_rx_10:
-                # "Prescribed Burns (2013-2022)"
-                st.altair_chart(rx_10_chart, use_container_width=True)
             if show_justice40:
-                # "Disadvantaged Communities (Justice40)"
                 st.altair_chart(justice40_chart, use_container_width=True)
             if show_sv:
-                # "Social Vulnerability Index"
                 st.altair_chart(svi_chart, use_container_width=True)
-            if show_sv_socio:
-                # "SVI - Socioeconomic Status"
-                st.altair_chart(svi_socio_chart, use_container_width=True)
-            if show_sv_household:
-                # "SVI - Household Characteristics"
-                st.altair_chart(svi_house_chart, use_container_width=True)
-            if show_sv_minority:
-                # "SVI - Racial and Ethnic Minority"
-                st.altair_chart(svi_minority_chart, use_container_width=True)
-            if show_sv_housing:
-                # "SVI - Housing Type and Transit"
-                st.altair_chart(svi_transit_chart, use_container_width=True)
-            if show_carbon_lost:
-                # "Deforested Carbon (2002-2022)"
-                st.altair_chart(carbon_loss_chart, use_container_width=True)
-            if show_human_impact:
-                # "Human Footprint (2017-2021)"
-                st.altair_chart(hi_chart, use_container_width=True)
 st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
 st.caption("***Under California’s 30x30 framework, only GAP codes 1 and 2 are counted toward the conservation goal.")
 st.divider()
 with open('app/footer.md', 'r') as file:

 from variables import *
 from utils import *
 ## Create the table from remote parquet only if it doesn't already exist on disk
 con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
 current_tables = con.list_tables()
 if "mydata" not in set(current_tables):
     tbl = con.read_parquet(ca_parquet)
     con.create_table("mydata", tbl)
 ca = con.table("mydata")
 for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
+    'fire', 'rxburn', 'disadvantaged_communities',
+    'svi']:
     if key not in st.session_state:
         st.session_state[key] = False
 st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
     unsafe_allow_html=True,
 )
 st.markdown(
     """
     <style>
     template = file.read()
 from langchain_openai import ChatOpenAI
+llm = ChatOpenAI(model = "kosbu/Llama-3.3-70B-Instruct-AWQ", api_key="cirrus-vllm-secret-api-key", base_url = "https://llm.cirrus.carlboettiger.info/v1/",  temperature=0)
+# llm = ChatOpenAI(model="gpt-4", temperature=0)
 managers = ca.sql("SELECT DISTINCT manager FROM mydata;").execute()
 names = ca.sql("SELECT name FROM mydata GROUP BY name HAVING SUM(acres) >10000;").execute()
+ecoregions = ca.sql("SELECT DISTINCT ecoregion FROM mydata;").execute()
 from langchain_core.prompts import ChatPromptTemplate
 prompt = ChatPromptTemplate.from_messages([
     ("system", template),
     ("human", "{input}")
+]).partial(dialect="duckdb", table_info = ca.schema(), managers = managers, names = names, ecoregions = ecoregions)
 structured_llm = llm.with_structured_output(SQLResponse)
 few_shot_structured_llm = prompt | structured_llm
     output = few_shot_structured_llm.invoke(query)
     sql_query = output.sql_query
     explanation =output.explanation
     if not sql_query: # if the chatbot can't generate a SQL query.
         st.success(explanation)
         return pd.DataFrame({'id' : []})
     result = ca.sql(sql_query).execute()
     if result.empty :
         explanation = "This query did not return any results. Please try again with a different query."
     elif ("id" and "geom" in result.columns):
         style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
         legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
+        # shorten legend for ecoregions
+        if color_choice == "Ecoregion":
+            legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
         m.add_legend(legend_dict=legend_d, position='bottom-left')
         m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
         m.fit_bounds(result.total_bounds.tolist())
         result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
     else:
         st.write(result)  # if we aren't mapping, just print out the data
     with st.popover("Explanation"):
 chatbot_toggles = {key: False for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
+    'fire', 'rxburn', 'disadvantaged_communities',
+    'svi',
 ]}
 #############
     color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
     colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
     alpha = 0.8
     st.divider()
             st.stop()
 #### Data layers
 with st.sidebar:
     st.markdown('<p class = "medium-font-sidebar"> Data Layers:</p>', help = "Select data layers to visualize on the map. Summary charts will update based on the displayed layers.", unsafe_allow_html= True)
         if show_richness:
             m.add_tile_layer(url_sr, name="MOBI Species Richness",opacity=a_bio)
         if show_rsr:
             m.add_tile_layer(url_rsr, name="MOBI Range-Size Rarity", opacity=a_bio)
         if show_manageable_carbon:
            m.add_cog_layer(url_man_carbon, palette="purples", name="Manageable Carbon", opacity = a_climate, fit_bounds=False)
+    # People Section
+    with st.expander("👤 People"):
+        a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
+        show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
         show_sv = st.toggle("Social Vulnerability Index (SVI)", key = "svi", value=chatbot_toggles['svi'])
+        if show_justice40:
+            m.add_pmtiles(url_justice40, style=justice40_style, name="Justice40", opacity=a_people, tooltip=False, fit_bounds = False)
         if show_sv:
+            m.add_pmtiles(url_svi, style = svi_style, opacity=a_people, tooltip=False, fit_bounds = False)
     # Fire Section
     with st.expander("🔥 Fire"):
+        a_fire = st.slider("transparency", 0.0, 1.0, 0.15, key = "calfire")
+        show_fire = st.toggle("Fires (2013-2023)", key = "fire", value=chatbot_toggles['fire'])
+        show_rxburn = st.toggle("Prescribed Burns (2013-2023)", key = "rxburn", value=chatbot_toggles['rxburn'])
+        if show_fire:
+            m.add_pmtiles(url_calfire, style=fire_style, name="CALFIRE Fire Polygons (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
+        if show_rxburn:
+            m.add_pmtiles(url_rxburn, style=rx_style, name="CAL FIRE Prescribed Burns (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
     st.divider()
     st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
     for label in style_options: # get selected filters (based on the buttons selected)
         with st.expander(label):
+            if label == "GAP Code": # gap code 1 and 2 are on by default
                 opts = getButtons(style_options, label, default_gap)
             else: # other buttons are not on by default.
                 opts = getButtons(style_options, label)
         else:
             filter_cols = []
             filter_vals = []
     st.divider()
     st.markdown("""
     <p class="medium-font-sidebar">
 if 'out' not in locals():
     style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
     legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
+    # shorten legend for ecoregions
+    if color_choice == "Ecoregion":
+        legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
+    m.add_legend(legend_dict = legend_d, position = 'bottom-left')
+    m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
 column = select_column[color_choice]
 select_colors = {
     "Year": year["stops"],
+    "GAP Code": gap["stops"],
+    "30x30 Status": status["stops"],
+    "Ecoregion": ecoregion["stops"],
     "Manager Type": manager["stops"],
     "Easement": easement["stops"],
     "Access Type": access["stops"],
     .to_pandas()
 )
 # get summary tables used for charts + printed table
 # df - charts; df_tab - printed table (omits colors)
 if 'out' not in locals():
 # charts displayed based on color_by variable
+richness_chart = bar_chart(df, column, 'mean_richness', "Species Richness (2022)")
+rsr_chart = bar_chart(df, column, 'mean_rsr', "Range-Size Rarity (2022)")
+irr_carbon_chart = bar_chart(df, column, 'mean_irrecoverable_carbon', "Irrecoverable Carbon (2018)")
+man_carbon_chart = bar_chart(df, column, 'mean_manageable_carbon', "Manageable Carbon (2018)")
+fire_10_chart = bar_chart(df, column, 'mean_fire', "Fires (2013-2023)")
+rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)")
+justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
+svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
 main = st.container()
     with map_col:
         m.to_streamlit(height=650)
         if 'out' not in locals():
+            st.dataframe(df_tab, use_container_width = True)
         else:
             st.dataframe(out, use_container_width = True)
             st.altair_chart(area_plot(df, column), use_container_width=True)
             if show_richness:
                 st.altair_chart(richness_chart, use_container_width=True)
             if show_rsr:
                 st.altair_chart(rsr_chart, use_container_width=True)
             if show_irrecoverable_carbon:
                 st.altair_chart(irr_carbon_chart, use_container_width=True)
             if show_manageable_carbon:
                 st.altair_chart(man_carbon_chart, use_container_width=True)
             if show_justice40:
                 st.altair_chart(justice40_chart, use_container_width=True)
             if show_sv:
                 st.altair_chart(svi_chart, use_container_width=True)
+            if show_fire:
+                st.altair_chart(fire_10_chart, use_container_width=True)
+            if show_rxburn:
+                st.altair_chart(rx_10_chart, use_container_width=True)
 st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")
 st.caption("***Under California’s 30x30 framework, only GAP codes 1 and 2 are counted toward the conservation goal.")
 st.divider()
 with open('app/footer.md', 'r') as file:

app/footer.md CHANGED Viewed

@@ -9,14 +9,10 @@ Data: https://huggingface.co/datasets/boettiger-lab/ca-30x30
 - Imperiled Species Richness and Range-Size-Rarity from NatureServe (2022). Data: https://beta.source.coop/repositories/cboettig/mobi. License CC-BY-NC-ND
-- Irrecoverable Carbon from Conservation International, reprocessed to COG on https://beta.source.coop/cboettig/carbon, citation: https://doi.org/10.1038/s41893-021-00803-6, License: CC-BY-NC
-- Fire polygons by CAL FIRE (2022), reprocessed to PMTiles on https://beta.source.coop/cboettig/fire/. License: Public Domain
 - Climate and Economic Justice Screening Tool, US Council on Environmental Quality, Justice40. Archived description: https://web.archive.org/web/20250121194509/https://screeningtool.geoplatform.gov/en/methodology#3/33.47/-97.5. Data: https://beta.source.coop/repositories/cboettig/justice40/description/, License: Public Domain
-- CDC 2020 Social Vulnerability Index by US County. Archived description: https://web.archive.org/web/20250126095916/https://www.atsdr.cdc.gov/place-health/php/svi/index.html. Data: https://source.coop/repositories/cboettig/social-vulnerability/description. License: Public Domain
-- Carbon-loss by Vizzuality, on https://beta.source.coop/repositories/vizzuality/lg-land-carbon-data. Citation: https://doi.org/10.1101/2023.11.01.565036, License: CC-BY
-- Human Footprint by Vizzuality, on https://beta.source.coop/repositories/vizzuality/hfp-100.  Citation: https://doi.org/10.3389/frsen.2023.1130896, License: Public Domain

 - Imperiled Species Richness and Range-Size-Rarity from NatureServe (2022). Data: https://beta.source.coop/repositories/cboettig/mobi. License CC-BY-NC-ND
+- Irrecoverable and Manageable Carbon from Conservation International, reprocessed to COG on https://beta.source.coop/cboettig/carbon, citation: https://doi.org/10.1038/s41893-021-00803-6, License: CC-BY-NC
 - Climate and Economic Justice Screening Tool, US Council on Environmental Quality, Justice40. Archived description: https://web.archive.org/web/20250121194509/https://screeningtool.geoplatform.gov/en/methodology#3/33.47/-97.5. Data: https://beta.source.coop/repositories/cboettig/justice40/description/, License: Public Domain
+- CDC 2022 Social Vulnerability Index by US Census Tract. Archived description: https://web.archive.org/web/20250126095916/https://www.atsdr.cdc.gov/place-health/php/svi/index.html. Data: https://source.coop/repositories/cboettig/social-vulnerability/description. License: Public Domain
+- Fire and Prescribed Fire by CAL FIRE (2023), reprocessed to PMTiles on https://beta.source.coop/cboettig/fire/. License: Public Domain

app/system_prompt.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-You are an expert in SQL and an assistant for mapping and analyzing California land data. Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question.
 For example:
 {{
@@ -10,17 +10,50 @@ Ensure the response contains only this JSON object, with no additional text, for
 # Important Details
-    - For map-related queries (e.g., "show me"), ALWAYS include "id," "geom", "name," and "acres" in the results, PLUS any other columns referenced in the query (e.g., in conditions, calculations, or subqueries). This output structure is MANDATORY for all map-related queries.
     - ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
     - Wrap each column name in double quotes (") to denote them as delimited identifiers.
-    - Pay attention to use only the column names you can see in the tables below. DO NOT query for columns that do not exist.
-    If the query mentions "biodiversity" without specifying a column, default to using "richness" (species richness). Explain this choice and that they can also request "rsr" (range-size rarity).
     - If the query mentions carbon without specifying a column, use "irrecoverable carbon". Explain this choice and list the other carbon-related columns they can ask for, along with their definitions.
-    - If the query asks about the manager, use the "manager" column. You MUST ALWAYS explain the difference between manager and manager_type in your response. Clarify that "manager" refers to the name of the managing entity (e.g., an agency), while "manager_type" specifies the type of jurisdiction (e.g., Federal, State, Non Profit). Also, let the user know they can include "manager_type" in their query if they want to refine their results.
-    - If the user's query is unclear, DO NOT make assumptions. Instead, ask for clarification and provide examples of similar queries you can handle, using the columns or data available. You MUST ONLY deliver accurate results.
-    - If you are mapping the data, explicitly state that the data is being visualized on a map. ALWAYS include a statement encouraging the user to examine the queried data below the map, as some areas may be too small at the current zoom level.
     - Users may not be familiar with this data, so your explanation should be short, clear, and easily understandable. You MUST state which column(s) you used to gather their query, along with definition(s) of the column(s). Do NOT explain SQL commands.
     - If the prompt is unrelated to the California dataset, provide examples of relevant queries that you can answer.
 # Example Questions and How to Approach Them
@@ -36,33 +69,33 @@ example_assistant: {{"sql_query":
 ## Example:
 example_user: "Which gap code has been impacted the most by fire?"
 example_assistant: {{"sql_query":
-    SELECT "reGAP", SUM("percent_fire_10yr") AS temp
     FROM mydata
-    GROUP BY "reGAP"
-    ORDER BY temp ASC
     LIMIT 1;
-"explanation":"I used the `percent_fire_10yr` column, which shows the percentage of each area burned over the past 10 years (2013–2022), summing it for each GAP code to find the one with the highest total fire impact."
 }}
 ## Example:
 example_user: "Who manages the land with the worst biodiversity and highest SVI?"
 example_assistant: {{"sql_query":
-SELECT manager,richness, svi
     FROM mydata
     GROUP BY "manager"
     ORDER BY "richness" ASC, "svi" DESC
     LIMIT 1;
 "explanation": "I identified the land manager with the worst biodiversity and highest Social Vulnerability Index (SVI) by analyzing the columns: `richness`, which measures species richness, and `svi`, which represents social vulnerability based on factors like socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation.
-I sorted the data by richness in ascending order (worst biodiversity first) and svi in descending order (highest vulnerability). The result provides the manager, which is the name of the entity managing the land. Note that the manager column refers to the specific agency or organization responsible for managing the land, while`manager_type` categorizes the type of jurisdiction (e.g., Federal, State, Non Profit)."
 }}
 ## Example:
 example_user: "Show me the biggest protected area"
 example_assistant: {{"sql_query":
     SELECT "id", "geom", "name", "acres", "manager", "manager_type", "acres"
     FROM mydata
     ORDER BY "acres" DESC
     LIMIT 1;
 "explanation": "I identified the biggest protected area by sorting the data in descending order based on the `acres` column, which represents the size of each area."
@@ -70,30 +103,28 @@ example_assistant: {{"sql_query":
 ## Example:
 example_user: "Show me the 50 most biodiverse areas found in disadvantaged communities."
 example_assistant: {{"sql_query":
-    SELECT "id", "geom", "name", "acres", "richness", "percent_disadvantaged" FROM mydata
-    WHERE "percent_disadvantaged" > 0
     ORDER BY "richness" DESC
     LIMIT 50;
-"explanation": "I used the `richness` column to measure biodiversity and the `percent_disadvantaged` column to identify areas located in disadvantaged communities. The `percent_disadvantaged` value is derived from the Justice40 initiative, which identifies communities burdened by systemic inequities and vulnerabilities across multiple domains, including climate resilience, energy access, health disparities, housing affordability, pollution exposure, transportation infrastructure, water quality, and workforce opportunities.
-The results are sorted in descending order by biodiversity richness (highest biodiversity first), and only areas with a `percent_disadvantaged` value greater than 0 (indicating some portion of the area overlaps with a disadvantaged community) are included."
 }}
 ## Example:
 example_user: "Show me federally managed gap 3 lands that are in the top 5% of biodiversity richness and have experienced forest fire over at least 50% of their area"
 sql_query:
-    WITH temp_tab AS (
-        SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "richness") AS temp
         FROM mydata
     )
-    SELECT "id", "geom", "name", "acres","richness", "reGAP", "percent_fire_10yr"
     FROM mydata
-    WHERE "reGAP" = 3
-        AND "percent_fire_10yr" >= 0.5
         and "manager_type" = 'Federal'
-        AND "richness" > (SELECT temp FROM temp_tab);
 ## Example:
 example_user: "What is the total acreage of areas designated as easements?"
@@ -102,18 +133,16 @@ sql_query:
     FROM mydata
     WHERE "easement" = 'True';
 ## Example:
-example_user: "Show me land where irrecoverable carbon is in the top 10%"
 sql_query:
-    WITH temp_tab AS (
-        SELECT PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY "irrecoverable_carbon") AS temp
         FROM mydata
     )
-    SELECT "id", "geom", "name", "acres", "irrecoverable_carbon"
     FROM mydata
-    WHERE "irrecoverable_carbon" > (SELECT temp FROM temp_tab);
 ## Example:
 example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
@@ -124,38 +153,4 @@ sql_query:
     AND "percent_rxburn_10yr" >= 0.3;
-# Detailed Explanation of the Columns in the California Dataset
-- "established": The time range which the land was acquired, either "2024" or "pre-2024".
-- "reGAP": The GAP status code; corresponds to the level of protection the area has. There are 4 gap codes and are defined as the following.
-    Status 1: Permanently protected to maintain a natural state, allowing natural disturbances or mimicking them through management.
-    Status 2: Permanently protected but may allow some uses or management practices that degrade natural communities or suppress natural disturbances.
-    Status 3: Permanently protected from major land cover conversion but allows some extractive uses (e.g., logging, mining) and protects federally listed species.
-    Status 4: No protection mandates; land may be converted to unnatural habitat types or its management intent is unknown.
-- "name": The name of a protected area. The user may use a shortened name and/or not capitalize it. For example, "redwoods" may refer to "Redwood National Park", or "klamath" refers to "Klamath National Forest". Another example, "san diego wildlife refuge" could refer to multiple areas, so you would use "WHERE LOWER("name") LIKE '%san diego%' AND LOWER("name") LIKE '%wildlife%' AND LOWER("name") LIKE '%refuge%';" in your SQL query, to ensure that it is case-insensitive and matches any record that includes our phrases, because we don't want to overlook a match.  If the name isn't capitalized, you MUST ensure the search is case-insensitive by converting "name" to lowercase.
-The names of the largest parks are {names}.
-- "access_type": Level of access to the land: "Unknown Access","Restricted Access","No Public Access" and "Open Access".
-- "manager": The name of land manager for the area. Also referred to as the agency name. These are the manager names: {managers}. Users might use acronyms or could omit "United States" in the agency name, make sure to use the name used in the table. Some examples: "BLM" or "Bureau of Land Management" refers to the "United States Bureau of Land Management" or "CDFW" is "California Department of Fish and Wildlife". Similar to the "name" field, you can search for managers using "LIKE" in the SQL query.
-- "manager_type": The jurisdiction of the land manager: "Federal","State","Non Profit","Special District","Unknown","County","City","Joint","Tribal","Private","HOA". If the user says "non-profit", do not use a hyphen in your query.
-- "easement": Boolean value; whether or not the land is an easement.
-- "acres": Land acreage; measures the size of the area.
-- "id": unique id for each area. This is necessary for displaying queried results on a map.
-- "type": Physical type of area, either "Land" or "Water".
-- "richness": Species richness; higher values indicate better biodiversity.
-- "rsr": Range-size rarity; higher values indicate better rarity metrics.
-- "svi": Social Vulnerability Index based on 4 themes: socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation. Higher values indicate greater vulnerability.
-    - Themes:
-        - "svi_socioeconomic_status": Poverty, unemployment, housing cost burden, education, and health insurance.
-        - "svi_household_char": Age, disability, single-parent households, and language proficiency.
-        - "svi_racial_ethnic_minority": Race and ethnicity variables.
-        - "svi_housing_transit": Housing type, crowding, vehicles, and group quarters.
-- "percent_disadvantaged": Justice40-defined disadvantaged communities overburdened by climate, energy, health, housing, pollution, transportation, water, and workforce factors. Higher values indicate more disadvantage. Range is between 0 and 1.
-- "deforest_carbon": Carbon emissions due to deforestation.
-- "human_impact": A score representing the human footprint: cumulative anthropogenic impacts such as land cover change, population density, and infrastructure.
-- "percent_fire_10yr": The percentage of the area burned by fires from (2013-2022). Range is between 0 and 1.
-- "percent_rxburn_10yr": The percentage of the area affected by prescribed burns from (2013-2022). Range is between 0 and 1.
-Only use the following tables:
-{table_info}.
 Question: {input}

+You are an expert in SQL and an assistant for mapping and analyzing California land data, used for California's 30x30 initiative (protecting 30% of land and coastal waters by 2030). Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question. If the question doesn't necessitate a SQL query, only output an explanation.
 For example:
 {{
 # Important Details
+    - For map-related queries (e.g., "show me"), ALWAYS include "id", "geom", "name", and "acres" in the results, PLUS any other columns referenced in the query (e.g., in conditions, calculations, or subqueries). All columns used in the query MUST be returned in the results. This output structure is MANDATORY for all map-related queries.
+    - If the user specifies "protected" land or areas, only return records where "status" is either "30x30-conserved" or "other-conserved".
     - ONLY use LIMIT in your SQL queries if the user specifies a quantity (e.g., 'show me 5'). Otherwise, return all matching data without a limit.
     - Wrap each column name in double quotes (") to denote them as delimited identifiers.
+    - Wrap values that are strings in single quotes (') to distinguish them from column names.
+    - Pay attention to use only the column names you can see in the tables below. Your SQL queries MUST ONLY use these columns.
+    - ONLY write SQL queries using the records and columns that exist in the table. You are given the schema and all distinct values in this prompt.
+    - If the query mentions "biodiversity" without specifying a column, default to using "richness" (species richness). Explain this choice and that they can also request "rsr" (range-size rarity).
     - If the query mentions carbon without specifying a column, use "irrecoverable_carbon". Explain this choice and list the other carbon-related columns they can ask for, along with their definitions.
+    - If the query asks about the manager, use the "manager" column. You MUST ALWAYS explain the difference between manager and manager_type in your response. Clarify that "manager" refers to the name of the managing entity (e.g., an agency), while "manager_type" specifies the type of jurisdiction.
+    - Users might use shortened labels in their queries. For example, "redwoods" may refer to "Redwood National Park", or "klamath" refers to "Klamath National Forest".
+    - Do NOT overlook a match. SQL queries should be case-insensitive and match any record that includes phrases from the user. For example, "san diego wildlife refuge" could refer to multiple areas, so you would use "WHERE LOWER("name") LIKE '%san diego%' AND LOWER("name") LIKE '%wildlife%' AND LOWER("name") LIKE '%refuge%';" in your SQL query.
+    - Users might use acronyms or omit "United States" in the agency name; make sure to use the name used in the table. Some examples: "BLM" or "Bureau of Land Management" refers to the "United States Bureau of Land Management", and "CDFW" refers to the "California Department of Fish and Wildlife".
     - Users may not be familiar with this data, so your explanation should be short, clear, and easily understandable. You MUST state which column(s) you used to gather their query, along with definition(s) of the column(s). Do NOT explain SQL commands.
     - If the prompt is unrelated to the California dataset, provide examples of relevant queries that you can answer.
+    - If the user's query is unclear, DO NOT make assumptions. Instead, ask for clarification and provide examples of similar queries you can handle, using the columns or data available. You MUST ONLY deliver accurate results.
+    - Not every query will require SQL code; users may ask for more information about values and columns in the table, which you can answer based on the information in this prompt. For these cases, your "sql_query" field should be empty.
+# Column Descriptions
+- "established": The time range in which the land was acquired, either "2024" or "pre-2024".
+- "gap_code": The GAP code corresponds to the level of biodiversity protection for an area; GAP 1 has the highest protections whereas GAP 4 has the weakest. There are 4 GAP codes, defined as follows.
+    GAP 1: Permanently protected to maintain a natural state, allowing natural disturbances or mimicking them through management.
+    GAP 2: Permanently protected but may allow some uses or management practices that degrade natural communities or suppress natural disturbances.
+    GAP 3: Permanently protected from major land cover conversion but allows some extractive uses (e.g., logging, mining) and protects federally listed species.
+    GAP 4: No protection mandates; land may be converted to unnatural habitat types or its management intent is unknown.
+- "name": The name of the protected area. The names of the largest parks are {names}.
+- "access_type": Level of access to the land: "Unknown Access","Restricted Access","No Public Access" and "Open Access".
+- "manager": The land manager's name, also known as the agency name. These are the manager names: {managers}.
+- "manager_type": The jurisdiction of the land manager: "Federal","State","Non Profit","Special District","Unknown","County","City","Joint","Tribal","Private","HOA". If the user says "non-profit", do not use a hyphen in your query.
+- "easement": Boolean value; whether or not the land is an easement.
+- "acres": Land acreage; measures the size of the area.
+- "id": unique id for each area. This is necessary for displaying queried results on a map.
+- "type": Physical type of area, either "Land" or "Water".
+- "richness": Species richness; higher values indicate better biodiversity.
+- "rsr": Range-size rarity; higher values indicate better rarity metrics.
+- "svi": Social Vulnerability Index based on 4 themes: socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation. Higher values indicate greater vulnerability.
+- "disadvantaged_communities": The percentage of overlap that the protected area has with a disadvantaged community. Justice40-defined disadvantaged communities overburdened by climate, energy, health, housing, pollution, transportation, water, and workforce factors. Higher values indicate more disadvantage. Range is between 0 and 1.
+- "fire": The percentage of the area burned by fires from 2013 to 2022. Areas can burn more than once, thus the percentage can be above 1.
+- "rxburn": The percentage of the area affected by prescribed burns from (2013-2022). Areas can be burned more than once.
+- "status": The conservation status. GAP 1 and 2 lands have the highest biodiversity protections and count towards the 30x30 goal, thus are "30x30-conserved". GAP 3 and 4 lands are grouped into "other-conserved", as their biodiversity protections are lower. Areas that aren't protected--that is, they're not GAP 1, 2, 3, or 4--are designated "non-conserved".
+- "ecoregion": Ecoregions are areas with similar ecosystems and environmental resources. The ecoregions in this table are {ecoregions}.
+Only use the following table:
+{table_info}.
 # Example Questions and How to Approach Them
 ## Example:
 example_user: "Which gap code has been impacted the most by fire?"
 example_assistant: {{"sql_query":
+    SELECT "gap_code", SUM("fire") AS total_fire
     FROM mydata
+    GROUP BY "gap_code"
+    ORDER BY total_fire DESC
     LIMIT 1;
+"explanation":"I used the `fire` column, which shows the percentage of each area burned over the past 10 years (2013–2022), summing it for each GAP code to find the one with the highest total fire impact."
 }}
 ## Example:
 example_user: "Who manages the land with the worst biodiversity and highest SVI?"
 example_assistant: {{"sql_query":
+SELECT "manager", "richness", "svi"
     FROM mydata
     GROUP BY "manager"
     ORDER BY "richness" ASC, "svi" DESC
     LIMIT 1;
 "explanation": "I identified the land manager with the worst biodiversity and highest Social Vulnerability Index (SVI) by analyzing the columns: `richness`, which measures species richness, and `svi`, which represents social vulnerability based on factors like socioeconomic status, household characteristics, racial & ethnic minority status, and housing & transportation.
+I sorted the data by richness in ascending order (worst biodiversity first) and svi in descending order (highest vulnerability). The result provides the manager, which is the name of the entity managing the land. Note that the manager column refers to the specific agency or organization responsible for managing the land, while `manager_type` categorizes the type of jurisdiction (e.g., Federal, State, Non Profit)."
 }}
 ## Example:
 example_user: "Show me the biggest protected area"
 example_assistant: {{"sql_query":
     SELECT "id", "geom", "name", "acres", "manager", "manager_type"
     FROM mydata
+    WHERE "status" = '30x30-conserved'
     ORDER BY "acres" DESC
     LIMIT 1;
 "explanation": "I identified the biggest protected area by sorting the data in descending order based on the `acres` column, which represents the size of each area."
 ## Example:
 example_user: "Show me the 50 most biodiverse areas found in disadvantaged communities."
 example_assistant: {{"sql_query":
+    SELECT "id", "geom", "name", "acres", "richness", "disadvantaged_communities" FROM mydata
+    WHERE "disadvantaged_communities" > 0
     ORDER BY "richness" DESC
     LIMIT 50;
+"explanation": "I used the `richness` column to measure biodiversity and the `disadvantaged_communities` column to identify areas located in disadvantaged communities. The `disadvantaged_communities` value is derived from the Justice40 initiative, which identifies communities burdened by systemic inequities and vulnerabilities across multiple domains, including climate resilience, energy access, health disparities, housing affordability, pollution exposure, transportation infrastructure, water quality, and workforce opportunities.
+The results are sorted in descending order by biodiversity richness (highest biodiversity first), and only areas with a `disadvantaged_communities` value greater than 0 (indicating some portion of the area overlaps with a disadvantaged community) are included."
 }}
 ## Example:
 example_user: "Show me federally managed gap 3 lands that are in the top 5% of biodiversity richness and have experienced forest fire over at least 50% of their area"
 sql_query:
+    WITH temp AS (
+        SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "richness") AS richness_95_percentile
         FROM mydata
     )
+    SELECT "id", "geom", "name", "acres", "richness", "gap_code", "fire"
     FROM mydata
+    WHERE "gap_code" = 3
+        AND "fire" >= 0.5
         and "manager_type" = 'Federal'
+        AND "richness" > (SELECT richness_95_percentile FROM temp);
 ## Example:
 example_user: "What is the total acreage of areas designated as easements?"
     FROM mydata
     WHERE "easement" = 'True';
 ## Example:
+example_user: "Which ecoregions are in the top 10% of range-size rarity?"
 sql_query:
+    WITH temp AS (
+        SELECT PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY "rsr") AS rsr_90_percentile
         FROM mydata
     )
+    SELECT "ecoregion"
     FROM mydata
+    WHERE "rsr" > (SELECT rsr_90_percentile FROM temp);
 ## Example:
 example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
     AND "rxburn" >= 0.3;
 Question: {input}

app/utils.py CHANGED Viewed

@@ -26,16 +26,10 @@ def get_summary(ca, combined_filter, column, colors=None): #summary stats, based
                        mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
                        mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
                        mean_manageable_carbon = (_.manageable_carbon * _.acres).sum() / _.acres.sum(),
-                       mean_percent_fire_10yr = (_.percent_fire_10yr *_.acres).sum()/_.acres.sum(),
-                       mean_percent_rxburn_10yr = (_.percent_rxburn_10yr *_.acres).sum()/_.acres.sum(),
-                       mean_percent_disadvantaged =  (_.percent_disadvantaged * _.acres).sum() / _.acres.sum(),
                        mean_svi =  (_.svi * _.acres).sum() / _.acres.sum(),
-                       mean_svi_socioeconomic_status =  (_.svi_socioeconomic_status * _.acres).sum() / _.acres.sum(),
-                       mean_svi_household_char =  (_.svi_household_char * _.acres).sum() / _.acres.sum(),
-                       mean_svi_racial_ethnic_minority =  (_.svi_racial_ethnic_minority * _.acres).sum() / _.acres.sum(),
-                       mean_svi_housing_transit =  (_.svi_housing_transit * _.acres).sum() / _.acres.sum(),
-                       mean_carbon_lost = (_.deforest_carbon * _.acres).sum() / _.acres.sum(),
-                       mean_human_impact =  (_.human_impact * _.acres).sum() / _.acres.sum(),
                       )
             .mutate(percent_protected=_.percent_protected.round(1))
          )
@@ -58,6 +52,10 @@ def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): #
         filter_cols.append(column)
         filters.append(getattr(_, column).isin(colorby_vals[column]))
     combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
     df = get_summary(ca, combined_filter, [column], colors) # df used for charts
     df_tab = get_summary(ca, combined_filter, filter_cols, colors = None) #df used for printed table
     return df, df_tab
@@ -69,7 +67,7 @@ def area_plot(df, column): #percent protected pie chart
         alt.Theta("percent_protected:Q").stack(True),
     )
     pie = ( base
-           .mark_arc(innerRadius= 40, outerRadius=100)
            .encode(alt.Color("color:N").scale(None).legend(None),
                    tooltip=['percent_protected', column])
     )
@@ -82,11 +80,13 @@ def area_plot(df, column): #percent protected pie chart
 def bar_chart(df, x, y, title): #display summary stats for color_by column
     #axis label angles / chart size
-    if x == "manager_type": #labels are too long, making vertical
         angle = 270
         height = 373
     else: #other labels are horizontal
         angle = 0
         height = 310
@@ -101,28 +101,26 @@ def bar_chart(df, x, y, title): #display summary stats for color_by column
     else:
         sort = 'x'
     x_title = next(key for key, value in select_column.items() if value == x)
-    chart = alt.Chart(df).mark_bar().transform_calculate(
-        access_label=f"replace(datum.{x}, ' Access', '')"  #omit access from access_type labels so it fits in frame
         ).encode(
-        x=alt.X("access_label:N",
-                axis=alt.Axis(labelAngle=angle, title=x_title),
                         sort=sort),
         y=alt.Y(y, axis=alt.Axis()),
-        color=alt.Color('color').scale(None)
-        ).properties(width="container", height=height, title = title
-        )
-    # sizing for poster
-    # ).configure_title(
-    # fontSize=40
-    # ).configure_axis(
-    # labelFontSize=24,
-    # titleFontSize=34
-    # )
     return chart
 def getButtons(style_options, style_choice, default_gap=None): #finding the buttons selected to use as filters
     column = style_options[style_choice]['property']
     opts = [style[0] for style in style_options[style_choice]['stops']]
@@ -137,7 +135,6 @@ def getButtons(style_options, style_choice, default_gap=None): #finding the butt
     return d
 def getColorVals(style_options, style_choice):
     #df_tab only includes filters selected, we need to manually add "color_by" column (if it's not already a filter).
     column = style_options[style_choice]['property']
@@ -147,73 +144,6 @@ def getColorVals(style_options, style_choice):
     return d
-def fire_style(layer):
-    return {"version": 8,
-    "sources": {
-        "source1": {
-            "type": "vector",
-            "url": "pmtiles://" + url_calfire,
-            "attribution": "CAL FIRE"
-        }
-    },
-    "layers": [
-        {
-            "id": "fire",
-            "source": "source1",
-            "source-layer": layer,
-            "type": "fill",
-            "paint": {
-                "fill-color": "#D22B2B",
-            }
-        }
-    ]
-}
-def rx_style(layer):
-    return{
-    "version": 8,
-    "sources": {
-        "source2": {
-            "type": "vector",
-            "url": "pmtiles://" + url_rxburn,
-            "attribution": "CAL FIRE"
-        }
-    },
-    "layers": [
-        {
-            "id": "fire",
-            "source": "source2",
-            "source-layer": layer,
-            # "filter": [">=", ["get", "YEAR_"], year],
-            "type": "fill",
-            "paint": {
-                "fill-color": "#702963",
-            }
-        }
-    ]
-}
-def get_sv_style(column):
-    return {
-        "layers": [
-            {
-                "id": "SVI",
-                "source": column, #need different "source" for multiple pmtiles layers w/ same file
-                "source-layer": "SVI2020_US_county",
-                "filter": ["match", ["get", "STATE"], "California", True, False],
-                "type": "fill",
-                "paint": {
-                    "fill-color": [
-                        "interpolate", ["linear"], ["get", column],
-                        0, white,
-                        1, svi_color
-                    ]
-                }
-            }
-        ]
-    }
 def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
     filters = []
     for col, val in zip(filter_cols, filter_vals):
@@ -231,7 +161,7 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
             {
                 "id": "ca30x30",
                 "source": "ca",
-                "source-layer": "layer",
                 "type": "fill",
                 "filter": combined_filters,
                 "paint": {
@@ -242,6 +172,7 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
         ]
     }
     return style
 def get_pmtiles_style_llm(paint, ids):
     combined_filters = ["all", ["match", ["get", "id"], ids, True, False]]
@@ -257,13 +188,12 @@ def get_pmtiles_style_llm(paint, ids):
             {
                 "id": "ca30x30",
                 "source": "ca",
-                "source-layer": "layer",
                 "type": "fill",
                 "filter": combined_filters,
                 "paint": {
                     "fill-color": paint,
                     "fill-opacity": 1,
-                    # "fill-extrusion-height": 1000
                 }
             }
         ]

                        mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
                        mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
                        mean_manageable_carbon = (_.manageable_carbon * _.acres).sum() / _.acres.sum(),
+                       mean_fire = (_.fire *_.acres).sum()/_.acres.sum(),
+                       mean_rxburn = (_.rxburn *_.acres).sum()/_.acres.sum(),
+                       mean_disadvantaged =  (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
                        mean_svi =  (_.svi * _.acres).sum() / _.acres.sum(),
                       )
             .mutate(percent_protected=_.percent_protected.round(1))
          )
         filter_cols.append(column)
         filters.append(getattr(_, column).isin(colorby_vals[column]))
     combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
+    if column == "status": #need to include non-conserved in summary stats
+        combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','non-conserved']))
     df = get_summary(ca, combined_filter, [column], colors) # df used for charts
     df_tab = get_summary(ca, combined_filter, filter_cols, colors = None) #df used for printed table
     return df, df_tab
         alt.Theta("percent_protected:Q").stack(True),
     )
     pie = ( base
+           .mark_arc(innerRadius= 40, outerRadius=100, stroke = 'black', strokeWidth = .5)
            .encode(alt.Color("color:N").scale(None).legend(None),
                    tooltip=['percent_protected', column])
     )
 def bar_chart(df, x, y, title): #display summary stats for color_by column
     #axis label angles / chart size
+    if x in ["manager_type",'status']: #labels are too long, making vertical
         angle = 270
         height = 373
+    elif x == 'ecoregion': # make labels vertical and figure taller
+        angle = 270
+        height = 430
     else: #other labels are horizontal
         angle = 0
         height = 310
     else:
         sort = 'x'
+    # modify label names in bar chart to fit in frame
+    label_transform = f"datum.{x}"  # default; no change
+    if x == "access_type":
+        label_transform = f"replace(datum.{x}, ' Access', '')"  #omit 'access' from access_type
+    elif x == "ecoregion":
+        label_transform = f"replace(datum.{x}, 'California', 'CA')"  # Replace "California" with "CA"
     x_title = next(key for key, value in select_column.items() if value == x)
+    chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
+        label=label_transform
         ).encode(
+        x=alt.X("label:N",
+                axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
                         sort=sort),
         y=alt.Y(y, axis=alt.Axis()),
+        color=alt.Color('color').scale(None),
+        ).properties(width="container", height=height, title = title)
     return chart
 def getButtons(style_options, style_choice, default_gap=None): #finding the buttons selected to use as filters
     column = style_options[style_choice]['property']
     opts = [style[0] for style in style_options[style_choice]['stops']]
     return d
 def getColorVals(style_options, style_choice):
     #df_tab only includes filters selected, we need to manually add "color_by" column (if it's not already a filter).
     column = style_options[style_choice]['property']
     return d
 def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
     filters = []
     for col, val in zip(filter_cols, filter_vals):
             {
                 "id": "ca30x30",
                 "source": "ca",
+                "source-layer": "ca30x30",
                 "type": "fill",
                 "filter": combined_filters,
                 "paint": {
         ]
     }
     return style
 def get_pmtiles_style_llm(paint, ids):
     combined_filters = ["all", ["match", ["get", "id"], ids, True, False]]
             {
                 "id": "ca30x30",
                 "source": "ca",
+                "source-layer": "ca30x30",
                 "type": "fill",
                 "filter": combined_filters,
                 "paint": {
                     "fill-color": paint,
                     "fill-opacity": 1,
                 }
             }
         ]

app/variables.py CHANGED Viewed

@@ -1,22 +1,19 @@
-# # urls for main layer
-ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/bdce1e6767e799abd0d828ebc7208537af6246df/ca-30x30.pmtiles"
-ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/dd69c8cbaee47ea2b64c19963177edb6635be5d9/ca-30x30.parquet"
 ca_area_acres = 1.014e8 #acres
 style_choice = "GAP Status Code"
 # urls for additional data layers
 url_sr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/species-richness-ca/{z}/{x}/{y}.png"
 url_rsr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/range-size-rarity/{z}/{x}/{y}.png"
 url_irr_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_irrecoverable_c_2018_cog.tif"
 url_man_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_manageable_c_2018_cog.tif"
-url_svi = "https://data.source.coop/cboettig/social-vulnerability/svi2020_us_county.pmtiles"
 url_justice40 = "https://data.source.coop/cboettig/justice40/disadvantaged-communities.pmtiles"
-url_loss_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/deforest-carbon-ca/{z}/{x}/{y}.png"
-url_hi = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_human_impact_cog.tif"
-url_calfire = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/cal_fire_2022.pmtiles"
-url_rxburn = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/cal_rxburn_2022.pmtiles"
 # colors for plotting
 private_access_color = "#DE881E" # orange
@@ -41,6 +38,7 @@ white =  "#FFFFFF"
 # gap codes 3 and 4 are off by default.
 default_gap = {
     3: False,
     4: False,
 }
@@ -60,7 +58,7 @@ manager = {
         ['Joint', joint_color],
         ['Tribal', tribal_color],
         ['Private', private_color],
-        ['HOA', hoa_color]
     ]
 }
@@ -69,7 +67,7 @@ easement = {
     'type': 'categorical',
     'stops': [
         ['True', private_access_color],
-        ['False', public_access_color]
     ]
 }
@@ -78,7 +76,7 @@ year = {
     'type': 'categorical',
     'stops': [
         ['pre-2024', year2023_color],
-        ['2024', year2024_color]
     ]
 }
@@ -89,12 +87,12 @@ access = {
         ['Open Access', public_access_color],
         ['No Public Access', private_access_color],
         ['Unknown Access', "#bbbbbb"],
-        ['Restricted Access', tribal_color]
     ]
 }
 gap = {
-    'property': 'reGAP',
     'type': 'categorical',
     'stops': [
         [1, "#26633d"],
@@ -104,9 +102,50 @@ gap = {
     ]
 }
 style_options = {
     "Year": year,
-    "GAP Status Code": gap,
     "Manager Type": manager,
     "Easement": easement,
     "Access Type": access,
@@ -143,12 +182,82 @@ justice40_style = {
         }
     ]
 }
 select_column = {
     "Year": "established",
-    "GAP Status Code": "reGAP",
     "Manager Type": "manager_type",
     "Easement": "easement",
-    "Access Type": "access_type",
 }

+# urls for main layer
+ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/8d5d938c04d3206e6bfb04055b5e779c4c28222f/ca-30x30.parquet"
+ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/c58913a279d13c414722c4299b0e0867e923946a/ca-30x30.pmtiles"
 ca_area_acres = 1.014e8 #acres
 style_choice = "GAP Status Code"
 # urls for additional data layers
 url_sr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/species-richness-ca/{z}/{x}/{y}.png"
 url_rsr = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/range-size-rarity/{z}/{x}/{y}.png"
 url_irr_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_irrecoverable_c_2018_cog.tif"
 url_man_carbon = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/main/ca_manageable_c_2018_cog.tif"
 url_justice40 = "https://data.source.coop/cboettig/justice40/disadvantaged-communities.pmtiles"
+url_calfire = 'https://minio.carlboettiger.info/public-fire/calfire-2023.pmtiles'
+url_rxburn = 'https://minio.carlboettiger.info/public-fire/calfire-rxburn-2023.pmtiles'
+url_svi = 'https://minio.carlboettiger.info/public-data/social-vulnerability/2022/SVI2022_US_tract.pmtiles'
 # colors for plotting
 private_access_color = "#DE881E" # orange
 # gap codes 0, 3 and 4 are off by default.
 default_gap = {
+    0: False,
     3: False,
     4: False,
 }
         ['Joint', joint_color],
         ['Tribal', tribal_color],
         ['Private', private_color],
+        ['HOA', hoa_color],
     ]
 }
     'type': 'categorical',
     'stops': [
         ['True', private_access_color],
+        ['False', public_access_color],
     ]
 }
     'type': 'categorical',
     'stops': [
         ['pre-2024', year2023_color],
+        ['2024', year2024_color],
     ]
 }
         ['Open Access', public_access_color],
         ['No Public Access', private_access_color],
         ['Unknown Access', "#bbbbbb"],
+        ['Restricted Access', tribal_color],
     ]
 }
 gap = {
+    'property': 'gap_code',
     'type': 'categorical',
     'stops': [
         [1, "#26633d"],
     ]
 }
+status = {
+    'property': 'status',
+    'type': 'categorical',
+    'stops': [
+        ['30x30-conserved', "#26633d"],
+        ['other-conserved', "#879647"],
+        ['non-conserved', white]
+    ]
+}
+ecoregion = {
+    'property': 'ecoregion',
+    'type': 'categorical',
+    'stops': [
+        ['Sierra Nevada Foothills', "#1f77b4"],
+        ['Southern Cascades', "#ff7f0e"],
+        ['Southeastern Great Basin', "#2ca02c"],
+        ['Southern California Mountains and Valleys', "#d62728"],
+        ['Sonoran Desert', "#9467bd"],
+        ['Northwestern Basin and Range', "#8c564b"],
+        ['Colorado Desert', "#e377c2"],
+        ['Central Valley Coast Ranges', "#7f7f7f"],
+        ['Great Valley (South)', "#bcbd22"],
+        ['Sierra Nevada', "#17becf"],
+        ['Northern California Coast Ranges', "#aec7e8"],
+        ['Northern California Interior Coast Ranges', "#ffbb78"],
+        ['Mojave Desert', "#98df8a"],
+        ['Mono', "#ff9896"],
+        ['Southern California Coast', "#c5b0d5"],
+        ['Modoc Plateau', "#c49c94"],
+        ['Klamath Mountains', "#f7b6d2"],
+        ['Northern California Coast', "#c7c7c7"],
+        ['Great Valley (North)', "#dbdb8d"],
+        ['Central California Coast', "#9edae5"],
+    ]
+}
 style_options = {
     "Year": year,
+    "GAP Code": gap,
+    "30x30 Status": status,
+    "Ecoregion": ecoregion,
     "Manager Type": manager,
     "Easement": easement,
     "Access Type": access,
         }
     ]
 }
+fire_style = {"version": 8,
+    "sources": {
+        "source1": {
+            "type": "vector",
+            "url": "pmtiles://" + url_calfire,
+            "attribution": "CAL FIRE"
+        }
+    },
+    "layers": [
+        {
+            "id": "fire",
+            "source": "source1",
+            "source-layer": 'calfire2023',
+            "filter": [">=", ["get", "YEAR_"], 2013],
+            "type": "fill",
+            "paint": {
+                "fill-color": "#D22B2B",
+            }
+        }
+    ]
+}
+rx_style = {
+    "version": 8,
+    "sources": {
+        "source2": {
+            "type": "vector",
+            "url": "pmtiles://" + url_rxburn,
+            "attribution": "CAL FIRE"
+        }
+    },
+    "layers": [
+        {
+            "id": "rxburn",
+            "source": "source2",
+            "source-layer": 'calfirerxburn2023',
+            "filter": [">=", ["get", "YEAR_"], 2013],
+            "type": "fill",
+            "paint": {
+                "fill-color": "#702963",
+            }
+        }
+    ]
+}
+svi_style = {
+        "layers": [
+            {
+                "id": "svi",
+                "source": "svi",
+                "source-layer": "svi",
+                "filter": ["match", ["get", "ST_ABBR"], "CA", True, False],
+                "type": "fill",
+                "paint": {
+                    "fill-color": [
+                        "interpolate", ["linear"], ["get", "RPL_THEMES"],
+                        0, white,
+                        1, svi_color
+                    ]
+                }
+            }
+        ]
+    }
 select_column = {
     "Year": "established",
+    "GAP Code": "gap_code",
+    "30x30 Status":  "status",
+    "Ecoregion":  "ecoregion",
     "Manager Type": "manager_type",
     "Easement": "easement",
+    "Access Type": "access_type"
 }

preprocess/preprocess.ipynb CHANGED Viewed

@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
    "metadata": {
     "editable": true,
@@ -452,6 +452,10 @@
     "    gdf_stats = gdf_stats.reset_index() \n",
     "\n",
     "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
     "gdf_stats.to_parquet(ca_parquet) # save results "
    ]
   },
@@ -476,20 +480,19 @@
     "hf_upload('ca-30x30.parquet', ca_parquet)\n",
     "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
     "\n",
-    "#to use PMTiles, need to convert to 4326 and geojson\n",
-    "ca_4326 = (con\n",
     "            .read_parquet(ca_parquet)\n",
-    "            .mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")) \n",
     "            .filter(_.status != 'non-conserved') #omitting the non-conserved to only for pmtiles  \n",
     "            )\n",
     "\n",
     "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
-    "ca_4326.execute().set_crs(\"epsg:4326\").to_file(path + 'ca-30x30.geojson') \n",
-    "pmtiles = to_pmtiles(path+ 'ca-30x30.geojson', ca_pmtiles)\n",
     "\n",
     "# upload pmtiles  to minio and HF\n",
     "hf_upload('ca-30x30.pmtiles', ca_pmtiles)\n",
-    "s3_cp(ca_pmtiles, \"s3://public-ca30x30/ca-30x30.pmtiles\", \"minio\")\n"
    ]
   }
  ],

   },
   {
    "cell_type": "code",
+   "execution_count": 1,
    "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
    "metadata": {
     "editable": true,
     "    gdf_stats = gdf_stats.reset_index() \n",
     "\n",
     "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
+    "\n",
+    "\n",
+    "# reproject to epsg:4326 since that's what pmtiles requires and we want to match that \n",
+    "gdf_stats = gdf_stats.to_crs(\"epsg:4326\")\n",
     "gdf_stats.to_parquet(ca_parquet) # save results "
    ]
   },
     "hf_upload('ca-30x30.parquet', ca_parquet)\n",
     "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
     "\n",
+    "#to use PMTiles, need to convert to geojson\n",
+    "ca_geojson = (con\n",
     "            .read_parquet(ca_parquet)\n",
     "            .filter(_.status != 'non-conserved') #omitting the non-conserved to only for pmtiles  \n",
     "            )\n",
     "\n",
     "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
+    "ca_geojson.execute().to_file(path + 'ca-30x30.geojson') \n",
+    "pmtiles = to_pmtiles(path+ 'ca-30x30.geojson', ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
     "\n",
     "# upload pmtiles  to minio and HF\n",
     "hf_upload('ca-30x30.pmtiles', ca_pmtiles)\n",
+    "s3_cp(ca_pmtiles, \"s3://public-ca30x30/ca-30x30.pmtiles\", \"minio\")"
    ]
   }
  ],