Spaces:

boettiger-lab
/

ca-30x30

Running

cassiebuhler committed on Feb 22

Commit

593d846

1 Parent(s): 56784d5

wip - charts

Added charts grouped by status, fixed non-conserved data (needed a unique id), and fixed the chatbot for fire risk.

Need to fix: 1) SE Great Basin is missing for non-conserved areas, 2) the 30x30 status chart doesn't change dynamically, and 3) ecoregion colors still need to be implemented in the labels for the 30x30 status charts.

Files changed (5) hide show

app/app.py +25 -118
app/system_prompt.txt +9 -1
app/utils.py +273 -42
app/variables.py +8 -6
preprocess/preprocess.ipynb +8 -23

app/app.py CHANGED Viewed

@@ -26,10 +26,9 @@ if "mydata" not in set(current_tables):
     tbl = con.read_parquet(ca_parquet)
     con.create_table("mydata", tbl)
 ca = con.table("mydata")
 for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
     'fire', 'rxburn', 'disadvantaged_communities',
@@ -37,6 +36,13 @@ for key in [
     if key not in st.session_state:
         st.session_state[key] = False
 st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
 #customizing style with CSS
@@ -128,8 +134,6 @@ m = leafmap.Map(style="positron")
 #############
 ##### Chatbot stuff
@@ -160,98 +164,22 @@ prompt = ChatPromptTemplate.from_messages([
 structured_llm = llm.with_structured_output(SQLResponse)
 few_shot_structured_llm = prompt | structured_llm
-# @st.cache_data(ttl=600)  # Cache expires every 10 minutes
-def run_sql(query,color_choice):
-    """
-    Filter data based on an LLM-generated SQL query and return matching IDs.
-    Args:
-        query (str): The natural language query to filter the data.
-        color_choice (str): The column used for plotting.
-    """
-    output = few_shot_structured_llm.invoke(query)
-    sql_query = output.sql_query
-    explanation =output.explanation
-    if not sql_query: # if the chatbot can't generate a SQL query.
-        st.success(explanation)
-        return pd.DataFrame({'id' : []})
-    result = ca.sql(sql_query).execute()
-    if result.empty :
-        explanation = "This query did not return any results. Please try again with a different query."
-        st.warning(explanation, icon="⚠️")
-        st.caption("SQL Query:")
-        st.code(sql_query,language = "sql")
-        if 'geom' in result.columns:
-            return result.drop('geom',axis = 1)
-        else:
-            return result
-    elif ("id" and "geom" in result.columns):
-        style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
-        legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
-        position = 'bottom-left'
-        fontsize = 15
-        bg_color = 'white'
-        # shorten legend for ecoregions
-        if color_choice == "Ecoregion":
-            legend_d = {key.replace("Northern California", "NorCal"): value for key, value in legend_d.items()}
-            legend_d = {key.replace("Southern California", "SoCal"): value for key, value in legend_d.items()}
-            legend_d = {key.replace("Southeastern", "SE."): value for key, value in legend_d.items()}
-            legend_d = {key.replace("and", "&"): value for key, value in legend_d.items()}
-            legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
-            legend_d = {key.replace("Northwestern", "NW."): value for key, value in legend_d.items()}
-            bg_color = 'rgba(255, 255, 255, 0.6)'
-            fontsize = 12
-        m.add_legend(legend_dict = legend_d, position = position, bg_color = bg_color, fontsize = fontsize)
-        m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
-        m.fit_bounds(result.total_bounds.tolist())
-        result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
-    else:
-        st.write(result)  # if we aren't mapping, just print out the data
-    with st.popover("Explanation"):
-        st.write(explanation)
-        st.caption("SQL Query:")
-        st.code(sql_query,language = "sql")
-    return result
-def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for printed table
-    filters = [_.id.isin(ids)]
-    combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
-    df = get_summary(ca, combined_filter, [column], colors) # df used for charts
-    return df
 chatbot_toggles = {key: False for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
     'fire', 'rxburn', 'disadvantaged_communities',
     'svi',
 ]}
 #############
 filters = {}
 with st.sidebar:
-    color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
     colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
     alpha = 0.8
     st.divider()
 ##### Chatbot
 with st.container():
@@ -271,8 +199,8 @@ with st.container():
         '''
         Exploratory data queries:
         - What is a GAP code?
         - What is the total acreage of areas designated as easements?
-        - Which GAP code has been impacted the most by fire?
         - Who manages the land with the highest amount of irrecoverable carbon and highest social vulnerability index?
         '''
@@ -332,7 +260,6 @@ with st.sidebar:
     # People Section
     with st.expander("👤 People"):
         a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
         show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
@@ -361,15 +288,11 @@ with st.sidebar:
     st.divider()
     st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
-    for col,val in style_options.items():
-        for name in val['stops'][0]:
-            key = val['property']+str(name)
-            st.session_state[key] = default_gap.get(name, True)
     for label in style_options: # get selected filters (based on the buttons selected)
         with st.expander(label):
             if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
-                opts = getButtons(style_options, label, default_gap)
             else: # other buttons are not on by default.
                 opts = getButtons(style_options, label)
             filters.update(opts)
@@ -383,42 +306,25 @@ with st.sidebar:
             filter_vals = []
     st.divider()
     st.markdown("""
     <p class="medium-font-sidebar">
     <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;'  aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
-    """, unsafe_allow_html=True)# adding github logo
 # Display CA 30x30 Data
 if 'out' not in locals():
     style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
-    legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
-    position = 'bottom-left'
-    fontsize = 15
-    bg_color = 'white'
-    # shorten legend for ecoregions
-    if color_choice == "Ecoregion":
-        legend_d = {key.replace("Northern California", "NorCal"): value for key, value in legend_d.items()}
-        legend_d = {key.replace("Southern California", "SoCal"): value for key, value in legend_d.items()}
-        legend_d = {key.replace("Southeastern", "SE."): value for key, value in legend_d.items()}
-        legend_d = {key.replace("and", "&"): value for key, value in legend_d.items()}
-        legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
-        legend_d = {key.replace("Northwestern", "NW."): value for key, value in legend_d.items()}
-        bg_color = 'rgba(255, 255, 255, 0.6)'
-        fontsize = 12
-    m.add_legend(legend_dict = legend_d, position = position, bg_color = bg_color, fontsize = fontsize)
     m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
 column = select_column[color_choice]
 select_colors = {
-    "Year": year["stops"],
-    "GAP Code": gap["stops"],
     "30x30 Status": status["stops"],
     "Ecoregion": ecoregion["stops"],
     "Manager Type": manager["stops"],
     "Easement": easement["stops"],
@@ -435,12 +341,12 @@ colors = (
 # get summary tables used for charts + printed table
 # df - charts; df_tab - printed table (omits colors)
 if 'out' not in locals():
-    df,df_tab,df_percent = summary_table(ca, column, colors, filter_cols, filter_vals, colorby_vals)
-    total_percent = df_percent.percent_protected.sum().round(2)
 else:
     df = summary_table_sql(ca, column, colors, ids)
-    total_percent = df.percent_protected.sum().round(2)
 # charts displayed based on color_by variable
@@ -453,7 +359,6 @@ rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)"
 justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
 svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
 main = st.container()
 with main:
@@ -471,7 +376,11 @@ with main:
             st.markdown(f"{total_percent}% CA Covered", help = "Updates based on displayed data")
             st.altair_chart(area_plot(df, column), use_container_width=True)
             if show_richness:
                 st.altair_chart(richness_chart, use_container_width=True)
@@ -496,8 +405,6 @@ with main:
             if show_rxburn:
                 st.altair_chart(rx_10_chart, use_container_width=True)
 st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")

     tbl = con.read_parquet(ca_parquet)
     con.create_table("mydata", tbl)
 ca = con.table("mydata")
+# session state for syncing app
 for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
     'fire', 'rxburn', 'disadvantaged_communities',
     if key not in st.session_state:
         st.session_state[key] = False
+for col,val in style_options.items():
+    for name in val['stops']:
+        key = val['property']+str(name[0])
+        if key not in st.session_state:
+            st.session_state[key] = default_boxes.get(name[0], True)
 st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
 #customizing style with CSS
 #############
 ##### Chatbot stuff
 structured_llm = llm.with_structured_output(SQLResponse)
 few_shot_structured_llm = prompt | structured_llm
 chatbot_toggles = {key: False for key in [
     'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
     'fire', 'rxburn', 'disadvantaged_communities',
     'svi',
 ]}
 #############
 filters = {}
 with st.sidebar:
+    color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
     colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
     alpha = 0.8
     st.divider()
 ##### Chatbot
 with st.container():
         '''
         Exploratory data queries:
         - What is a GAP code?
+        - What percentage of 30x30 conserved land has been impacted by wildfire?
         - What is the total acreage of areas designated as easements?
         - Who manages the land with the highest amount of irrecoverable carbon and highest social vulnerability index?
         '''
     # People Section
     with st.expander("👤 People"):
         a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
         show_justice40 = st.toggle("Disadvantaged Communities (Justice40)", key = "disadvantaged_communities", value=chatbot_toggles['disadvantaged_communities'])
     st.divider()
     st.markdown('<p class = "medium-font-sidebar"> Filters:</p>', help = "Apply filters to adjust what data is shown on the map.", unsafe_allow_html= True)
     for label in style_options: # get selected filters (based on the buttons selected)
         with st.expander(label):
             if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
+                opts = getButtons(style_options, label, default_boxes)
             else: # other buttons are not on by default.
                 opts = getButtons(style_options, label)
             filters.update(opts)
             filter_vals = []
     st.divider()
+    # adding github logo
     st.markdown("""
     <p class="medium-font-sidebar">
     <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;'  aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
+    """, unsafe_allow_html=True)
 # Display CA 30x30 Data
 if 'out' not in locals():
     style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
+    legend, position, bg_color, fontsize = getLegend(style_options, color_choice)
+    m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
     m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
 column = select_column[color_choice]
 select_colors = {
     "30x30 Status": status["stops"],
+    "GAP Code": gap["stops"],
+    "Year": year["stops"],
     "Ecoregion": ecoregion["stops"],
     "Manager Type": manager["stops"],
     "Easement": easement["stops"],
 # get summary tables used for charts + printed table
 # df - charts; df_tab - printed table (omits colors)
 if 'out' not in locals():
+    df, df_tab, df_percent, df_bar_30x30 = summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals)
+    total_percent = df_percent.percent_CA.sum().round(2)
 else:
     df = summary_table_sql(ca, column, colors, ids)
+    total_percent = df.percent_CA.sum().round(2)
 # charts displayed based on color_by variable
 justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
 svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
 main = st.container()
 with main:
             st.markdown(f"{total_percent}% CA Covered", help = "Updates based on displayed data")
             st.altair_chart(area_plot(df, column), use_container_width=True)
+            if 'df_bar_30x30' in locals(): #if we use chatbot, we won't have these graphs.
+                if column not in ["status", "gap_code"]:
+                    st.altair_chart(stacked_bar(df_bar_30x30, column,'percent_group','status', color_choice + ' by 30x30 Status'), use_container_width=True)
             if show_richness:
                 st.altair_chart(richness_chart, use_container_width=True)
             if show_rxburn:
                 st.altair_chart(rx_10_chart, use_container_width=True)
 st.caption("***The label 'established' is inferred from the California Protected Areas Database, which may introduce artifacts. For details on our methodology, please refer to our code: https://github.com/boettiger-lab/ca-30x30.")

app/system_prompt.txt CHANGED Viewed

@@ -147,10 +147,18 @@ sql_query:
 ## Example:
 example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
 sql_query:
-    SELECT "id", "geom", "name", "acres", "percent_rxburn_10yr", "percent_disadvantaged"
     FROM mydata
     WHERE "percent_disadvantaged" > 0
     AND "percent_rxburn_10yr" >= 0.3;
 Question: {input}

 ## Example:
 example_user: "Show me protected lands in disadvantaged communities that have had prescribed fires in at least 30% of its area."
 sql_query:
+    SELECT "id", "geom", "name", "acres", "rxburn", "percent_disadvantaged"
     FROM mydata
     WHERE "percent_disadvantaged" > 0
     AND "rxburn" >= 0.3;
+## Example:
+example_user: "What percentage of 30x30 conserved land has been impacted by wildfire?"
+sql_query:
+    SELECT SUM("fire" * "acres") / SUM("acres") * 100 AS percent_fire
+    FROM mydata
+    WHERE "status" = '30x30-conserved';
 Question: {input}

app/utils.py CHANGED Viewed

@@ -18,11 +18,22 @@ from itertools import chain
 from variables import *
-def get_summary(ca, combined_filter, column, colors=None): #summary stats, based on filtered data
     df = ca.filter(combined_filter)
     df = (df
             .group_by(*column)  # unpack the list for grouping
-            .aggregate(percent_protected=100 * _.acres.sum() / ca_area_acres,
                        mean_richness = (_.richness * _.acres).sum() / _.acres.sum(),
                        mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
                        mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
@@ -32,16 +43,21 @@ def get_summary(ca, combined_filter, column, colors=None): #summary stats, based
                        mean_disadvantaged =  (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
                        mean_svi =  (_.svi * _.acres).sum() / _.acres.sum(),
                       )
-            .mutate(percent_protected=_.percent_protected.round(1))
          )
     if colors is not None and not colors.empty: #only the df will have colors, df_tab doesn't since we are printing it.
-        df = df.inner_join(colors, column)
     df = df.cast({col: "string" for col in column})
     df = df.to_pandas()
     return df
-def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): # get df for charts + df_tab for printed table
     filters = []
     if filter_cols and filter_vals: #if a filter is selected, add to list of filters
         for filter_col, filter_val in zip(filter_cols, filter_vals):
@@ -51,42 +67,153 @@ def summary_table(ca, column, colors, filter_cols, filter_vals,colorby_vals): #
                 filters.append(getattr(_, filter_col) == filter_val[0])
     if column not in filter_cols: #show color_by column in table by adding it as a filter (if it's not already a filter)
         filter_cols.append(column)
-        filters.append(getattr(_, column).isin(colorby_vals[column]))
     combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
-    df_percent = get_summary(ca, combined_filter, [column], colors) # df used for charts
-    if column == "status": #need to include non-conserved in summary stats
-        combined_filter = (combined_filter) | (_.status.isin(['30x30-conserved','other-conserved','non-conserved']))
-    df = get_summary(ca, combined_filter, [column], colors) # df used for charts
-    df_tab = get_summary(ca, combined_filter, filter_cols, colors = None) #df used for printed table
-    return df, df_tab, df_percent
-def area_plot(df, column): #percent protected pie chart
     base = alt.Chart(df).encode(
-        alt.Theta("percent_protected:Q").stack(True),
     )
-    pie = ( base
-           .mark_arc(innerRadius= 40, outerRadius=100, stroke = 'black', strokeWidth = .5)
-           .encode(alt.Color("color:N").scale(None).legend(None),
-                   tooltip=['percent_protected', column])
     )
-    text = ( base
-            .mark_text(radius=80, size=14, color="white")
-            .encode(text = column + ":N")
     )
-    plot = pie # pie + text
     return plot.properties(width="container", height=290)
 def bar_chart(df, x, y, title): #display summary stats for color_by column
     #axis label angles / chart size
-    if x in ["manager_type",'status']: #labels are too long, making vertical
         angle = 270
         height = 373
     elif x == 'ecoregion': # make labels vertical and figure taller
@@ -97,34 +224,71 @@ def bar_chart(df, x, y, title): #display summary stats for color_by column
         height = 310
     # order of bars
     if x == "established": # order labels in chronological order, not alphabetic.
         sort = '-x'
     elif x == "access_type": #order based on levels of openness
         sort=['Open', 'Restricted', 'No Public', "Unknown"]
     elif x == "manager_type":
         sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
-    else:
-        sort = 'x'
     # modify label names in bar chart to fit in frame
     label_transform = f"datum.{x}"  # default; no change
     if x == "access_type":
         label_transform = f"replace(datum.{x}, ' Access', '')"  #omit 'access' from access_type
     elif x == "ecoregion":
-        label_transform = f"replace(datum.{x}, 'California', 'CA')"  # Replace "California" with "CA"
     x_title = next(key for key, value in select_column.items() if value == x)
     chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
-        label=label_transform
         ).encode(
         x=alt.X("label:N",
                 axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
-                        sort=sort),
-        y=alt.Y(y, axis=alt.Axis()),
         color=alt.Color('color').scale(None),
-        ).properties(width="container", height=height, title = title)
     return chart
 def sync_checkboxes(source):
     # gap 1 and gap 2 on -> 30x30-conserved on
     if source in ["gap_code1", "gap_code2"]:
@@ -138,35 +302,31 @@ def sync_checkboxes(source):
     # other-conserved on <-> gap 3 on
     elif source == "gap_code3":
         st.session_state["statusother-conserved"] = st.session_state.gap_code3
-        rerun_needed = True
     elif source == "statusother-conserved":
         if "gap_code3" in st.session_state and st.session_state["statusother-conserved"] != st.session_state.gap_code3:
             st.session_state.gap_code3 = st.session_state["statusother-conserved"]
-            rerun_needed = True  # Ensure UI updates
     # unknown on <-> gap 4 on
     elif source == "gap_code4":
         st.session_state.statusunknown = st.session_state.gap_code4
-        rerun_needed = True
     elif source == "statusunknown":
         if "gap_code4" in st.session_state and st.session_state.statusunknown != st.session_state.gap_code4:
             st.session_state.gap_code4 = st.session_state.statusunknown
-            rerun_needed = True
     # non-conserved on <-> gap 0
     elif source == "gap_code0":
         st.session_state['statusnon-conserved'] = st.session_state.gap_code0
-        rerun_needed = True
     elif source == "statusnon-conserved":
         if "gap_code0" in st.session_state and st.session_state['statusnon-conserved'] != st.session_state.gap_code0:
             st.session_state.gap_code0 = st.session_state['statusnon-conserved']
-            rerun_needed = True
-def getButtons(style_options, style_choice, default_gap=None):
     column = style_options[style_choice]['property']
     opts = [style[0] for style in style_options[style_choice]['stops']]
-    default_gap = default_gap or {}
     buttons = {}
     for name in opts:
         key = column + str(name)
@@ -185,15 +345,32 @@ def getColorVals(style_options, style_choice):
     return d
 def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
     filters = []
     for col, val in zip(filter_cols, filter_vals):
         filters.append(["match", ["get", col], val, True, False])
     combined_filters = ["all"] + filters
     if "non-conserved" in list(chain.from_iterable(filter_vals)):
        combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
     style = {
         "version": 8,
         "sources": {
@@ -244,3 +421,57 @@ def get_pmtiles_style_llm(paint, ids):
         ]
     }
     return style

 from variables import *
def colorTable(select_colors, color_choice, column):
    """Return a pandas lookup table pairing each `column` value with a color.

    Args:
        select_colors: Mapping from a color-by choice to the rows of its
            palette; each row supplies the two fields named below.
        color_choice: Key into `select_colors` selecting the palette to use.
        column: Name given to the first column of the result; the second
            column is always named "color".

    Returns:
        pandas.DataFrame with the columns [column, "color"].
    """
    palette_rows = select_colors[color_choice]
    # Build the table through ibis, then materialize it as a pandas frame.
    palette = ibis.memtable(palette_rows, columns=[column, "color"])
    return palette.to_pandas()
def get_summary(ca, combined_filter, column, main_group, colors=None):
    """Aggregate the filtered table into per-group summary statistics.

    Args:
        ca: ibis table expression holding the area data.
        combined_filter: ibis boolean expression selecting the rows to
            summarize.
        column: List of column names to group by. The last entry is the key
            used to attach `colors`.
        main_group: Column used to compute each group's total acreage, which
            is the denominator of `percent_group`.
        colors: Optional pandas DataFrame of colors keyed by `column[-1]`.
            When None or empty, no color column is joined (used for the
            printed table).

    Returns:
        pandas.DataFrame with the grouping columns cast to strings plus
        `percent_CA`, `acres`, the acre-weighted means, `total_acres` and
        `percent_group` (and the color columns when `colors` is supplied).
    """
    # Filter once; both the group totals and the summary reuse this expression.
    filtered = ca.filter(combined_filter)

    # Total acres for each main group.
    group_totals = filtered.group_by(main_group).aggregate(total_acres=_.acres.sum())

    summary = (
        filtered
        .group_by(*column)  # unpack the list for grouping
        .aggregate(
            percent_CA=100 * _.acres.sum() / ca_area_acres,
            acres=_.acres.sum(),
            # Means are weighted by acreage so large areas count for more.
            mean_richness=(_.richness * _.acres).sum() / _.acres.sum(),
            mean_rsr=(_.rsr * _.acres).sum() / _.acres.sum(),
            mean_irrecoverable_carbon=(_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
            mean_disadvantaged=(_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
            mean_svi=(_.svi * _.acres).sum() / _.acres.sum(),
        )
        .mutate(
            percent_CA=_.percent_CA.round(1),
            acres=_.acres.round(1),
        )
    )

    # Share of each row within its main group (computed from the rounded acres).
    summary = summary.inner_join(group_totals, main_group)
    summary = summary.mutate(percent_group=(100 * _.acres / _.total_acres).round(1))

    # Only the chart data frames carry colors; the printed table does not.
    if colors is not None and not colors.empty:
        summary = summary.inner_join(colors, column[-1])

    summary = summary.cast({col: "string" for col in column})
    return summary.to_pandas()
def summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals, colorby_vals):
    """Build the data frames behind the charts and the printed summary table.

    Args:
        ca: ibis table expression holding the area data.
        column: Name of the color-by column being summarized.
        select_colors: Mapping from color-by choice to its palette rows.
        color_choice: Key into `select_colors` for the active palette.
        filter_cols: List of column names with an active filter (may be
            empty or None). It is not modified.
        filter_vals: List of selected-value lists, parallel to `filter_cols`.
        colorby_vals: Mapping from column name to the values selected for it.

    Returns:
        Tuple `(df, df_tab, df_percent, df_bar_30x30)`:
        `df` feeds the charts, `df_tab` the printed table, `df_percent` the
        percentage summary, and `df_bar_30x30` the stacked 30x30 status chart
        (None when `column` is "status" or "gap_code").
    """
    all_statuses = ['30x30-conserved', 'other-conserved', 'unknown', 'non-conserved']
    colors = colorTable(select_colors, color_choice, column)

    # Work on a copy: the color-by column may be appended below, and the
    # caller's list must not change as a side effect of building a table.
    group_cols = list(filter_cols) if filter_cols else []

    filters = []
    if filter_cols and filter_vals:  # if a filter is selected, add to list of filters
        for filter_col, filter_val in zip(filter_cols, filter_vals):
            # NOTE(review): only the first selected value of each filter is
            # compared here -- confirm this is intended.
            filters.append(getattr(_, filter_col) == filter_val[0])

    # Show the color-by column in the table by adding it as a filter
    # (if it's not already one).
    if column not in group_cols:
        group_cols.append(column)
        filters.append(getattr(_, column).isin(colorby_vals[column]))

    # Combine all the filters into a single ibis filter expression.
    combined_filter = reduce(lambda x, y: x & y, filters)

    df_percent = get_summary(ca, combined_filter, [column], column, colors)  # used for percentage
    df_tab = get_summary(ca, combined_filter, group_cols, column, colors=None)  # used for printed table

    # NOTE(review): OR-ing with a membership test over all four status values
    # admits every row whose status is one of them, so the filters above stop
    # narrowing the result -- confirm this is the intended way to include
    # non-conserved areas.
    if column == "status":  # need to include non-conserved in summary stats
        combined_filter = (combined_filter) | (_.status.isin(all_statuses))
    df = get_summary(ca, combined_filter, [column], column, colors)  # used for charts

    df_bar_30x30 = None  # no stacked chart when coloring by status/gap_code
    if column not in ["status", "gap_code"]:
        status_colors = colorTable(select_colors, "30x30 Status", 'status')
        combined_filter_status = (combined_filter) | (_.status.isin(all_statuses))
        df_bar_30x30 = get_summary(ca, combined_filter_status, [column, 'status'], column, status_colors)

    return df, df_tab, df_percent, df_bar_30x30
def get_hex(df, color, sort_order):
    """List the colors of the categories in `df`, arranged by `sort_order`.

    Args:
        df: pandas DataFrame containing the category column named by `color`
            and a "color" column.
        color: Name of the category column.
        sort_order: Category values in the desired output order.

    Returns:
        List of "color" values, one per `sort_order` entry that survives the
        missing-value drop, in `sort_order` order. When a category appears
        more than once, its first row is used.
    """
    first_per_category = df.drop_duplicates(subset=color, keep="first")
    by_category = first_per_category.set_index(color)
    # Reordering introduces missing rows for absent categories; drop them.
    in_order = by_category.reindex(sort_order).dropna()
    return list(in_order["color"])
def stacked_bar(df, x, y, color, title):
    """Build a stacked bar chart of `y` per `x`, stacked by 30x30 status.

    Args:
        df: pandas DataFrame with the `x`, `y` and `color` columns, a "color"
            column of hex codes, and the `percent_CA`, `percent_group` and
            `acres` columns shown in the tooltip. It is not modified.
        x: Name of the column shown on the x axis.
        y: Altair shorthand for the y-axis field.
        color: Name of the column whose values define the stacked segments.
        title: Chart title.

    Returns:
        The configured Altair chart.
    """
    # Bar order along the x axis.
    ecoregion_order = [
        'SE. Great Basin', 'Mojave Desert', 'Sonoran Desert', 'Sierra Nevada',
        'SoCal Mountains & Valleys', 'Mono',
        'Central CA Coast', 'Klamath Mountains', 'NorCal Coast', 'NorCal Coast Ranges',
        'NW. Basin & Range', 'Colorado Desert', 'Central Valley Coast Ranges', 'SoCal Coast',
        'Sierra Nevada Foothills', 'Southern Cascades', 'Modoc Plateau',
        'Great Valley (North)', 'NorCal Interior Coast Ranges',
        'Great Valley (South)',
    ]
    bar_orders = {
        "established": '-x',  # chronological rather than alphabetic
        "access_type": ['Open', 'Restricted', 'No Public', "Unknown"],  # levels of openness
        "manager_type": ["Federal", "Tribal", "State", "Special District", "County", "City",
                         "HOA", "Joint", "Non Profit", "Private", "Unknown"],
        "status": ["30x30-conserved", "other-conserved", "unknown", "non-conserved"],
        "ecoregion": ecoregion_order,
    }
    sort = bar_orders.get(x, 'x')

    # Label angle and figure height: long labels are drawn vertically and the
    # ecoregion figure is made taller; everything else is horizontal.
    axis_layouts = {"manager_type": (270, 373), "ecoregion": (270, 430)}
    angle, height = axis_layouts.get(x, (0, 310))

    y_titles = {
        'ecoregion': 'Ecoregion (%)',
        'established': 'Year (%)',
        'manager_type': 'Manager Type (%)',
        'easement': 'Easement (%)',
        'access_type': 'Access (%)'
    }
    ytitle = y_titles.get(x, y)  # Default to `y` if not in the dictionary

    # Stacked segment order. Keep only the categories actually present (with a
    # usable color), in canonical order, so each category is paired with its
    # own color even when one in the middle of the list is absent.
    status_order = ['30x30-conserved', 'other-conserved', 'unknown', 'non-conserved']
    palette = df[[color, 'color']]
    usable = palette.drop_duplicates(subset=color, keep="first").dropna()
    present = set(usable[color])
    sort_order = [status for status in status_order if status in present]
    color_hex = get_hex(palette, color, sort_order)

    # Add the stacking rank on a copy so the caller's frame is left untouched.
    rank = {status: position for position, status in enumerate(sort_order)}
    df = df.assign(
        stack_order=df[color].apply(lambda val: rank.get(val, len(sort_order)))
    )

    if x == "ecoregion":
        # Abbreviate the long ecoregion names so the labels fit.
        label_transform = (
            "replace("
            "replace("
            "replace("
            "replace("
            "replace("
            "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
            "'Southern California', 'SoCal'),"
            "'Southeastern', 'SE.'),"
            "'Northwestern', 'NW.'),"
            "'and', '&'),"
            "'California', 'CA')"
        )
    else:
        label_transform = f"datum.{x}"  # Default label transformation

    chart = alt.Chart(df).mark_bar().transform_calculate(
        label=label_transform
    ).encode(
        x=alt.X("label:N", sort=sort, title=None,
                axis=alt.Axis(labelLimit=150, labelAngle=angle)),  # Shorten axis labels
        y=alt.Y(y, title=ytitle).scale(domain=(0, 100)),
        color=alt.Color(
            color,
            sort=sort_order,  # Controls legend order
            scale=alt.Scale(domain=sort_order, range=color_hex)
        ),
        order=alt.Order(
            "stack_order:Q",
            sort="ascending"
        ),
        tooltip=[
            alt.Tooltip("label", type="nominal"),  # Use transformed label
            alt.Tooltip("percent_CA", type="quantitative", format=",.2f"),
            alt.Tooltip("percent_group", type="quantitative", format=",.2f"),
            alt.Tooltip("acres", type="quantitative", format=",.0f"),
        ]
    ).configure_legend(
        direction='horizontal',
        orient='top',
        columns=3,
        title=None,
        labelOffset=2,
        offset=10
    ).properties(
        width="container", height=height, title=title
    ).configure_title(fontSize=18, align="center", anchor='middle', offset=10)
    return chart
def area_plot(df, column):  # Percent protected pie chart
    """Build a donut chart of `percent_CA`, one slice per row of `df`.

    Args:
        df: pandas DataFrame with `percent_CA`, `acres`, a "color" column
            used verbatim as each slice's fill, and the `column` field.
        column: Name of the categorical column shown in the tooltip.

    Returns:
        The Altair chart, sized to its container with a height of 290.
    """
    tooltip = [
        alt.Tooltip(column, type="nominal"),
        alt.Tooltip("percent_CA", type="quantitative", format=",.2f"),
        alt.Tooltip("acres", type="quantitative", format=",.0f"),
    ]
    # Only the arcs are drawn; the in-slice text labels that were once built
    # here were never added to the returned chart, so they are not constructed.
    pie = (
        alt.Chart(df)
        .encode(alt.Theta("percent_CA:Q").stack(True))
        .mark_arc(innerRadius=40, outerRadius=100, stroke="black", strokeWidth=0.5)
        .encode(
            # scale(None) uses the hex codes in "color" directly; no legend.
            alt.Color("color:N").scale(None).legend(None),
            tooltip=tooltip,
        )
    )
    return pie.properties(width="container", height=290)
 def bar_chart(df, x, y, title): #display summary stats for color_by column
     #axis label angles / chart size
+    if x == "manager_type": #labels are too long, making vertical
         angle = 270
         height = 373
     elif x == 'ecoregion': # make labels vertical and figure taller
         height = 310
     # order of bars
+    sort = 'x'
+    lineBreak = ''
     if x == "established": # order labels in chronological order, not alphabetic.
         sort = '-x'
     elif x == "access_type": #order based on levels of openness
         sort=['Open', 'Restricted', 'No Public', "Unknown"]
     elif x == "manager_type":
         sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
+    elif x == "ecoregion":
+       sort = ['SE. Great Basin','Mojave Desert','Sonoran Desert','Sierra Nevada','SoCal Mountains & Valleys','Mono',
+                'Central CA Coast','Klamath Mountains','NorCal Coast','NorCal Coast Ranges',
+                'NW. Basin & Range','Colorado Desert','Central Valley Coast Ranges','SoCal Coast',
+                'Sierra Nevada Foothills','Southern Cascades','Modoc Plateau','Great Valley (North)','NorCal Interior Coast Ranges',
+                'Great Valley (South)']
+    elif x == "status":
+        sort = ["30x30-conserved","other-conserved","unknown","non-conserved"]
+        lineBreak = '-'
     # modify label names in bar chart to fit in frame
     label_transform = f"datum.{x}"  # default; no change
     if x == "access_type":
         label_transform = f"replace(datum.{x}, ' Access', '')"  #omit 'access' from access_type
     elif x == "ecoregion":
+        label_transform = (
+            "replace("
+            "replace("
+            "replace("
+            "replace("
+            "replace("
+            "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
+            "'Southern California', 'SoCal'),"
+            "'Southeastern', 'SE.'),"
+            "'Northwestern', 'NW.'),"
+            "'and', '&'),"
+            "'California', 'CA')"
+        )
+    y_titles = {
+        'mean_richness': 'Richness (Mean)',
+        'mean_rsr': 'Range-Size Rarity (Mean)',
+        'mean_irrecoverable_carbon': 'Irrecoverable Carbon (Mean)',
+        'mean_manageable_carbon': 'Manageable Carbon (Mean)',
+        'mean_disadvantaged': 'Disadvantaged (Mean)',
+        'mean_svi': 'SVI (Mean)',
+        'mean_fire': 'Fire (Mean)',
+        'mean_rxburn': 'Rx Fire (Mean)'
+    }
+    ytitle = y_titles.get(y, y)  # Default to `y` if not in the dictionary
     x_title = next(key for key, value in select_column.items() if value == x)
     chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
+            label=label_transform
         ).encode(
         x=alt.X("label:N",
                 axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
+                        sort=sort),
+        y=alt.Y(y, axis=alt.Axis(title = ytitle)),
         color=alt.Color('color').scale(None),
+        ).configure(lineBreak = lineBreak)
+    chart = chart.properties(width="container", height=height, title = title
+                            ).configure_title(fontSize=18, align = "center",anchor='middle')
     return chart
 def sync_checkboxes(source):
     # gap 1 and gap 2 on -> 30x30-conserved on
     if source in ["gap_code1", "gap_code2"]:
     # other-conserved on <-> gap 3 on
     elif source == "gap_code3":
         st.session_state["statusother-conserved"] = st.session_state.gap_code3
     elif source == "statusother-conserved":
         if "gap_code3" in st.session_state and st.session_state["statusother-conserved"] != st.session_state.gap_code3:
             st.session_state.gap_code3 = st.session_state["statusother-conserved"]
     # unknown on <-> gap 4 on
     elif source == "gap_code4":
         st.session_state.statusunknown = st.session_state.gap_code4
     elif source == "statusunknown":
         if "gap_code4" in st.session_state and st.session_state.statusunknown != st.session_state.gap_code4:
             st.session_state.gap_code4 = st.session_state.statusunknown
     # non-conserved on <-> gap 0
     elif source == "gap_code0":
         st.session_state['statusnon-conserved'] = st.session_state.gap_code0
     elif source == "statusnon-conserved":
         if "gap_code0" in st.session_state and st.session_state['statusnon-conserved'] != st.session_state.gap_code0:
             st.session_state.gap_code0 = st.session_state['statusnon-conserved']
+def getButtons(style_options, style_choice, default_boxes=None):
     column = style_options[style_choice]['property']
     opts = [style[0] for style in style_options[style_choice]['stops']]
+    default_boxes = default_boxes or {}
     buttons = {}
     for name in opts:
         key = column + str(name)
     return d
def getLegend(style_options, color_choice):
    """Assemble the map legend and its display settings for a color-by choice.

    Args:
        style_options: Mapping from color-by choice to a style dict whose
            'stops' entry is a sequence of (category, color) pairs.
        color_choice: Key into `style_options`.

    Returns:
        Tuple `(legend, position, bg_color, fontsize)` where `legend` maps
        each category to its color. For "Ecoregion" the category names are
        abbreviated and a translucent background with a smaller font is used.
    """
    stops = style_options[color_choice]['stops']
    position = 'bottom-left'

    if color_choice != "Ecoregion":
        return {cat: color for cat, color in stops}, position, 'white', 15

    # Shorten legend for ecoregions; the substitutions are applied in order.
    abbreviations = (
        ("Northern California", "NorCal"),
        ("Southern California", "SoCal"),
        ("Southeastern", "SE."),
        ("and", "&"),
        ("California", "CA"),
        ("Northwestern", "NW."),
    )
    legend = {}
    for name, hex_color in stops:
        for long_form, short_form in abbreviations:
            name = name.replace(long_form, short_form)
        legend[name] = hex_color
    return legend, position, 'rgba(255, 255, 255, 0.6)', 12
 def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
     filters = []
     for col, val in zip(filter_cols, filter_vals):
         filters.append(["match", ["get", col], val, True, False])
     combined_filters = ["all"] + filters
     if "non-conserved" in list(chain.from_iterable(filter_vals)):
        combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
     style = {
         "version": 8,
         "sources": {
         ]
     }
     return style
def run_sql(query, color_choice):
    """
    Run an LLM-generated SQL query against the data and display the result.

    When the result has both "id" and "geom" columns the matching features are
    drawn on the map; otherwise the result is written to the page as a table.

    Args:
        query (str): The natural language query to filter the data.
        color_choice (str): The column used for plotting.

    Returns:
        pandas.DataFrame: The query result without its "geom" column, or an
        empty frame with an "id" column when no SQL could be generated.
    """
    # NOTE(review): `few_shot_structured_llm`, `ca`, `m` and `alpha` are read
    # from module scope -- confirm they are defined where this function lives.
    output = few_shot_structured_llm.invoke(query)
    sql_query = output.sql_query
    explanation = output.explanation

    if not sql_query:  # if the chatbot can't generate a SQL query.
        st.success(explanation)
        return pd.DataFrame({'id': []})

    # NOTE(security): the SQL text comes straight from the LLM and is executed
    # as-is; make sure the connection cannot modify data.
    result = ca.sql(sql_query).execute()

    if result.empty:
        explanation = "This query did not return any results. Please try again with a different query."
        st.warning(explanation, icon="⚠️")
        st.caption("SQL Query:")
        st.code(sql_query, language="sql")
        if 'geom' in result.columns:
            return result.drop('geom', axis=1)
        return result

    has_geom = 'geom' in result.columns
    # Both columns are required to map the result: "id" selects the features
    # and "geom" supplies the bounds. (`"id" and "geom" in cols` tested only
    # for "geom".)
    if has_geom and 'id' in result.columns:
        style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
        legend, position, bg_color, fontsize = getLegend(style_options, color_choice)
        m.add_legend(legend_dict=legend, position=position, bg_color=bg_color, fontsize=fontsize)
        m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
        m.fit_bounds(result.total_bounds.tolist())
        result = result.drop('geom', axis=1)  # printing to streamlit so geom must be dropped
    else:
        if has_geom:
            # Geometry without ids cannot be mapped; drop it before printing.
            result = result.drop('geom', axis=1)
        st.write(result)  # if we aren't mapping, just print out the data

    with st.popover("Explanation"):
        st.write(explanation)
        st.caption("SQL Query:")
        st.code(sql_query, language="sql")
    return result
def summary_table_sql(ca, column, colors, ids):
    """Summarize the rows selected by a chatbot query, for the charts.

    Args:
        ca: ibis table expression holding the area data.
        column: Name of the color-by column to group by.
        colors: pandas DataFrame of colors keyed by `column`.
        ids: Collection of row ids to keep.

    Returns:
        pandas.DataFrame of summary statistics grouped by `column`.
    """
    id_filter = _.id.isin(ids)
    # `get_summary` takes the main-group column before `colors`; passing
    # `colors` in that position would group totals by the palette and join
    # no colors at all.
    return get_summary(ca, id_filter, [column], column, colors)

app/variables.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # urls for main layer
-ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/38af68979644f52ac928c5e41c81ec4d93468eef/ca-30x30.parquet"
-ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/e283bb63ee76dd5acd2d187029a80ab6a011886b/ca-30x30.pmtiles"
 ca_area_acres = 1.014e8 #acres
 style_choice = "GAP Status Code"
 # urls for additional data layers
@@ -38,7 +38,7 @@ svi_color = "#1bc7c3" #cyan
 white =  "#FFFFFF"
 # gap codes 3 and 4 are off by default.
-default_gap = {
     0: False,
     3: False,
     4: False,
@@ -119,6 +119,8 @@ status = {
         ['other-conserved', "#b6ce7a"],
         ['unknown', "#e5efdb"],
         ['non-conserved', "#e1e1e1"]
     ],
 }
@@ -153,9 +155,9 @@ ecoregion = {
 }
 style_options = {
-    "Year": year,
     "30x30 Status": status,
     "GAP Code": gap,
     "Ecoregion": ecoregion,
     "Manager Type": manager,
     "Easement": easement,
@@ -262,9 +264,9 @@ svi_style = {
 select_column = {
-    "Year": "established",
     "30x30 Status":  "status",
     "GAP Code": "gap_code",
     "Ecoregion":  "ecoregion",
     "Manager Type": "manager_type",
     "Easement": "easement",

 # urls for main layer
+ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/65eb463312262e50d51f5c07bfad7568152803b0/ca-30x30.parquet"
+ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/fc4dc523c27a8133452bb7596e3e520fda24eccd/ca-30x30.pmtiles"
 ca_area_acres = 1.014e8 #acres
+# ca_area_acres = 103179953.76086558
 style_choice = "GAP Status Code"
 # urls for additional data layers
 white =  "#FFFFFF"
 # gap codes 0, 3 and 4 are off by default.
+default_boxes = {
     0: False,
     3: False,
     4: False,
         ['other-conserved', "#b6ce7a"],
         ['unknown', "#e5efdb"],
         ['non-conserved', "#e1e1e1"]
+        # ['non-conserved', white]
     ],
 }
 }
 style_options = {
     "30x30 Status": status,
     "GAP Code": gap,
+    "Year": year,
     "Ecoregion": ecoregion,
     "Manager Type": manager,
     "Easement": easement,
 select_column = {
     "30x30 Status":  "status",
     "GAP Code": "gap_code",
+    "Year": "established",
     "Ecoregion":  "ecoregion",
     "Manager Type": "manager_type",
     "Easement": "easement",

preprocess/preprocess.ipynb CHANGED Viewed

@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
    "metadata": {
     "editable": true,
@@ -182,25 +182,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3c217929b7744164a99f6e2314366359",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
    "source": [
     "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
     "\n",
@@ -210,7 +195,7 @@
     "con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
     "con.create_table(\"non\", non, overwrite = True)\n",
     "\n",
-    "# split up the non-conserved areas by ecoregions\n",
     "con.con.execute('''\n",
     "CREATE TABLE non_conserved_eco AS\n",
     "SELECT \n",
@@ -223,12 +208,12 @@
     "WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
     "''')\n",
     "\n",
-    "\n",
     "# save to parquet file so we don't have to run this again\n",
     "non_eco = (con.table(\"non_conserved_eco\")\n",
     "           .drop('geom')\n",
     "           .rename(geom = \"geom_1\")\n",
-    "           .mutate(geom = ST_MakeValid(_.geom))          \n",
     "          )\n",
     "\n",
     "non_conserved_eco = non_eco.execute()\n",
@@ -256,7 +241,7 @@
     "    .cast({\"geom\": \"geometry\"})\n",
     "    .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
     "            access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
-    "            ecoregion = ibis.null(), easement = ibis.null(), id = 0, type = ibis.literal(\"Land\"),\n",
     "            status = ibis.literal(\"non-conserved\"),\n",
     "            acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
     "           )\n",
@@ -466,7 +451,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "id": "aade11d9-87b9-403d-bad1-3069663807a9",
    "metadata": {},
    "outputs": [],

   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
    "metadata": {
     "editable": true,
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "070bbdde-b141-4a63-8f8a-984dd01fd51a",
    "metadata": {},
+   "outputs": [],
    "source": [
     "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
     "\n",
     "con.create_table(\"eco\", eco.select(\"ECOREGION_\",\"geometry\"), overwrite = True)\n",
     "con.create_table(\"non\", non, overwrite = True)\n",
     "\n",
+    "#split up the non-conserved areas by ecoregions\n",
     "con.con.execute('''\n",
     "CREATE TABLE non_conserved_eco AS\n",
     "SELECT \n",
     "WHERE ST_GeometryType(ST_Intersection(non.geom, eco.geometry)) IN ('POLYGON', 'MULTIPOLYGON');\n",
     "''')\n",
     "\n",
     "# save to parquet file so we don't have to run this again\n",
     "non_eco = (con.table(\"non_conserved_eco\")\n",
     "           .drop('geom')\n",
     "           .rename(geom = \"geom_1\")\n",
+    "           .mutate(geom = ST_MakeValid(_.geom))\n",
+    "           .mutate(id=ibis.row_number().over())\n",
     "          )\n",
     "\n",
     "non_conserved_eco = non_eco.execute()\n",
     "    .cast({\"geom\": \"geometry\"})\n",
     "    .mutate(established = ibis.null(), gap_code = 0, name = ibis.literal(\"Non-Conserved Areas\"),\n",
     "            access_type = ibis.null(), manager = ibis.null(), manager_type = ibis.null(),\n",
+    "            easement = ibis.null(), type = ibis.literal(\"Land\"),\n",
     "            status = ibis.literal(\"non-conserved\"),\n",
     "            acres = _.geom.area() / 4046.8564224 #convert sq meters to acres\n",
     "           )\n",
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "aade11d9-87b9-403d-bad1-3069663807a9",
    "metadata": {},
    "outputs": [],