Spaces:

boettiger-lab
/

ca-30x30

Running

App Files Files Community

cassiebuhler committed on Mar 4

Commit

38804a1

1 Parent(s): b834a5d

wip

Browse files

Optimized functions. Still need to figure out why removing the "unknown" status makes the stacked bar charts render incorrectly.

Files changed (2) hide show

app/app.py +8 -8
app/utils.py +321 -442

app/app.py CHANGED Viewed

@@ -195,7 +195,7 @@ def run_sql(query,color_choice):
     elif ("id" and "geom" in result.columns):
         style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
-        legend, position, bg_color, fontsize = getLegend(style_options,color_choice)
         m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
         m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
@@ -229,7 +229,7 @@ with st.sidebar:
     st.divider()
     color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
-    colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
     alpha = 0.8
     st.divider()
@@ -348,9 +348,9 @@ with st.sidebar:
     for label in style_options: # get selected filters (based on the buttons selected)
         with st.expander(label):
             if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
-                opts = getButtons(style_options, label, default_boxes)
             else: # other buttons are not on by default.
-                opts = getButtons(style_options, label)
             filters.update(opts)
         selected = {k: v for k, v in filters.items() if v}
@@ -371,7 +371,7 @@ with st.sidebar:
 # Display CA 30x30 Data
 if 'out' not in locals():
     style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
-    legend, position, bg_color, fontsize = getLegend(style_options, color_choice)
     m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
     m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
@@ -397,11 +397,11 @@ colors = (
 # get summary tables used for charts + printed table
 # df - charts; df_tab - printed table (omits colors)
 if 'out' not in locals():
-    df, df_tab, df_percent, df_bar_30x30 = summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals)
     total_percent = 100*df_percent.percent_CA.sum()
 else:
-    df = summary_table_sql(ca, column, colors, ids)
     total_percent = 100*df.percent_CA.sum()
@@ -431,7 +431,7 @@ with main:
         with st.container():
             st.markdown(f"{total_percent}% CA Protected", help = "Total percentage of 30x30 conserved lands, updates based on displayed data")
-            st.altair_chart(area_plot(df, column), use_container_width=True)
             if 'df_bar_30x30' in locals(): #if we use chatbot, we won't have these graphs.
                 if column not in ["status", "gap_code"]:

     elif ("id" and "geom" in result.columns):
         style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
+        legend, position, bg_color, fontsize = get_legend(style_options,color_choice)
         m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
         m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
     st.divider()
     color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
+    colorby_vals = get_color_vals(style_options, color_choice) #get options for selected color_by column
     alpha = 0.8
     st.divider()
     for label in style_options: # get selected filters (based on the buttons selected)
         with st.expander(label):
             if label in ["GAP Code","30x30 Status"]: # gap code 1 and 2 are on by default
+                opts = get_buttons(style_options, label, default_boxes)
             else: # other buttons are not on by default.
+                opts = get_buttons(style_options, label)
             filters.update(opts)
         selected = {k: v for k, v in filters.items() if v}
 # Display CA 30x30 Data
 if 'out' not in locals():
     style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
+    legend, position, bg_color, fontsize = get_legend(style_options, color_choice)
     m.add_legend(legend_dict = legend, position = position, bg_color = bg_color, fontsize = fontsize)
     m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
 # get summary tables used for charts + printed table
 # df - charts; df_tab - printed table (omits colors)
 if 'out' not in locals():
+    df, df_tab, df_percent, df_bar_30x30 = get_summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals)
     total_percent = 100*df_percent.percent_CA.sum()
 else:
+    df = get_summary_table_sql(ca, column, colors, ids)
     total_percent = 100*df.percent_CA.sum()
         with st.container():
             st.markdown(f"{total_percent}% CA Protected", help = "Total percentage of 30x30 conserved lands, updates based on displayed data")
+            st.altair_chart(area_chart(df, column), use_container_width=True)
             if 'df_bar_30x30' in locals(): #if we use chatbot, we won't have these graphs.
                 if column not in ["status", "gap_code"]:

app/utils.py CHANGED Viewed

@@ -1,400 +1,45 @@
 import streamlit as st
 import streamlit.components.v1 as components
-import base64
 import leafmap.maplibregl as leafmap
 import altair as alt
 import ibis
 from ibis import _
 import ibis.selectors as s
 import os
-import pandas as pd
-import geopandas as gpd
 from shapely import wkb
-import sqlalchemy
-import pathlib
 from typing import Optional
 from functools import reduce
 from itertools import chain
 from variables import *
-def colorTable(select_colors,color_choice,column):
-    colors = (ibis
-              .memtable(select_colors[color_choice], columns=[column, "color"])
-              .to_pandas()
-             )
-    return colors
-def get_summary(ca, combined_filter, column, main_group, colors=None):
-    df = ca.filter(combined_filter)
-    #total acres for each group
-    # if colors is not None and not colors.empty:
-    group_totals = df.group_by(main_group).aggregate(total_acres=_.acres.sum())
-    df = ca.filter(combined_filter)
-    df = (df
-            .group_by(*column)  # unpack the list for grouping
-            .aggregate(percent_CA= _.acres.sum() / ca_area_acres,
-                       acres = _.acres.sum(),
-                       mean_richness = (_.richness * _.acres).sum() / _.acres.sum(),
-                       mean_rsr = (_.rsr * _.acres).sum() / _.acres.sum(),
-                       mean_irrecoverable_carbon = (_.irrecoverable_carbon * _.acres).sum() / _.acres.sum(),
-                       mean_manageable_carbon = (_.manageable_carbon * _.acres).sum() / _.acres.sum(),
-                       mean_fire = (_.fire *_.acres).sum()/_.acres.sum(),
-                       mean_rxburn = (_.rxburn *_.acres).sum()/_.acres.sum(),
-                       mean_disadvantaged =  (_.disadvantaged_communities * _.acres).sum() / _.acres.sum(),
-                       mean_svi =  (_.svi * _.acres).sum() / _.acres.sum(),
-                      )
-            .mutate(percent_CA=_.percent_CA.round(3),
-                    acres=_.acres.round(0))
-         )
-    # if colors is not None and not colors.empty:
-    df = df.inner_join(group_totals, main_group)
-    df = df.mutate(percent_group=( _.acres / _.total_acres).round(3))
-    if colors is not None and not colors.empty: #only the df will have colors, df_tab doesn't since we are printing it.
-        df = df.inner_join(colors, column[-1])
-    df = df.cast({col: "string" for col in column})
-    df = df.to_pandas()
-    return df
-def summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals,colorby_vals): # get df for charts + df_tab for printed table
-    colors = colorTable(select_colors,color_choice,column)
-    filters = []
-    if filter_cols and filter_vals: #if a filter is selected, add to list of filters
-        for filter_col, filter_val in zip(filter_cols, filter_vals):
-            if len(filter_val) > 1:
-                filters.append(getattr(_, filter_col).isin(filter_val))
-            else:
-                filters.append(getattr(_, filter_col) == filter_val[0])
-    if column not in filter_cols: #show color_by column in table by adding it as a filter (if it's not already a filter)
-        filter_cols.append(column)
-        filters.append(getattr(_, column).isin(colorby_vals[column]))
-    combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
-    only_conserved = (combined_filter) & (_.status.isin(['30x30-conserved']))
-    df_percent = get_summary(ca, only_conserved, [column],column, colors) # df used for percentage, excludes non-conserved.
-    df_tab = get_summary(ca, combined_filter, filter_cols, column, colors = None) #df used for printed table
-    if "non-conserved" in list(chain.from_iterable(filter_vals)):
-       combined_filter = (combined_filter) | (_.status.isin(['non-conserved']))
-    df = get_summary(ca, combined_filter, [column], column, colors) # df used for charts
-    df_bar_30x30 = None # no stacked charts if we have status/gap_code
-    if column not in ["status","gap_code"]: # df for stacked 30x30 status bar chart
-        colors = colorTable(select_colors,"30x30 Status",'status')
-        df_bar_30x30 = get_summary(ca, combined_filter, [column, 'status'], column, colors) # df used for charts
-    return df, df_tab, df_percent, df_bar_30x30
-def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for printed table
-    filters = [_.id.isin(ids)]
-    combined_filter = reduce(lambda x, y: x & y, filters) #combining all the filters into ibis filter expression
-    df = get_summary(ca, combined_filter, [column], column, colors) # df used for charts
-    return df
-def get_hex(df, color,sort_order):
-    return list(df.drop_duplicates(subset=color, keep="first")
-                .set_index(color)
-                .reindex(sort_order)
-                .dropna()["color"])
-def transform_label(label, x_field):
-    # converting labels for that gnarly stacked bar chart
-    if x_field == "access_type":
-        return label.replace(" Access", "")
-    elif x_field == "ecoregion":
-        label = label.replace("Northern California", "NorCal")
-        label = label.replace("Southern California", "SoCal")
-        label = label.replace("Southeastern", "SE.")
-        label = label.replace("Northwestern", "NW.")
-        label = label.replace("and", "&")
-        label = label.replace("California", "CA")
-        return label
-    else:
-        return label
-def stacked_bar(df, x, y, color, title, colors):
-    label_colors = colors['color'].to_list()
-    # bar order
-    if x == "established":  # order labels in chronological order, not alphabetic.
-        sort = '-x'
-    elif x == "access_type":  # order based on levels of openness
-        sort = ['Open', 'Restricted', 'No Public', "Unknown"]
-    elif x == "easement":
-        sort = ['True', 'False']
-    elif x == "manager_type":
-        sort = ["Federal", "Tribal", "State", "Special District", "County", "City", "HOA",
-                "Joint", "Non Profit", "Private", "Unknown"]
-    elif x == "status":
-        sort = ["30x30-conserved", "other-conserved", "unknown", "non-conserved"]
-    elif x == "ecoregion":
-        sort = ['SE. Great Basin', 'Mojave Desert', 'Sonoran Desert', 'Sierra Nevada',
-                'SoCal Mountains & Valleys', 'Mono', 'Central CA Coast', 'Klamath Mountains',
-                'NorCal Coast', 'NorCal Coast Ranges', 'NW. Basin & Range', 'Colorado Desert',
-                'Central Valley Coast Ranges', 'SoCal Coast', 'Sierra Nevada Foothills',
-                'Southern Cascades', 'Modoc Plateau', 'Great Valley (North)',
-                'NorCal Interior Coast Ranges', 'Great Valley (South)']
-    else:
-        sort = 'x'
-    if x == "manager_type":
-        angle = 270
-        height = 350
-    elif x == 'ecoregion':
-        angle = 270
-        height = 430
-    else:
-        angle = 0
-        height = 310
-    # stacked bar order
-    sort_order = ['30x30-conserved', 'other-conserved', 'unknown', 'non-conserved']
-    y_titles = {
-        'ecoregion': 'Ecoregion (%)',
-        'established': 'Year (%)',
-        'manager_type': 'Manager Type (%)',
-        'easement': 'Easement (%)',
-        'access_type': 'Access (%)'
-    }
-    ytitle = y_titles.get(x, y)
-    color_hex = get_hex(df[[color, 'color']], color, sort_order)
-    sort_order = sort_order[0:len(color_hex)]
-    df["stack_order"] = df[color].apply(lambda val: sort_order.index(val) if val in sort_order else len(sort_order))
-    # shorten labels to fit on chart
-    label_transform = f"datum.{x}"
-    if x == "access_type":
-        label_transform = f"replace(datum.{x}, ' Access', '')"
-    elif x == "ecoregion":
-        label_transform = (
-            "replace("
-            "replace("
-            "replace("
-            "replace("
-            "replace("
-            "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
-            "'Southern California', 'SoCal'),"
-            "'Southeastern', 'SE.'),"
-            "'Northwestern', 'NW.'),"
-            "'and', '&'),"
-            "'California', 'CA')"
-        )
-    # to match the colors in the map to each label, need to write some ugly code..
-    #  bar chart w/ xlabels hidden
-    chart = alt.Chart(df).mark_bar(height = 500).transform_calculate(
-        xlabel=label_transform
-    ).encode(
-        x=alt.X("xlabel:N", sort=sort, title=None,
-                axis=alt.Axis(labelLimit=150, labelAngle=angle, labelColor="transparent")),
-        y=alt.Y(y, title=ytitle, axis=alt.Axis(labelPadding=5)).scale(domain=(0, 1)),
-        color=alt.Color(
-            color,
-            sort=sort_order,
-            scale=alt.Scale(domain=sort_order, range=color_hex)
-        ),
-        order=alt.Order("stack_order:Q", sort="ascending"),
-        tooltip=[
-            alt.Tooltip(x, type="nominal"),
-            alt.Tooltip(color, type="nominal"),
-            alt.Tooltip("percent_group", type="quantitative", format=",.1%"),
-            alt.Tooltip("acres", type="quantitative", format=",.0f"),
-        ]
-    )
-    transformed_labels = [transform_label(str(lab), x) for lab in colors[x]]
-    labels_df = colors
-    labels_df['xlabel'] = transformed_labels
-    # 2 layers, 1 for the symbol and 1 for the text
-    if angle == 0:
-        symbol_layer = alt.Chart(labels_df).mark_point(
-            filled=True,
-            shape="circle",
-            size=100,
-            xOffset = 0,
-            yOffset=130,
-            align = 'left',
-            tooltip = False
-        ).encode(
-            x=alt.X("xlabel:N", sort=sort),
-            color=alt.Color("color:N", scale=None)
-        )
-        text_layer = alt.Chart(labels_df).mark_text(
-            dy=115,  # shifts the text to the right of the symbol
-            dx = 0,
-            yOffset=0,
-            xOffset = 0,
-            align='center',
-            color="black",
-            tooltip = False
-        ).encode(
-            x=alt.X("xlabel:N", sort=sort),
-            text=alt.Text("xlabel:N")
-        )
-    # vertical labels
-    elif angle == 270:
-        symbol_layer = alt.Chart(labels_df).mark_point(
-            xOffset = 0,
-            yOffset= 100,
-            filled=True,
-            shape="circle",
-            size=100,
-            tooltip = False
-        ).encode(
-            x=alt.X("xlabel:N", sort=sort),
-            color=alt.Color("color:N", scale=None)
-        )
-        text_layer = alt.Chart(labels_df).mark_text(
-            dy=0,
-            dx = -110,
-            angle=270,
-            align='right',
-            color="black",
-            tooltip = False
-        ).encode(
-            x=alt.X("xlabel:N", sort=sort),
-            text=alt.Text("xlabel:N")
-        )
-    custom_labels = alt.layer(symbol_layer, text_layer)
-    final_chart = alt.layer(chart, custom_labels)
-    # put it all together
-    final_chart = final_chart.properties(
-        width="container",
-        height=height,
-        title=title
-    ).configure_legend(
-        direction='horizontal',
-        orient='top',
-        columns=2,
-        title=None,
-        labelOffset=2,
-        offset=10,
-        symbolType = 'square'
-    ).configure_title(
-        fontSize=18, align="center", anchor='middle', offset=10
-    )
-    return final_chart
-def area_plot(df, column):  # Percent protected pie chart
-    base = alt.Chart(df).encode(
-        alt.Theta("percent_CA:Q").stack(True),
-    )
-    pie = (
-        base
-        .mark_arc(innerRadius=40, outerRadius=100, stroke="black", strokeWidth=0.5)
-        .encode(
-            alt.Color("color:N").scale(None).legend(None),
-            tooltip=[
-                alt.Tooltip(column, type="nominal"),
-                alt.Tooltip("percent_CA", type="quantitative", format=",.1%"),
-                alt.Tooltip("acres", type="quantitative", format=",.0f"),
-            ]
-        )
-    )
-    text = (
-        base
-        .mark_text(radius=80, size=14, color="white")
-        .encode(text=column + ":N")
-    )
-    plot = pie  # pie + text
-    return plot.properties(width="container", height=290)
-def bar_chart(df, x, y, title): #display summary stats for color_by column
-    #axis label angles / chart size
-    if x == "manager_type": #labels are too long, making vertical
-        angle = 270
-        height = 373
-    elif x == 'ecoregion': # make labels vertical and figure taller
-        angle = 270
-        height = 430
-    else: #other labels are horizontal
-        angle = 0
-        height = 310
-    # order of bars
-    sort = 'x'
-    lineBreak = ''
-    if x == "established": # order labels in chronological order, not alphabetic.
-        sort = '-x'
-    elif x == "access_type": #order based on levels of openness
-        sort=['Open', 'Restricted', 'No Public', "Unknown"]
-    elif x == "easement":
-        sort=['True','False']
-    elif x == "manager_type":
-        sort = ["Federal","Tribal","State","Special District", "County", "City", "HOA","Joint","Non Profit","Private","Unknown"]
-    elif x == "ecoregion":
-       sort = ['SE. Great Basin','Mojave Desert','Sonoran Desert','Sierra Nevada','SoCal Mountains & Valleys','Mono',
-                'Central CA Coast','Klamath Mountains','NorCal Coast','NorCal Coast Ranges',
-                'NW. Basin & Range','Colorado Desert','Central Valley Coast Ranges','SoCal Coast',
-                'Sierra Nevada Foothills','Southern Cascades','Modoc Plateau','Great Valley (North)','NorCal Interior Coast Ranges',
-                'Great Valley (South)']
-    elif x == "status":
-        sort = ["30x30-conserved","other-conserved","unknown","non-conserved"]
-        lineBreak = '-'
-    # modify label names in bar chart to fit in frame
-    label_transform = f"datum.{x}"  # default; no change
-    if x == "access_type":
-        label_transform = f"replace(datum.{x}, ' Access', '')"  #omit 'access' from access_type
-    elif x == "ecoregion":
-        label_transform = (
-            "replace("
-            "replace("
-            "replace("
-            "replace("
-            "replace("
-            "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
-            "'Southern California', 'SoCal'),"
-            "'Southeastern', 'SE.'),"
-            "'Northwestern', 'NW.'),"
-            "'and', '&'),"
-            "'California', 'CA')"
-        )
-    y_titles = {
-        'mean_richness': 'Richness (Mean)',
-        'mean_rsr': 'Range-Size Rarity (Mean)',
-        'mean_irrecoverable_carbon': 'Irrecoverable Carbon (Mean)',
-        'mean_manageable_carbon': 'Manageable Carbon (Mean)',
-        'mean_disadvantaged': 'Disadvantaged (Mean)',
-        'mean_svi': 'SVI (Mean)',
-        'mean_fire': 'Fire (Mean)',
-        'mean_rxburn': 'Rx Fire (Mean)'
-    }
-    ytitle = y_titles.get(y, y)  # Default to `y` if not in the dictionary
-    x_title = next(key for key, value in select_column.items() if value == x)
-    chart = alt.Chart(df).mark_bar(stroke = 'black', strokeWidth = .5).transform_calculate(
-            label=label_transform
-        ).encode(
-        x=alt.X("label:N",
-                axis=alt.Axis(labelAngle=angle, title=x_title, labelLimit = 200),
-                        sort=sort),
-        y=alt.Y(y, axis=alt.Axis(title = ytitle)),
-        color=alt.Color('color').scale(None),
-        ).configure(lineBreak = lineBreak)
-    chart = chart.properties(width="container", height=height, title = title
-                            ).configure_title(fontSize=18, align = "center",anchor='middle')
-    return chart
 def sync_checkboxes(source):
     # gap 1 and gap 2 on -> 30x30-conserved on
     if source in ["gap_code1", "gap_code2"]:
         st.session_state['status30x30-conserved'] = st.session_state.gap_code1 and st.session_state.gap_code2
@@ -428,62 +73,103 @@ def sync_checkboxes(source):
             st.session_state.gap_code0 = st.session_state['statusnon-conserved']
-def getButtons(style_options, style_choice, default_boxes=None):
     column = style_options[style_choice]['property']
-    opts = [style[0] for style in style_options[style_choice]['stops']]
-    default_boxes = default_boxes or {}
-    buttons = {}
-    for name in opts:
-        key = column + str(name)
-        buttons[name] = st.checkbox(f"{name}", value=st.session_state[key], key=key, on_change = sync_checkboxes, args = (key,))
-    filter_choice = [key for key, value in buttons.items() if value]
-    return {column: filter_choice}
-def getColorVals(style_options, style_choice):
-    #df_tab only includes filters selected, we need to manually add "color_by" column (if it's not already a filter).
-    column = style_options[style_choice]['property']
-    opts = [style[0] for style in style_options[style_choice]['stops']]
-    d = {}
-    d[column] = opts
-    return d
-def getLegend(style_options, color_choice):
-    legend = {cat: color for cat, color in  style_options[color_choice]['stops']}
-    position = 'bottom-left'
-    fontsize = 15
-    bg_color = 'white'
-    # shorten legend for ecoregions
-    if color_choice == "Ecoregion":
-        legend = {key.replace("Northern California", "NorCal"): value for key, value in legend.items()}
-        legend = {key.replace("Southern California", "SoCal"): value for key, value in legend.items()}
-        legend = {key.replace("Southeastern", "SE."): value for key, value in legend.items()}
-        legend = {key.replace("and", "&"): value for key, value in legend.items()}
-        legend = {key.replace("California", "CA"): value for key, value in legend.items()}
-        legend = {key.replace("Northwestern", "NW."): value for key, value in legend.items()}
-        bg_color = 'rgba(255, 255, 255, 0.6)'
-        fontsize = 12
-    return legend, position, bg_color, fontsize
 def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
-    filters = []
-    for col, val in zip(filter_cols, filter_vals):
-        filters.append(["match", ["get", col], val, True, False])
-    combined_filters = ["all"] + filters
-    if "non-conserved" in list(chain.from_iterable(filter_vals)):
-       combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"],True, False]]
-    style = {
         "version": 8,
-        "sources": {
-            "ca": {
-                "type": "vector",
-                "url": "pmtiles://" + ca_pmtiles,
-            }
-        },
         "layers": [
             {
                 "id": "ca30x30",
@@ -491,39 +177,232 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
                 "source-layer": "ca30x30",
                 "type": "fill",
                 "filter": combined_filters,
-                "paint": {
-                    "fill-color": paint,
-                    "fill-opacity": alpha
-                }
             }
-        ]
     }
-    return style
 def get_pmtiles_style_llm(paint, ids):
-    combined_filters = ["all", ["match", ["get", "id"], ids, True, False]]
-    style = {
         "version": 8,
-        "sources": {
-            "ca": {
-                "type": "vector",
-                "url": "pmtiles://" + ca_pmtiles,
-            }
-        },
         "layers": [
             {
                 "id": "ca30x30",
                 "source": "ca",
                 "source-layer": "ca30x30",
                 "type": "fill",
-                "filter": combined_filters,
-                "paint": {
-                    "fill-color": paint,
-                    "fill-opacity": 1,
-                }
             }
-        ]
     }
-    return style

 import streamlit as st
 import streamlit.components.v1 as components
+import pandas as pd
 import leafmap.maplibregl as leafmap
 import altair as alt
 import ibis
 from ibis import _
 import ibis.selectors as s
 import os
 from shapely import wkb
 from typing import Optional
 from functools import reduce
 from itertools import chain
 from variables import *
+######################## UI FUNCTIONS
def get_buttons(style_options, style_choice, default_boxes=None):
    """
    Draw one Streamlit checkbox per category of a style option and return the ticked ones.

    Args:
        style_options: Mapping of display label -> style spec. Each spec is read for
            its 'property' (the column name) and its 'stops' (a sequence whose items
            start with the category value).
        style_choice: Key into ``style_options`` selecting which checkbox group to draw.
        default_boxes: Accepted so existing callers keep working; it is not consulted
            here -- the initial state of every box comes from ``st.session_state``.

    Returns:
        dict: ``{column: [category, ...]}`` with the categories whose box is ticked,
        in the order they appear in 'stops'.
    """
    column = style_options[style_choice]['property']
    selected = []
    for stop in style_options[style_choice]['stops']:
        name = stop[0]
        # Widget key doubles as the session-state key that sync_checkboxes reads/writes.
        key = column + str(name)
        checked = st.checkbox(
            f"{name}",
            # Fall back to "unchecked" when the key was never seeded in session state
            # instead of failing with KeyError.
            value=st.session_state.get(key, False),
            key=key,
            on_change=sync_checkboxes,
            args=(key,),
        )
        if checked:
            selected.append(name)
    return {column: selected}
 def sync_checkboxes(source):
+    """
+    Synchronizes checkbox selections in Streamlit based on 30x30 status and GAP codes.
+    """
     # gap 1 and gap 2 on -> 30x30-conserved on
     if source in ["gap_code1", "gap_code2"]:
         st.session_state['status30x30-conserved'] = st.session_state.gap_code1 and st.session_state.gap_code2
             st.session_state.gap_code0 = st.session_state['statusnon-conserved']
def color_table(select_colors, color_choice, column):
    """
    Build a pandas DataFrame with two columns, ``column`` and "color", from the
    entry of ``select_colors`` stored under ``color_choice``.
    """
    palette = select_colors[color_choice]
    palette_table = ibis.memtable(palette, columns=[column, "color"])
    return palette_table.to_pandas()
def get_color_vals(style_options, style_choice):
    """
    Return ``{property: [category, ...]}`` for the chosen style option, where the
    categories are the first element of each entry in that option's 'stops'.
    """
    spec = style_options[style_choice]
    column = spec['property']
    categories = []
    for stop in spec['stops']:
        categories.append(stop[0])
    return {column: categories}
+######################## SUMMARY & DATA FUNCTIONS
def get_summary(ca, combined_filter, column, main_group, colors = None):
    """
    Filter ``ca`` and aggregate acreage plus acre-weighted means per group.

    Args:
        ca: Table expression to summarize (must expose ``acres``, and the metric
            columns listed below).
        combined_filter: Predicate passed to ``ca.filter``.
        column: List of column names to group by; the last one is the join key
            for ``colors`` and every one is cast to string in the result.
        main_group: Column used to compute ``total_acres``, the denominator of
            ``percent_group``.
        colors: Optional color lookup table; joined in only when it is not None
            and not empty.

    Returns:
        The executed (materialized) summary with ``percent_CA``, ``acres``, the
        ``mean_*`` columns, ``total_acres`` and ``percent_group``.
    """
    subset = ca.filter(combined_filter)

    # total acres for each main group
    group_totals = subset.group_by(main_group).aggregate(total_acres=_.acres.sum())

    # output column -> source column; dict order fixes the order of the outputs
    weighted_sources = {
        "mean_richness": "richness",
        "mean_rsr": "rsr",
        "mean_irrecoverable_carbon": "irrecoverable_carbon",
        "mean_manageable_carbon": "manageable_carbon",
        "mean_fire": "fire",
        "mean_rxburn": "rxburn",
        "mean_disadvantaged": "disadvantaged_communities",
        "mean_svi": "svi",
    }
    # acre-weighted mean of each metric within a group
    weighted_means = {
        out_name: (getattr(_, src_name) * _.acres).sum() / _.acres.sum()
        for out_name, src_name in weighted_sources.items()
    }

    aggregated = subset.group_by(*column).aggregate(
        percent_CA=(_.acres.sum() / ca_area_acres),
        acres=_.acres.sum(),
        **weighted_means,
    )
    rounded = aggregated.mutate(percent_CA=_.percent_CA.round(3), acres=_.acres.round(0))

    with_totals = rounded.inner_join(group_totals, main_group)
    summary = with_totals.mutate(percent_group=(_.acres / _.total_acres).round(3))

    # only chart tables carry colors; the printed table is called with colors=None
    if not (colors is None or colors.empty):
        summary = summary.inner_join(colors, column[-1])

    as_strings = {name: "string" for name in column}
    return summary.cast(as_strings).execute()
def get_summary_table(ca, column, select_colors, color_choice, filter_cols, filter_vals, colorby_vals):
    """
    Build the four summaries consumed by the charts and the printed table.

    Note: when ``column`` is not already in ``filter_cols`` it is appended to that
    list in place, so the caller's list is modified.

    Returns:
        tuple: ``(df, df_tab, df_percent, df_bar_30x30)`` where
        ``df`` feeds the charts, ``df_tab`` is the printed table (no colors),
        ``df_percent`` is restricted to status '30x30-conserved', and
        ``df_bar_30x30`` is the per-status breakdown, or None when ``column`` is
        "status" or "gap_code".
    """
    colors = color_table(select_colors, color_choice, column)

    # one isin() predicate per filter column that has at least one selected value
    filters = []
    for col, vals in zip(filter_cols, filter_vals):
        if vals:
            filters.append(getattr(_, col).isin(vals))

    # make sure the color-by column shows up in the table even if it is not a filter
    if column not in filter_cols:
        filter_cols.append(column)
        filters.append(getattr(_, column).isin(colorby_vals[column]))

    # AND all predicates together into a single ibis filter expression
    combined_filter = reduce(lambda acc, pred: acc & pred, filters)
    only_conserved = combined_filter & (_.status.isin(['30x30-conserved']))

    # percentage summary, restricted to 30x30-conserved
    df_percent = get_summary(ca, only_conserved, [column], column, colors)

    # printed table, grouped by every filter column, without colors
    df_tab = get_summary(ca, combined_filter, filter_cols, column, colors=None)

    # when "non-conserved" is among the selected values, OR those rows back in
    if "non-conserved" in chain.from_iterable(filter_vals):
        combined_filter = combined_filter | (_.status.isin(['non-conserved']))

    # chart summary
    df = get_summary(ca, combined_filter, [column], column, colors)

    # stacked 30x30 status bar chart; skipped when grouping by status or gap code
    if column in ["status", "gap_code"]:
        df_bar_30x30 = None
    else:
        status_colors = color_table(select_colors, "30x30 Status", 'status')
        df_bar_30x30 = get_summary(ca, combined_filter, [column, 'status'], column, status_colors)

    return df, df_tab, df_percent, df_bar_30x30
def get_summary_table_sql(ca, column, colors, ids):
    """
    Summarize only the rows of ``ca`` whose ``id`` is in ``ids``, grouped by ``column``.
    """
    # the id membership test is the whole filter; no other predicates are combined
    return get_summary(ca, _.id.isin(ids), [column], column, colors)
+######################## MAP STYLING FUNCTIONS
 def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
+    """
+    Generates a MapLibre GL style for PMTiles with specified filters.
+    """
+    filters = [["match", ["get", col], val, True, False] for col, val in zip(filter_cols, filter_vals)]
+    combined_filters = ["all", *filters]
+    if "non-conserved" in chain.from_iterable(filter_vals):
+        combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
+    return {
         "version": 8,
+        "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
         "layers": [
             {
                 "id": "ca30x30",
                 "source-layer": "ca30x30",
                 "type": "fill",
                 "filter": combined_filters,
+                "paint": {"fill-color": paint, "fill-opacity": alpha},
             }
+        ],
     }
 def get_pmtiles_style_llm(paint, ids):
     """
     Build a MapLibre GL style dict that draws only the features whose ``id``
     is contained in ``ids``, filled with ``paint`` at full opacity.
     """
     source = {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}
     layer = {
         "id": "ca30x30",
         "source": "ca",
         "source-layer": "ca30x30",
         "type": "fill",
         # membership test of the feature id against the literal id list
         "filter": ["in", ["get", "id"], ["literal", ids]],
         "paint": {"fill-color": paint, "fill-opacity": 1},
     }
     return {
         "version": 8,
         "sources": {"ca": source},
         "layers": [layer],
     }
def get_legend(style_options, color_choice):
    """
    Generates a legend dictionary with color mapping and formatting adjustments.

    Args:
        style_options: Mapping of color-by label -> paint definition; the
            definition's 'stops' entry is a sequence of (category, color) pairs.
        color_choice: Key of `style_options` to build the legend for.

    Returns:
        Tuple `(legend, position, bg_color, fontsize)` where `legend` maps
        category -> color. For "Ecoregion" the category names are shortened
        and a smaller font / translucent background is used.
    """
    import re  # function-scope so the block does not rely on the file's import header

    legend = dict(style_options[color_choice]['stops'])
    position, fontsize, bg_color = 'bottom-left', 15, 'white'
    # shorten legend for ecoregions
    if color_choice == "Ecoregion":
        # Two-word phrases come before the bare "California" so they win.
        abbreviations = (
            ("Northern California", "NorCal"),
            ("Southern California", "SoCal"),
            ("Southeastern", "SE."),
            ("Northwestern", "NW."),
            ("California", "CA"),
        )

        def shorten(name):
            for phrase, short in abbreviations:
                name = name.replace(phrase, short)
            # Whole-word match only: a plain substring replace would also
            # rewrite names such as "Islands" or "Highlands".
            return re.sub(r"\band\b", "&", name)

        legend = {shorten(key): value for key, value in legend.items()}
        bg_color = 'rgba(255, 255, 255, 0.6)'
        fontsize = 12
    return legend, position, bg_color, fontsize
+######################## CHART FUNCTIONS
def area_chart(df, column):
    """
    Build an Altair donut chart of `percent_CA`, one arc per row of `df`.

    Arc colors are taken verbatim from the `color` column (no scale, no
    legend); the tooltip shows `column`, the percentage and the acreage.
    """
    hover = [
        alt.Tooltip(column, type="nominal"),
        alt.Tooltip("percent_CA", type="quantitative", format=",.1%"),
        alt.Tooltip("acres", type="quantitative", format=",.0f"),
    ]
    arc_angle = alt.Theta("percent_CA:Q").stack(True)
    arc_fill = alt.Color("color:N").scale(None).legend(None)
    donut = (
        alt.Chart(df)
        .encode(arc_angle)
        .mark_arc(innerRadius=40, outerRadius=100, stroke="black", strokeWidth=0.1)
        .encode(arc_fill, tooltip=hover)
    )
    return donut.properties(width="container", height=290)
def bar_chart(df, x, y, title):
    """Build a plain (non-stacked) bar chart via `create_bar_chart`."""
    chart = create_bar_chart(df=df, x=x, y=y, title=title)
    return chart
def stacked_bar(df, x, y, color, title, colors):
    """Build a stacked bar chart via `create_bar_chart` with stacking enabled."""
    chart = create_bar_chart(
        df=df,
        x=x,
        y=y,
        title=title,
        color=color,
        stacked=True,
        colors=colors,
    )
    return chart
def get_chart_settings(x, stacked):
    """
    Look up display settings for a chart whose x field is `x`.

    Returns a 4-tuple `(sort, angle, height, y_title)`:
      - sort: explicit category order for `x`, or "x" when none is defined
      - angle: x-axis label rotation in degrees
      - height: chart height (stacked charts are always the shortest)
      - y_title: y-axis title, falling back to `x` itself
    """
    category_order = {
        "established": "-x",
        "access_type": ["Open", "Restricted", "No Public", "Unknown"],
        "easement": ["True", "False"],
        "manager_type": [
            "Federal", "Tribal", "State", "Special District", "County", "City",
            "HOA", "Joint", "Non Profit", "Private", "Unknown",
        ],
        "status": ["30x30-conserved", "other-conserved", "unknown", "non-conserved"],
        "ecoregion": [
            "SE. Great Basin", "Mojave Desert", "Sonoran Desert", "Sierra Nevada",
            "SoCal Mountains & Valleys", "Mono", "Central CA Coast", "Klamath Mountains",
            "NorCal Coast", "NorCal Coast Ranges", "NW. Basin & Range", "Colorado Desert",
            "Central Valley Coast Ranges", "SoCal Coast", "Sierra Nevada Foothills",
            "Southern Cascades", "Modoc Plateau", "Great Valley (North)",
            "NorCal Interior Coast Ranges", "Great Valley (South)",
        ],
    }
    axis_titles = {
        "ecoregion": "Ecoregion (%)",
        "established": "Year (%)",
        "manager_type": "Manager Type (%)",
        "easement": "Easement (%)",
        "access_type": "Access (%)",
        "mean_richness": "Richness (Mean)",
        "mean_rsr": "Range-Size Rarity (Mean)",
        "mean_irrecoverable_carbon": "Irrecoverable Carbon (Mean)",
        "mean_manageable_carbon": "Manageable Carbon (Mean)",
        "mean_disadvantaged": "Disadvantaged (Mean)",
        "mean_svi": "SVI (Mean)",
        "mean_fire": "Fire (Mean)",
        "mean_rxburn": "Rx Fire (Mean)",
    }

    # long category names are drawn vertically
    if x == "manager_type" or x == "ecoregion":
        angle = 270
    else:
        angle = 0

    # stacked charts take priority over the per-field heights
    if stacked:
        height = 250
    elif x == "ecoregion":
        height = 400
    elif x == "manager_type":
        height = 350
    else:
        height = 300

    sort = category_order.get(x, "x")
    y_title = axis_titles.get(x, x)
    return sort, angle, height, y_title
def get_label_transform(x, label=None):
    """
    Shorten axis labels for field `x`, either as an Altair expression or directly.

    With `label` omitted, returns the expression string that computes the
    short label from `datum.<x>`. With `label` given, returns that label
    shortened in Python using the same replacements. Fields without a
    registered transform are passed through unchanged.
    """
    def strip_access(text):
        return text.replace(" Access", "")

    def abbreviate_ecoregion(text):
        text = text.replace("Northern California", "NorCal")
        text = text.replace("Southern California", "SoCal")
        text = text.replace("Southeastern", "SE.")
        text = text.replace("Northwestern", "NW.")
        text = text.replace("and", "&")
        return text.replace("California", "CA")

    access_expr = "replace(datum.access_type, ' Access', '')"
    ecoregion_expr = (
        "replace(replace(replace(replace(replace("
        "replace(datum.ecoregion, 'Northern California', 'NorCal'),"
        "'Southern California', 'SoCal'),"
        "'Southeastern', 'SE.'),"
        "'Northwestern', 'NW.'),"
        "'and', '&'),"
        "'California', 'CA')"
    )
    registry = {
        "access_type": (access_expr, strip_access),
        "ecoregion": (ecoregion_expr, abbreviate_ecoregion),
    }

    entry = registry.get(x)
    if label is None:
        # expression mode
        return f"datum.{x}" if entry is None else entry[0]
    # direct mode
    return label if entry is None else entry[1](label)
def get_hex(df, color, sort_order):
    """
    Collect the `color` column values of `df` in the order given by `sort_order`.

    `color` names the category column. Each category contributes the color
    of its first row; categories in `sort_order` that have no complete row
    in `df` are left out of the result.
    """
    first_per_category = df.drop_duplicates(subset=color, keep="first")
    by_category = first_per_category.set_index(color)
    # reindex inserts all-NaN rows for missing categories; dropna removes them again
    in_order = by_category.reindex(sort_order).dropna()
    return list(in_order["color"])
def create_bar_chart(df, x, y, title, color=None, stacked=False, colors=None):
    """
    Generalized function to create a bar chart, supporting both standard and stacked bars.

    Args:
        df: Data to plot. Must contain `x`, `y` and a `color` column; stacked
            charts also read the `color`-named column, `percent_group` and
            `acres`. The caller's frame is not modified.
        x: Field shown on the x axis (labels are shortened via
            `get_label_transform`).
        y: Field shown on the y axis.
        title: Chart title.
        color: Field used to split each bar into stacks (stacked only).
        stacked: Whether to draw a stacked chart with a symbol row beneath it.
        colors: Frame with an `x` column and a `color` column, used for the
            colored dots under a stacked chart (stacked only).

    Returns:
        The configured Altair chart (a vertical concatenation when stacked).

    Raises:
        ValueError: If `stacked` is true but `color` or `colors` is missing.
    """
    if stacked and (color is None or colors is None):
        raise ValueError("stacked charts require both `color` and `colors`")

    # helper functions
    sort, angle, height, y_title = get_chart_settings(x, stacked)
    label_transform = get_label_transform(x)

    if stacked:
        # order stacks
        status_order = ["30x30-conserved", "other-conserved", "unknown", "non-conserved"]
        # One row per status that actually occurs in the data, in canonical
        # order. Domain and range are read from the same rows so they always
        # line up; slicing the canonical list by length would pair the wrong
        # status with a color whenever a middle status (e.g. "unknown") is absent.
        palette = (
            df[[color, "color"]]
            .drop_duplicates(subset=color, keep="first")
            .set_index(color)
            .reindex(status_order)
            .dropna()
        )
        sort_order = list(palette.index)
        color_hex = list(palette["color"])
        last_rank = len(sort_order)
        # assign() returns a new frame, so the caller's DataFrame is left
        # untouched; done before alt.Chart(df) so the chart sees the column.
        df = df.assign(
            stack_order=df[color].apply(
                lambda val: sort_order.index(val) if val in sort_order else last_rank
            )
        )

    # create base chart
    chart = (
        alt.Chart(df)
        .mark_bar(stroke="black", strokeWidth=0.1)
        .transform_calculate(xlabel=label_transform)
        .encode(
            x=alt.X("xlabel:N", sort=sort,
                    axis=alt.Axis(labelAngle=angle, title=None, labelLimit=200)),
            y=alt.Y(y, axis=alt.Axis(title=y_title, offset=-5)),
            tooltip=[alt.Tooltip(x, type="nominal"), alt.Tooltip(y, type="quantitative")]
        )
        .properties(width="container", height=height)
    )
    if stacked:
        # build chart
        chart = chart.encode(
            # x labels are hidden here; the symbol row below carries them
            x=alt.X("xlabel:N", sort=sort, title=None, axis=alt.Axis(labels=False)),
            y=alt.Y(y, axis=alt.Axis(title=y_title, offset=-5), scale=alt.Scale(domain=[0, 1])),
            color=alt.Color(color, sort=sort_order, scale=alt.Scale(domain=sort_order, range=color_hex)),
            order=alt.Order("stack_order:Q", sort="ascending"),
            tooltip=[
                alt.Tooltip(x, type="nominal"),
                alt.Tooltip(color, type="nominal"),
                alt.Tooltip("percent_group", type="quantitative", format=",.1%"),
                alt.Tooltip("acres", type="quantitative", format=",.0f"),
            ],
        )
        # use shorter label names (to save space)
        labels_df = colors.copy()
        labels_df["xlabel"] = [get_label_transform(x, str(lab)) for lab in colors[x]]
        # create symbols/label below chart; dots match map colors.
        symbol_layer = (
            alt.Chart(labels_df)
            .mark_point(filled=True, shape="circle", size=100, tooltip=False, yOffset=5)
            .encode(
                x=alt.X("xlabel:N", sort=sort,
                        axis=alt.Axis(labelAngle=angle, title=None, labelLimit=200)),
                color=alt.Color("color:N", scale=None),
            )
            .properties(height=1, width="container")
        )
        # append symbols below base chart
        final_chart = alt.vconcat(chart, symbol_layer, spacing=8).resolve_scale(x="shared")
    else:  # if not stacked, do single chart
        final_chart = chart.encode(
            color=alt.Color("color").scale(None)
        )
    # customize chart
    final_chart = final_chart.properties(
        title=title
    ).configure_legend(
        symbolStrokeWidth=0.1, direction="horizontal", orient="top",
        columns=2, title=None, labelOffset=2, offset=5,
        symbolType="square", labelFontSize=13,
    ).configure_title(
        fontSize=18, align="center", anchor="middle", offset=10
    )
    return final_chart