Spaces:
Sleeping
Sleeping
# This example does not use a langchain agent, | |
# The langchain sql chain has knowledge of the database, but doesn't interact with it becond intialization. | |
# The output of the sql chain is parsed seperately and passed to `duckdb.sql()` by streamlit | |
import os | |
os.environ["WEBSOCKET_TIMEOUT_MS"] = "300000" # no effect | |
import streamlit as st | |
import geopandas as gpd | |
import pandas as pd | |
from shapely import wkb | |
st.set_page_config(page_title="Explore US Protected Areas", page_icon="π¦", layout="wide") | |
st.title("Explore US Protected Areas") | |
## Database connection, reading directly from remote parquet file | |
from sqlalchemy import create_engine | |
from langchain.sql_database import SQLDatabase | |
db_uri = "duckdb:///my.duckdb" | |
stats = "https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/pad-stats.parquet" | |
groups = "https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/pad-groupings.parquet" | |
engine = create_engine(db_uri) #connect_args={'read_only': True}) | |
con = engine.connect() | |
con.execute("install spatial; load spatial;") | |
# con.execute(f"create or replace view stats as select * from read_parquet('{stats}');").fetchall() | |
con.execute(f"create or replace table groups as select * from read_parquet('{groups}');").fetchall() | |
db = SQLDatabase(engine, view_support=True) | |
def query_database(response): | |
z = con.execute(response).fetchall() | |
return pd.DataFrame(z).head(25) | |
import ibis | |
from ibis import _ | |
import ibis.selectors as s | |
import altair as alt | |
ibis_con = ibis.duckdb.connect("my.duckdb") | |
stats = ibis_con.read_parquet(stats) | |
us_lower_48_area_m2 = 7.8e+12 | |
def summary_table(stats, query, column): | |
#z = con.execute(query).fetchall() | |
groups = ibis_con.table("groups").sql(query.replace(";", "")) | |
df = (stats | |
.inner_join(groups, "row_n") | |
.select(~s.contains("_right")) | |
.rename(area = "area_square_meters") | |
.group_by(_[column]) | |
.aggregate(percent_protected = 100 * _.area.sum() / us_lower_48_area_m2, | |
hectares = _.area.sum() / 10000, | |
n = _.area.count(), | |
richness = (_.richness * _.area).sum() / _.area.sum(), | |
rsr = (_.rsr * _.area).sum() / _.area.sum(), | |
carbon_lost = (_.deforest_carbon * _.area).sum() / _.area.sum(), | |
crop_expansion = (_.crop_expansion * _.area).sum() / _.area.sum(), | |
human_impact = (_.human_impact * _.area).sum() / _.area.sum(), | |
) | |
.mutate(percent_protected = _.percent_protected.round(1)) | |
) | |
return df.to_pandas() | |
def area_plot(df, column): | |
base = alt.Chart(df).encode( | |
alt.Theta("percent_protected:Q").stack(True), | |
alt.Color(column+":N").legend(None) | |
) | |
pie = base.mark_arc(innerRadius= 40, outerRadius=80) | |
text = base.mark_text(radius=120, size=20).encode( | |
text="percent_protected:Q" | |
) | |
plot = pie + text | |
return st.altair_chart(plot) | |
def bar_chart(df, x, y): | |
chart = alt.Chart(df).mark_bar().encode( | |
x=x, | |
y=y, | |
color=alt.Color(x).legend(None) | |
).properties(width="container", height=300) | |
return chart | |
## ChatGPT Connection | |
from langchain_openai import ChatOpenAI | |
from langchain_community.llms import Ollama | |
# from langchain_community.llms import ChatOllama | |
models = {"chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])} | |
other_models = { | |
"chatgpt4": ChatOpenAI(model="gpt-4", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]), | |
"duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0), | |
"command-r-plus": Ollama(model="command-r-plus", temperature=0), | |
"mixtral:8x22b": Ollama(model="mixtral:8x22b", temperature=0), | |
"wizardlm2:8x22b": Ollama(model="wizardlm2:8x22b", temperature=0), | |
"sqlcoder": Ollama(model="sqlcoder", temperature=0), | |
"zephyr": Ollama(model="zephyr", temperature=0), | |
"gemma:7b": Ollama(model="gemma:7b", temperature=0), | |
"codegemma": Ollama(model="codegemma", temperature=0), | |
"llama2": Ollama(model="llama2", temperature=0), | |
} | |
with st.sidebar: | |
choice = st.radio("Select an LLM:", models) | |
llm = models[choice] | |
column = st.text_input("grouping column", "labels") | |
## A SQL Chain | |
from langchain.chains import create_sql_query_chain | |
chain = create_sql_query_chain(llm, db) | |
main = st.container() | |
## Does not preserve history | |
with main: | |
''' | |
The US [recently announced](https://www.conservation.gov/pages/america-the-beautiful-initiative) the first-ever national goal to conserve at least 30 percent of our lands and waters by the year 2030. | |
But which 30%? | |
Protected areas span a range of "GAP" areas [indicating the degree of protection](https://www.protectedlands.net/uses-of-pad-us/#conservation-of-biodiversity-2). Protected areas include not only owned or "fee"-based parcels such as National Parks and Monuments, | |
but also "easements" (see [feature classes](https://www.protectedlands.net/pad-us-technical-how-tos/#feature-classes-in-pad-us-2)) | |
- GAP 1: Managed for biodiversity with natural disturbance events allowed (for example, Wilderness, Research Natural Areas, some National Parks, some State or NGO Nature Preserves) | |
- GAP 2: Managed for biodiversity with management that may interfere with natural processes (for example, suppress wildfire or flood) | |
- GAP 3: Permanent protection, but the land is subject to multiple uses (forestry, farming, intensive recreation, etc. | |
- GAP 4: No known institutional mandates to prevent conversion of natural habitat types | |
Use the chat tool below to specify your own groupings of the data and see how they compare. | |
##### Try these example queries: | |
- gap 1, 2, 3 are labelled 'conserved lands' and gap 4 is labeled 'other' | |
- exclude gap 4, include only Federal manager types, labelled by manager_name | |
- label gap 1, 2 as "permanently protected", label gap 3 as "additional conserved area", and gap 4 as other | |
- label gap 1, 2 areas in category ="Easements" as "protected easements", gap 1,2 category="Fee" as "protected areas", gap 3 easements as "mixed use easements", gap 3 Fee as "mixed use lands". exclude gap 4. | |
''' | |
prefix = "construct a select query that creates a column called 'labels' that only contains rows that meet the following criteria:" | |
suffix = ". Do not use LIMIT. Always return all columns. Do not try to select specific columns." | |
st.markdown("Specify how data should be labelled, as in the examples above:") | |
chatbox = st.container() | |
with chatbox: | |
if prompt := st.chat_input(key="chain"): | |
st.chat_message("user").write(prompt) | |
with st.chat_message("assistant"): | |
response = chain.invoke({"question": prefix + prompt + suffix}) | |
st.write(response) | |
df = summary_table(stats, response, column) | |
with st.container(): | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
total_percent = df.percent_protected.sum().round(1) | |
f"{total_percent}% Continental US Covered" | |
area_plot(df, column) | |
with col2: | |
"Species Richness" | |
st.altair_chart(bar_chart(df, column, "richness"), use_container_width=True) | |
with col3: | |
"Range-Size Rarity" | |
st.altair_chart(bar_chart(df, column, "rsr"), use_container_width=True) | |
with st.container(): | |
col1b, col2b, col3b = st.columns(3) | |
with col1b: | |
"Carbon Lost ('02-'22)" | |
st.altair_chart(bar_chart(df, column, "carbon_lost"), use_container_width=True) | |
with col2b: | |
"Crop expansion" | |
st.altair_chart(bar_chart(df, column, "crop_expansion"), use_container_width=True) | |
with col3b: | |
"Human Impact" | |
st.altair_chart(bar_chart(df, column, "human_impact"), use_container_width=True) | |
st.divider() | |
st.dataframe(df) | |
#st.divider() | |
#with st.container(): | |
# st.text("Database schema (top 3 rows)") | |
# tbl = tbl = query_database("select * from groups limit 3") | |
# st.dataframe(tbl) | |
st.divider() | |
''' | |
Experimental prototype. | |
- Author: [Carl Boettiger](https://carlboettiger.info) | |
- For data sources and processing, see: https://beta.source.coop/repositories/cboettig/pad-us-3/description/ | |
''' | |