library(shiny)
library(bslib)
library(htmltools)
library(markdown)
library(fontawesome)
library(bsicons)
library(gt)
library(glue)
library(ggplot2)
library(mapgl)
library(dplyr)
library(duckdbfs)
duckdbfs::load_spatial()
css <- HTML("")
# Define the UI
ui <- page_sidebar(
fillable = FALSE, # do not squeeze to vertical screen space
tags$head(css),
titlePanel("Demo App"),
"This is a proof-of-principle for a simple chat-driven interface to dynamically explore geospatial data.
",
card(
layout_columns(
textInput("chat",
label = NULL,
"Which counties in California have the highest average social vulnerability?",
width = "100%"),
div(
actionButton("user_msg", "", icon = icon("paper-plane"),
class = "btn-primary btn-sm align-bottom"),
class = "align-text-bottom"),
col_widths = c(11, 1)),
fill = FALSE
),
layout_columns(
card(maplibreOutput("map")),
card(includeMarkdown("## Plot"),
plotOutput("chart1"),
plotOutput("chart2"),
),
col_widths = c(8, 4),
row_heights = c("600px"),
max_height = "700px"
),
gt_output("table"),
card(fill = TRUE,
card_header(fa("robot")),
accordion(
open = FALSE,
accordion_panel(
title = "show sql",
icon = fa("terminal"),
verbatimTextOutput("sql_code"),
),
accordion_panel(
title = "explain",
icon = fa("user", prefer_type="solid"),
textOutput("explanation"),
)
),
card(
card_header("Errata"),
markdown(
"
#### Credits
Developed by Carl Boettiger, UC Berkeley, 2025. BSD License.
Data from the US Census and CDC's [Social Vulnerability Index](https://www.atsdr.cdc.gov/place-health/php/svi/index.html)
#### Technical details
The app is written entirely in R using shiny. The app will translate natural language queries in SQL code using
a small open-weights language model. The SQL code is executed using the duckdb backend against cloud-native
geoparquet snapshot of the Social Vulnerability Index hosted on Source Cooperative. Summary chart data are also
computed in duckdb by streaming, providing responsive updates while needing minimal RAM or disk storage despite
the large size of the data sources.
The map is rendered and updated using MapLibre with PMTiles, which provides responsive rendering for large feature sets.
The PMTiles layer is also hosted on Source cooperative where it can be streamed efficiently.
")
)
),
sidebar = sidebar(
input_switch("redlines", "Redlined Areas", value = FALSE),
input_switch("svi", "Social Vulnerability", value = TRUE),
input_switch("richness", "Biodiversity Richness", value = FALSE),
input_switch("rsr", "Biodiversity Range Size Rarity", value = FALSE),
card(
card_header(bs_icon("github"), "Source code:"),
a(href = "https://github.com/boettiger-lab/geo-llm-r",
"https://github.com/boettiger-lab/geo-llm-r"))
),
theme = bs_theme(version = "5")
)
repo <- "https://data.source.coop/cboettig/social-vulnerability"
pmtiles <- glue("{repo}/svi2020_us_tract.pmtiles")
parquet <- glue("{repo}/svi2020_us_tract.parquet")
svi <- open_dataset(parquet, tblname = "svi") |>
filter(RPL_THEMES > 0)
con <- duckdbfs::cached_connection()
schema <- DBI::dbGetQuery(con, "PRAGMA table_info(svi)")
system_prompt = glue::glue('
You are a helpful agent who always replies strictly in JSON-formatted text.
Your task is to translate the users question into a SQL query that will be run
against the "svi" table in a duckdb database. The duckdb database has a
spatial extension which understands PostGIS operations as well.
Include semantically meaningful columns like COUNTY and STATE name.
In the data, each row represents an individual census tract. If asked for
county or state level statistics, be sure to aggregate across all the tracts
in that county or state.
The table schema is
The column called "RPL_THEMES" corresponds to the overall "Social vulnerability index" number.
Format your answer as follows:
{
"query": "your raw SQL response goes here",
"explanation": "your explanation of the query"
}
', .open = "<", .close = ">")
chat <- ellmer::chat_vllm(
base_url = "https://llm.nrp-nautilus.io/",
model = "llama3",
api_key = Sys.getenv("NRP_API_KEY"),
system_prompt = system_prompt,
api_args = list(temperature = 0)
)
# helper utilities
# faster/more scalable to pass maplibre the ids to refilter pmtiles,
# than to pass it the full geospatial/sf object
filter_column <- function(full_data, filtered_data, id_col = "FIPS") {
if (nrow(filtered_data) < 1) return(NULL)
values <- full_data |>
inner_join(filtered_data, copy = TRUE) |>
pull(id_col)
# maplibre syntax for the filter of PMTiles
list("in", list("get", id_col), list("literal", values))
}
chart1_data <- svi |>
group_by(COUNTY) |>
summarise(mean_svi = mean(RPL_THEMES)) |>
collect()
chart1 <- chart1_data |>
ggplot(aes(mean_svi)) + geom_density(fill="darkred") +
ggtitle("County-level vulnerability nation-wide")
# Define the server
server <- function(input, output, session) {
data <- reactiveValues(df = tibble())
output$chart1 <- renderPlot(chart1)
observeEvent(input$user_msg, {
stream <- chat$chat(input$chat)
# optional, remember previous discussion
#chat_append("chat", stream)
# Parse response
response <- jsonlite::fromJSON(stream)
output$sql_code <- renderText(stringr::str_wrap(response$query, width = 60))
output$explanation <- renderText(response$explanation)
# Actually execute the SQL query generated:
df <- DBI::dbGetQuery(con, response$query)
# don't display shape column in render
df <- df |> select(-any_of("Shape"))
output$table <- render_gt(df, height = 300)
y_axis <- colnames(df)[!colnames(df) %in% colnames(svi)]
chart2 <- df |>
rename(social_vulnerability = y_axis) |>
ggplot(aes(social_vulnerability)) +
geom_density(fill = "darkred") +
xlim(c(0, 1)) +
ggtitle("Vulnerability of selected areas")
output$chart2 <- renderPlot(chart2)
# We need to somehow trigger this df to update the map.
data$df <- df
})
output$map <- renderMaplibre({
m <- maplibre(center = c(-92.9, 41.3), zoom = 3, height = "400")
if (input$redlines) {
m <- m |>
add_fill_layer(
id = "redlines",
source = list(type = "vector",
url = paste0("pmtiles://", "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles")),
source_layer = "mappinginequality",
fill_color = list("get", "fill")
)
}
if (input$richness) {
m <- m |>
add_raster_source(id = "richness",
tiles = "https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
maxzoom = 11
) |>
add_raster_layer(id = "richness-layer",
source = "richness")
}
if (input$rsr) {
m <- m |>
add_raster_source(id = "rsr",
tiles = "https://data.source.coop/cboettig/mobi/tiles/green/range-size-rarity-all/{z}/{x}/{y}.png",
maxzoom = 11
) |>
add_raster_layer(id = "richness-layer",
source = "rsr")
}
if (input$svi) {
m <- m |>
add_fill_layer(
id = "svi_layer",
source = list(type = "vector",
url = paste0("pmtiles://", pmtiles)),
source_layer = "SVI2000_US_tract",
filter = filter_column(svi, data$df, "FIPS"),
fill_opacity = 0.5,
fill_color = interpolate(column = "RPL_THEMES",
values = c(0, 1),
stops = c("lightpink", "darkred"),
na_color = "lightgrey")
)
}
m})
}
# Run the app
shinyApp(ui = ui, server = server)