cboettig committed on
Commit
16c3919
·
1 Parent(s): 60f152e

choose :robot:

Browse files
Files changed (3) hide show
  1. app.R +45 -31
  2. schema.yml +4 -4
  3. system-prompt.md +3 -1
app.R CHANGED
@@ -35,7 +35,7 @@ ui <- page_sidebar(
35
  layout_columns(
36
  textInput("chat",
37
  label = NULL,
38
- "Which counties in California have the highest average social vulnerability?",
39
  width = "100%"),
40
  div(
41
  actionButton("user_msg", "", icon = icon("paper-plane"),
@@ -44,7 +44,7 @@ ui <- page_sidebar(
44
  col_widths = c(11, 1)),
45
  fill = FALSE
46
  ),
47
-
48
  textOutput("agent"),
49
 
50
 
@@ -55,14 +55,14 @@ ui <- page_sidebar(
55
  plotOutput("chart2"),
56
  ),
57
  col_widths = c(8, 4),
58
- row_heights = c("600px"),
59
- max_height = "700px"
60
  ),
61
 
62
  gt_output("table"),
63
 
64
  card(fill = TRUE,
65
- card_header(fa("robot")),
66
  accordion(
67
  open = FALSE,
68
  accordion_panel(
@@ -76,13 +76,21 @@ ui <- page_sidebar(
76
  textOutput("explanation"),
77
  )
78
  ),
79
- card(
 
80
  card_header("Errata"),
81
  shiny::markdown(readr::read_file("footer.md")),
82
- )
83
  ),
84
-
85
  sidebar = sidebar(
 
 
 
 
 
 
 
 
 
86
  input_switch("redlines", "Redlined Areas", value = FALSE),
87
  input_switch("svi", "Social Vulnerability", value = TRUE),
88
  input_switch("richness", "Biodiversity Richness", value = FALSE),
@@ -99,21 +107,15 @@ ui <- page_sidebar(
99
 
100
 
101
  repo <- "https://data.source.coop/cboettig/social-vulnerability"
102
- pmtiles <- glue("{repo}/svi2020_us_tract.pmtiles")
103
- parquet <- glue("{repo}/svi2020_us_tract.parquet")
104
  con <- duckdbfs::cached_connection()
105
  svi <- open_dataset(parquet, tblname = "svi") |> filter(RPL_THEMES > 0)
106
- schema <- read_file("schema.yml")
107
- system_prompt <- glue::glue(readr::read_file("system-prompt.md"),
108
- .open = "<", .close = ">")
109
-
110
- chat <- ellmer::chat_vllm(
111
- base_url = "https://llm.nrp-nautilus.io/",
112
- model = "llama3",
113
- api_key = Sys.getenv("NRP_API_KEY"),
114
- system_prompt = system_prompt,
115
- api_args = list(temperature = 0)
116
- )
117
 
118
  # helper utilities
119
  # faster/more scalable to pass maplibre the ids to refilter pmtiles,
@@ -140,17 +142,30 @@ server <- function(input, output, session) {
140
  chart1 <- chart1_data |>
141
  ggplot(aes(mean_svi)) + geom_density(fill="darkred") +
142
  ggtitle("County-level vulnerability nation-wide")
143
-
144
  data <- reactiveValues(df = tibble())
145
  output$chart1 <- renderPlot(chart1)
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  observeEvent(input$user_msg, {
148
  stream <- chat$chat(input$chat)
149
 
150
-
151
-
152
  # Parse response
153
- response <- jsonlite::fromJSON(stream)
 
154
 
155
  if ("query" %in% names(response)) {
156
  output$sql_code <- renderText(stringr::str_wrap(response$query, width = 60))
@@ -187,12 +202,12 @@ server <- function(input, output, session) {
187
  }
188
 
189
  })
190
-
191
 
192
 
193
  output$map <- renderMaplibre({
194
 
195
- m <- maplibre(center = c(-92.9, 41.3), zoom = 3, height = "400")
196
  if (input$redlines) {
197
  m <- m |>
198
  add_fill_layer(
@@ -230,7 +245,7 @@ server <- function(input, output, session) {
230
  id = "svi_layer",
231
  source = list(type = "vector",
232
  url = paste0("pmtiles://", pmtiles)),
233
- source_layer = "SVI2000_US_tract",
234
  filter = filter_column(svi, data$df, "FIPS"),
235
  fill_opacity = 0.5,
236
  fill_color = interpolate(column = "RPL_THEMES",
@@ -239,9 +254,8 @@ server <- function(input, output, session) {
239
  na_color = "lightgrey")
240
  )
241
  }
242
- m})
243
-
244
-
245
 
246
  }
247
 
 
35
  layout_columns(
36
  textInput("chat",
37
  label = NULL,
38
+ "Which four counties in California have the highest average social vulnerability?",
39
  width = "100%"),
40
  div(
41
  actionButton("user_msg", "", icon = icon("paper-plane"),
 
44
  col_widths = c(11, 1)),
45
  fill = FALSE
46
  ),
47
+
48
  textOutput("agent"),
49
 
50
 
 
55
  plotOutput("chart2"),
56
  ),
57
  col_widths = c(8, 4),
58
+ row_heights = c("500px"),
59
+ max_height = "600px"
60
  ),
61
 
62
  gt_output("table"),
63
 
64
  card(fill = TRUE,
65
+ card_header(fa("robot"), textOutput("model", inline = TRUE)),
66
  accordion(
67
  open = FALSE,
68
  accordion_panel(
 
76
  textOutput("explanation"),
77
  )
78
  ),
79
+ ),
80
+ card(
81
  card_header("Errata"),
82
  shiny::markdown(readr::read_file("footer.md")),
 
83
  ),
 
84
  sidebar = sidebar(
85
+ selectInput(
86
+ "select",
87
+ "Select an LLM:",
88
+ list("LLama3" = "llama3",
89
+ #"OLMO2 (AllenAI)" = "olmo",
90
+ "Gorilla (UC Berkeley)" = "gorilla"
91
+ )
92
+ ),
93
+
94
  input_switch("redlines", "Redlined Areas", value = FALSE),
95
  input_switch("svi", "Social Vulnerability", value = TRUE),
96
  input_switch("richness", "Biodiversity Richness", value = FALSE),
 
107
 
108
 
109
  repo <- "https://data.source.coop/cboettig/social-vulnerability"
110
+ pmtiles <- glue("{repo}/2022/SVI2022_US_tract.pmtiles")
111
+ parquet <- glue("{repo}/2022/SVI2022_US_tract.parquet")
112
  con <- duckdbfs::cached_connection()
113
  svi <- open_dataset(parquet, tblname = "svi") |> filter(RPL_THEMES > 0)
114
+
115
+ safe_parse <- function(txt) {
116
+ gsub("[\r\n]", " ", txt) |> gsub("\\s+", " ", x = _)
117
+ }
118
+
 
 
 
 
 
 
119
 
120
  # helper utilities
121
  # faster/more scalable to pass maplibre the ids to refilter pmtiles,
 
142
  chart1 <- chart1_data |>
143
  ggplot(aes(mean_svi)) + geom_density(fill="darkred") +
144
  ggtitle("County-level vulnerability nation-wide")
145
+
146
  data <- reactiveValues(df = tibble())
147
  output$chart1 <- renderPlot(chart1)
148
 
149
+ model <- reactive(input$select)
150
+ output$model <- renderText(input$select)
151
+ observe({
152
+ schema <- read_file("schema.yml")
153
+ system_prompt <- glue::glue(readr::read_file("system-prompt.md"),
154
+ .open = "<", .close = ">")
155
+ chat <- ellmer::chat_vllm(
156
+ base_url = "https://llm.nrp-nautilus.io/",
157
+ model = model(),
158
+ api_key = Sys.getenv("NRP_API_KEY"),
159
+ system_prompt = system_prompt,
160
+ api_args = list(temperature = 0)
161
+ )
162
+
163
  observeEvent(input$user_msg, {
164
  stream <- chat$chat(input$chat)
165
 
 
 
166
  # Parse response
167
+ response <- jsonlite::fromJSON(safe_parse(stream))
168
+ #response <- jsonlite::fromJSON(stream)
169
 
170
  if ("query" %in% names(response)) {
171
  output$sql_code <- renderText(stringr::str_wrap(response$query, width = 60))
 
202
  }
203
 
204
  })
205
+ })
206
 
207
 
208
  output$map <- renderMaplibre({
209
 
210
+ m <- maplibre(center = c(-104.9, 40.3), zoom = 3, height = "400")
211
  if (input$redlines) {
212
  m <- m |>
213
  add_fill_layer(
 
245
  id = "svi_layer",
246
  source = list(type = "vector",
247
  url = paste0("pmtiles://", pmtiles)),
248
+ source_layer = "svi",
249
  filter = filter_column(svi, data$df, "FIPS"),
250
  fill_opacity = 0.5,
251
  fill_color = interpolate(column = "RPL_THEMES",
 
254
  na_color = "lightgrey")
255
  )
256
  }
257
+ m
258
+ })
 
259
 
260
  }
261
 
schema.yml CHANGED
@@ -1,15 +1,15 @@
1
  - VARIABLE_NAME: ST
2
- DESCRIPTION: State-level FIPS code (two-digit integer)
3
  - VARIABLE_NAME: STATE
4
  DESCRIPTION: State name
5
  - VARIABLE_NAME: ST_ABBR
6
- DESCRIPTION: State abbreviation
7
  - VARIABLE_NAME: STCNTY
8
- DESCRIPTION: County-level FIPS code (5 digit integer)
9
  - VARIABLE_NAME: COUNTY
10
  DESCRIPTION: County name
11
  - VARIABLE_NAME: FIPS
12
- DESCRIPTION: Tract-level geographic identification (full Census Bureau FIPS code)
13
  - VARIABLE_NAME: LOCATION
14
  DESCRIPTION: Text description of tract county state
15
  - VARIABLE_NAME: AREA_SQMI
 
1
  - VARIABLE_NAME: ST
2
+ DESCRIPTION: INTEGER State-level FIPS code (two-digit integer)
3
  - VARIABLE_NAME: STATE
4
  DESCRIPTION: State name
5
  - VARIABLE_NAME: ST_ABBR
6
+ DESCRIPTION: State abbreviation, two-letter string
7
  - VARIABLE_NAME: STCNTY
8
+ DESCRIPTION: INTEGER County-level FIPS code (5 digit integer)
9
  - VARIABLE_NAME: COUNTY
10
  DESCRIPTION: County name
11
  - VARIABLE_NAME: FIPS
12
+ DESCRIPTION: INTEGER, Tract-level geographic identification (full Census Bureau FIPS code)
13
  - VARIABLE_NAME: LOCATION
14
  DESCRIPTION: Text description of tract county state
15
  - VARIABLE_NAME: AREA_SQMI
system-prompt.md CHANGED
@@ -8,10 +8,12 @@ Include semantically meaningful columns like COUNTY and STATE name.
8
  If your answer involves the construction of a SQL query, you must format your answer as follows:
9
 
10
  {
11
- "query": "your raw SQL response goes here",
12
  "explanation": "your explanation of the query"
13
  }
14
 
 
 
15
  If your answer does not involve a SQL query, please reply with the following format instead:
16
 
17
  {
 
8
  If your answer involves the construction of a SQL query, you must format your answer as follows:
9
 
10
  {
11
+ "query": "your raw SQL response goes here.",
12
  "explanation": "your explanation of the query"
13
  }
14
 
15
+ Think carefully about your SQL query, keep it concise and ensure it is entirely valid SQL syntax.
16
+
17
  If your answer does not involve a SQL query, please reply with the following format instead:
18
 
19
  {