##################################################################
# Single R Script: Mortality Analysis + Shiny - Two-Click BBox
# Using Weekly Queries (No Hard-Coded Years)
##################################################################
# See https://www.inaturalist.org/pages/api+recommended+practices
# Query Rate
# Please keep requests to about 1 per second, and around 10k API requests a day
# The API is meant to be used for building applications and for fetching small to medium batches of data. It is not meant to be a way to download data in bulk
# Requests exceeding this limit may be throttled and will return an HTTP 429 error ("Too Many Requests")
# Please add delays into your code to stay under these limits, especially if you're getting 429 errors
# We may block IPs that consistently exceed these limits
# Please use a single IP address for fetching data. If we think multiple IPs are being used in coordination to bypass rate limits, we may block those IPs regardless of query rate
# Downloading over 5 GB of media per hour or 24 GB of media per day may result in a permanent block
# If writing software to interact with the API, please consider using a custom User Agent to identify your application, or iNaturalist username, or something we might use to differentiate your requests
# The User-Agent can be set with an HTTP header, e.g. User-Agent: [application or user name]
#
### 1) Install/Load Required Packages ####
required_packages <- c(
"httr", "jsonlite", "tidyverse", "glue", "lubridate",
"wesanderson", "viridis", "shinycssloaders",
"DT", "maps", "mapdata", "leaflet", "leaflet.extras",
"shinythemes", "shiny"
)
installed_packages <- rownames(installed.packages())
for (pkg in required_packages) {
if (!pkg %in% installed_packages) {
install.packages(pkg, dependencies = TRUE)
}
}
library(httr)
library(jsonlite)
library(tidyverse)
library(glue)
library(lubridate)
library(wesanderson)
library(viridis)
library(shinycssloaders)
library(DT)
library(maps)
library(mapdata)
library(leaflet)
library(leaflet.extras)
library(shinythemes)
library(shiny)
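# --------------------------------------------------------------
# Optional: a minimal "polite GET" sketch following the API
# practices quoted at the top of this script -- it tags requests
# with a custom User-Agent and retries transient failures (e.g.
# HTTP 429) with exponential backoff via httr::RETRY(). The
# User-Agent string, retry count, and pause settings below are
# illustrative assumptions; substitute your own application name
# or iNaturalist username. The fetch functions below use plain
# GET(), but could be pointed at this helper instead.
polite_GET <- function(url) {
  httr::RETRY(
    "GET", url,
    httr::user_agent("dead-wildlife-shiny-app (your-inat-username)"), # hypothetical UA
    times = 3,       # up to 3 attempts
    pause_base = 2,  # exponential backoff base, in seconds
    pause_min = 1    # never pause less than 1 second between attempts
  )
}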
##################################################################
# 2) Mortality-Analysis Functions
##################################################################
# -- Base function to fetch dead observations over a specified date range --
fetch_dead_data_once <- function(
place_id = NULL,
swlat = NULL,
swlng = NULL,
nelat = NULL,
nelng = NULL,
start_date,
end_date,
iconic_taxa = NULL,
taxon_name = NULL,
conservation_status = NULL,
per_page = 200,
max_pages = 200
) {
base_url <- "https://api.inaturalist.org/v1/observations"
q_parts <- list(
"term_id=17", # 'Dead' annotation
"term_value_id=19", # 'Dead' annotation
"verifiable=true", # only verifiable
glue("d1={start_date}"), # start date
glue("d2={end_date}"), # end date
"order=desc",
"order_by=created_at",
glue("per_page={per_page}")
)
if (!is.null(iconic_taxa) && iconic_taxa != "") {
q_parts <- c(q_parts, glue("iconic_taxa={iconic_taxa}"))
}
if (!is.null(taxon_name) && taxon_name != "") {
q_parts <- c(q_parts, glue("taxon_name={URLencode(taxon_name)}"))
}
if (!is.null(conservation_status) && conservation_status != "") {
# Filter by iNat conservation status (e.g. "CR"), or pass a raw
# "key=value" parameter string straight through to the API
if (!grepl("=", conservation_status, fixed = TRUE)) {
q_parts <- c(q_parts, glue("cs={URLencode(conservation_status)}"))
} else {
q_parts <- c(q_parts, conservation_status)
}
}
query_params <- paste(q_parts, collapse = "&")
# Build location portion of query
loc_part <- ""
if (!is.null(place_id)) {
loc_part <- glue("&place_id={place_id}")
} else if (!is.null(swlat) && !is.null(swlng) &&
!is.null(nelat) && !is.null(nelng)) {
loc_part <- glue("&nelat={nelat}&nelng={nelng}&swlat={swlat}&swlng={swlng}")
} else {
stop("Must provide either 'place_id' OR bounding box (swlat, swlng, nelat, nelng).")
}
observations_list <- list()
current_page <- 1
while (current_page <= max_pages) {
query_url <- paste0(
base_url, "?", query_params, "&page=", current_page, loc_part
)
message("Fetching page ", current_page,
" [", start_date, " to ", end_date, "]:\n", query_url)
resp <- GET(query_url)
if (http_error(resp)) {
warning("HTTP error on page ", current_page, ": ", status_code(resp))
break
}
parsed <- content(resp, as = "text", encoding = "UTF-8") %>%
fromJSON(flatten = TRUE)
if (length(parsed$results) == 0) {
message("No more results at page ", current_page)
break
}
obs_page_df <- as_tibble(parsed$results)
observations_list[[current_page]] <- obs_page_df
# If the returned page is smaller than per_page, we've reached the last page
if (nrow(obs_page_df) < per_page) {
message("Reached last page of results at page ", current_page)
break
}
current_page <- current_page + 1
Sys.sleep(1.5) # Polite pause
}
observations_all <- bind_rows(observations_list)
return(observations_all)
}
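# A hedged usage example for fetch_dead_data_once(): one week of 'Dead'
# bird observations inside an assumed bounding box around the San
# Francisco Bay Area. The coordinates and dates are illustrative only.
# Wrapped in if (FALSE) so sourcing this script never fires API calls.
if (FALSE) {
  week_df <- fetch_dead_data_once(
    swlat = 37.2, swlng = -122.6,
    nelat = 38.2, nelng = -121.7,
    start_date = "2023-01-01",
    end_date = "2023-01-07",
    iconic_taxa = "Aves"
  )
  nrow(week_df)
}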
# -- Function to fetch data by iterating through each WEEK of a given year --
fetch_dead_data_weekly <- function(
year,
place_id = NULL,
swlat = NULL,
swlng = NULL,
nelat = NULL,
nelng = NULL,
iconic_taxa = NULL,
taxon_name = NULL,
conservation_status = NULL,
per_page = 200,
max_pages = 200
) {
start_of_year <- as.Date(glue("{year}-01-01"))
end_of_year <- as.Date(glue("{year}-12-31"))
# Create a sequence of "week starts" from Jan 1 to Dec 31
week_starts <- seq.Date(start_of_year, end_of_year, by = "1 week")
weekly_list <- list()
for (i in seq_along(week_starts)) {
start_date <- week_starts[i]
# If not the last index, end_date = next start - 1 day, else clamp to year-end
if (i < length(week_starts)) {
end_date <- week_starts[i + 1] - 1
} else {
end_date <- end_of_year
}
message("\n--- Querying ", year, ", Week #", i,
" [", start_date, " to ", end_date, "] ---")
df_week <- fetch_dead_data_once(
place_id = place_id,
swlat = swlat,
swlng = swlng,
nelat = nelat,
nelng = nelng,
start_date = start_date,
end_date = end_date,
iconic_taxa = iconic_taxa,
taxon_name = taxon_name,
conservation_status = conservation_status,
per_page = per_page,
max_pages = max_pages
)
weekly_list[[i]] <- df_week
Sys.sleep(1.5)
}
year_df <- bind_rows(weekly_list)
return(year_df)
}
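# Quick sanity check (hedged; year 2023 assumed) of the week-splitting
# logic above: seq.Date() yields 53 week starts, and the final "week"
# is clamped to a single day, Dec 31.
if (FALSE) {
  ws <- seq.Date(as.Date("2023-01-01"), as.Date("2023-12-31"), by = "1 week")
  length(ws)      # 53 week starts
  ws[length(ws)]  # "2023-12-31" -- start of the one-day final window
}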
# -- Wrapper that iterates over multiple years, pulling data weekly for each year --
getDeadVertebrates_weeklyLoop <- function(
years, # <--- No default: pass your own vector of years
place_id = NULL,
swlat = NULL,
swlng = NULL,
nelat = NULL,
nelng = NULL,
iconic_taxa = NULL,
taxon_name = NULL,
conservation_status = NULL,
per_page = 200, # API caps per_page at 200; a larger value would trip the last-page check
max_pages = 500,
outdir = NULL
) {
all_years_list <- list()
# For each year, run weekly fetch
for (yr in years) {
message("\n========= YEAR: ", yr, " ==========\n")
yr_df <- fetch_dead_data_weekly(
year = yr,
place_id = place_id,
swlat = swlat,
swlng = swlng,
nelat = nelat,
nelng = nelng,
iconic_taxa = iconic_taxa,
taxon_name = taxon_name,
conservation_status = conservation_status,
per_page = per_page,
max_pages = max_pages
) %>%
mutate(Window = as.character(yr))
all_years_list[[as.character(yr)]] <- yr_df
}
merged_df_all <- bind_rows(all_years_list)
# If no data found or missing crucial columns, create empty placeholders
if (!"created_at_details.date" %in% names(merged_df_all) ||
nrow(merged_df_all) == 0) {
daily_plot <- ggplot() +
labs(title = "No 'Dead' Observations Found", x = NULL, y = NULL) +
theme_void()
top_species_plot <- ggplot() +
labs(title = "No species data", x = NULL, y = NULL) +
theme_void()
map_hotspots_gg <- ggplot() +
labs(title = "No data for hotspots map") +
theme_void()
return(list(
merged_df_all = merged_df_all,
merged_df = merged_df_all,
daily_plot = daily_plot,
top_species_plot = top_species_plot,
map_hotspots_gg = map_hotspots_gg,
daily_90th_quant = NA
))
}
# Optionally write out to disk
if (!is.null(outdir)) {
if (!dir.exists(outdir)) {
dir.create(outdir, recursive = TRUE)
}
readr::write_csv(merged_df_all, file.path(outdir, "merged_df_ALL_data.csv"))
}
# Aggregate counts by day
counts_by_day <- merged_df_all %>%
mutate(obs_date = as.Date(`observed_on`)) %>%
group_by(Window, obs_date) %>%
summarise(n = n_distinct(id), .groups = "drop")
y_max_value <- max(counts_by_day$n, na.rm = TRUE)
n_windows <- length(unique(counts_by_day$Window))
# "Zissou1" offers only 5 discrete colors, so request a continuous ramp
# (a discrete request errors when more than 5 years are selected); this
# palette is currently consumed only by the commented-out scale_fill_manual() below
wes_colors <- wes_palette("Zissou1", n_windows, type = "continuous")
# Daily line plot
daily_plot <- ggplot(counts_by_day, aes(x = obs_date, y = n, color = Window)) +
geom_line(linewidth = 1.2) + # ggplot2 >= 3.4 deprecates 'size' for lines
geom_point(size = 2) +
# scale_color_viridis_d() +
scale_x_date(date_labels = "%b", date_breaks = "1 month") +
scale_y_continuous(limits = c(0, y_max_value)) +
labs(
title = glue("Daily 'Dead' Observations (Years {paste(years, collapse=', ')})"),
x = "Month",
y = "Number of Observations",
color = "Year"
) +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Top species bar plot
if ("taxon.name" %in% names(merged_df_all)) {
species_counts <- merged_df_all %>%
filter(!is.na(taxon.name)) %>%
group_by(Window, taxon.name) %>%
summarise(dead_count = n(), .groups = "drop")
top_species_overall <- species_counts %>%
group_by(taxon.name) %>%
summarise(total_dead = sum(dead_count)) %>%
arrange(desc(total_dead)) %>%
slice_head(n = 20)
species_top20 <- species_counts %>%
filter(taxon.name %in% top_species_overall$taxon.name)
top_species_plot <- ggplot(species_top20, aes(
x = reorder(taxon.name, -dead_count),
y = dead_count,
fill= Window
)) +
geom_col(position = position_dodge(width = 0.7)) +
coord_flip() +
# scale_fill_manual(values = wes_colors) +
# scale_color_viridis_d() +
labs(
title = "Top 20 Species with 'Dead' Observations",
x = "Species",
y = "Number of Dead Observations",
fill = "Year"
) +
theme_minimal(base_size = 14)
} else {
top_species_plot <- ggplot() +
labs(title = "No 'taxon.name' column found", x = NULL, y = NULL) +
theme_void()
}
# Identify "high mortality" days (>= 90th percentile)
daily_quantile <- quantile(counts_by_day$n, probs = 0.90, na.rm = TRUE)
high_mortality_days <- counts_by_day %>%
filter(n >= daily_quantile) %>%
pull(obs_date)
merged_high <- merged_df_all %>%
mutate(obs_date = as.Date(`observed_on`)) %>%
filter(obs_date %in% high_mortality_days)
# Map of top-90% mortality days
if ("location" %in% names(merged_high)) {
location_df <- merged_high %>%
filter(!is.na(location) & location != "") %>%
separate(location, into = c("lat_str", "lon_str"), sep = ",", remove = FALSE) %>%
mutate(
latitude = as.numeric(lat_str),
longitude = as.numeric(lon_str)
)
if (nrow(location_df) == 0) {
map_hotspots_gg <- ggplot() +
labs(title = "No data in top 90th percentile days with valid location") +
theme_void()
} else {
min_lon <- min(location_df$longitude, na.rm = TRUE)
max_lon <- max(location_df$longitude, na.rm = TRUE)
min_lat <- min(location_df$latitude, na.rm = TRUE)
max_lat <- max(location_df$latitude, na.rm = TRUE)
map_hotspots_gg <- ggplot(location_df, aes(x = longitude, y = latitude, color = Window)) +
borders("world", fill = "gray80", colour = "white") +
geom_point(alpha = 0.6, size = 2) +
# scale_color_viridis_d() +
coord_quickmap(
xlim = c(min_lon, max_lon),
ylim = c(min_lat, max_lat),
expand = TRUE
) +
labs(
title = glue("Top 90th percentile mortality days ({paste(years, collapse=', ')})"),
x = "Longitude",
y = "Latitude",
color = "Year"
) +
theme_minimal(base_size = 14)
}
} else {
map_hotspots_gg <- ggplot() +
labs(title = "No 'location' column for top 90% days map") +
theme_void()
}
# Optionally save outputs
if (!is.null(outdir)) {
readr::write_csv(merged_high, file.path(outdir, "merged_df_top90.csv"))
ggsave(file.path(outdir, "daily_plot.png"),
daily_plot, width = 8, height = 5, dpi = 300)
ggsave(file.path(outdir, "top_species_plot.png"),
top_species_plot, width = 7, height = 7, dpi = 300)
ggsave(file.path(outdir, "map_hotspots.png"),
map_hotspots_gg, width = 8, height = 5, dpi = 300)
}
return(list(
merged_df_all = merged_df_all,
merged_df = merged_high,
daily_plot = daily_plot,
top_species_plot = top_species_plot,
map_hotspots_gg = map_hotspots_gg,
daily_90th_quant = daily_quantile
))
}
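# A hedged end-to-end example of the weekly wrapper, independent of the
# Shiny app. place_id = 14 (California, per the UI hint below), the
# years, and the output directory are illustrative assumptions. Wrapped
# in if (FALSE) because this can issue hundreds of API requests.
if (FALSE) {
  res <- getDeadVertebrates_weeklyLoop(
    years = c(2022, 2023),
    place_id = 14,              # California
    iconic_taxa = "Aves",
    outdir = "inat_dead_output" # hypothetical directory; created if absent
  )
  print(res$daily_plot)
  print(res$daily_90th_quant)
}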
##################################################################
# 3) Shiny App: UI + Server (Weekly Queries)
##################################################################
ui <- fluidPage(
theme = shinytheme("cosmo"), # Use a professional theme from shinythemes
# -- Logo and Title at the top --
fluidRow(
column(
width = 2,
tags$img(src = "www/all_logos.png", height = "400px")
),
column(
width = 10,
titlePanel("Dead Wildlife Observations from iNaturalist")
)
),
hr(),
sidebarLayout(
sidebarPanel(
tabsetPanel(
id = "sidebar_tabs",
# == Query Panel ==
tabPanel(
title = "Query",
br(),
radioButtons("region_mode", "Region Input Mode:",
choices = c("Enter Numeric place_id" = "place",
"Two-Click Bounding Box" = "bbox"),
# choices = c(
# "Two-Click Bounding Box" = "bbox"),
selected = "bbox"),
# If user chooses numeric "place_id"
conditionalPanel(
condition = "input.region_mode == 'place'",
numericInput("place_id",
"Numeric place_id (e.g. 1 = USA, 6712 = Canada, 14 = California)",
value = 1, min = 1, max = 999999, step = 1)
),
# If user chooses bounding box
conditionalPanel(
condition = "input.region_mode == 'bbox'",
helpText("Left-click once for the SW corner, once more for the NE corner."),
leafletOutput("map_two_click", height = "300px"),
br(),
actionButton("clear_bbox", "Clear bounding box"),
br(), br(),
verbatimTextOutput("bbox_coords")
),
# Years
checkboxGroupInput("years", "Select Year(s):",
choices = 2018:2025,
selected = c(2022, 2023)),
# Query by iconic class or exact species
radioButtons("query_type", "Query By:",
choices = c("Taxon Class" = "iconic",
"Exact Species Name" = "species")),
conditionalPanel(
condition = "input.query_type == 'iconic'",
selectInput("iconic_taxon", "Select Taxon Class:",
choices = c("Aves", "Mammalia", "Reptilia", "Amphibia",
"Actinopterygii", "Mollusca", "Animalia"),
selected = "Aves")
),
conditionalPanel(
condition = "input.query_type == 'species'",
textInput("species_name", "Enter exact species name (e.g. Puma concolor)", "")
),
actionButton("run_query", "Run Query", icon = icon("play")),
hr(),
downloadButton("downloadTop90", "Download Top-90% CSV", icon = icon("download")),
br(), br(),
downloadButton("downloadAll", "Download ALL Data CSV", icon = icon("download"))
),
# == About Panel ==
tabPanel(
title = "About",
br(),
p("This Shiny application was created by Diego Ellis Soto (UC Berkeley).
It queries iNaturalist for observations that have been annotated as 'Dead' wildlife (term_id=17, term_value_id=19).
The data is fetched via the iNaturalist API and summarized here for scientific or conservation purposes.")
),
# == Participatory Science Panel ==
tabPanel(
title = "Participatory Science",
br(),
p("Citizen science platforms like iNaturalist allow everyday people to collect and share data about local biodiversity.
Recording observations of dead wildlife can help track mortality events, disease spread, and other factors affecting animal populations."),
p("We encourage everyone to contribute their sightings responsibly, ensuring that any data on roadkill or other mortalities can help conservation efforts and
raise public awareness.")
),
# == How To Use Panel ==
tabPanel(
title = "How to Use",
br(),
p("This application lets you retrieve data about dead wildlife observations from iNaturalist.
You can choose to manually provide a numeric place_id or define a custom bounding box by clicking twice on the map."),
p("You can also decide whether to query by taxon class (e.g. Aves) or by exact species name (e.g. Puma concolor)."),
p("After selecting your inputs, press 'Run Query.' Two separate CSV downloads are provided: (1) for all data retrieved, and (2) for only the top-90% mortality days (for hotspot analysis).")
)
)
),
mainPanel(
tabsetPanel(
tabPanel("Daily Time Series", withSpinner(plotOutput("dailyPlot"), type = 6)),
tabPanel("Top Species", withSpinner(plotOutput("speciesPlot"), type = 6)),
tabPanel("Hotspots Map (90th%)", withSpinner(plotOutput("hotspotMap"), type = 6)),
tabPanel("Data Table (Top-90%)", withSpinner(DT::dataTableOutput("dataTable"), type = 6))
)
)
)
)
server <- function(input, output, session) {
# Reactive values for bounding box corners
rv <- reactiveValues(
corner1 = NULL,
corner2 = NULL,
bbox = NULL
)
# Initialize map
output$map_two_click <- renderLeaflet({
leaflet() %>%
addTiles() %>%
setView(lng = -100, lat = 40, zoom = 4)
})
# Handle bounding box clicks
observeEvent(input$map_two_click_click, {
req(input$region_mode == "bbox")
click <- input$map_two_click_click
if (is.null(click)) return()
lat_clicked <- click$lat
lng_clicked <- click$lng
if (is.null(rv$corner1)) {
rv$corner1 <- c(lat_clicked, lng_clicked)
showNotification("First corner set. Now click for the opposite corner.")
leafletProxy("map_two_click") %>%
clearMarkers() %>%
addMarkers(lng = lng_clicked, lat = lat_clicked, popup = "Corner 1")
rv$corner2 <- NULL
rv$bbox <- NULL
} else {
rv$corner2 <- c(lat_clicked, lng_clicked)
lat_min <- min(rv$corner1[1], rv$corner2[1])
lat_max <- max(rv$corner1[1], rv$corner2[1])
lng_min <- min(rv$corner1[2], rv$corner2[2])
lng_max <- max(rv$corner1[2], rv$corner2[2])
rv$bbox <- c(lat_min, lng_min, lat_max, lng_max)
showNotification("Second corner set. Bounding box defined!", duration = 2)
leafletProxy("map_two_click") %>%
clearMarkers() %>%
addMarkers(lng = rv$corner1[2], lat = rv$corner1[1], popup = "Corner 1") %>%
addMarkers(lng = rv$corner2[2], lat = rv$corner2[1], popup = "Corner 2") %>%
clearShapes() %>%
addRectangles(
lng1 = lng_min, lat1 = lat_min,
lng2 = lng_max, lat2 = lat_max,
fillColor = "red", fillOpacity = 0.2,
color = "red"
)
}
})
observeEvent(input$clear_bbox, {
rv$corner1 <- NULL
rv$corner2 <- NULL
rv$bbox <- NULL
leafletProxy("map_two_click") %>%
clearMarkers() %>%
clearShapes()
})
output$bbox_coords <- renderText({
req(input$region_mode == "bbox")
if (is.null(rv$bbox)) {
"No bounding box defined yet."
} else {
paste0(
"Bounding box:\n",
"SW corner: (", rv$bbox[1], ", ", rv$bbox[2], ")\n",
"NE corner: (", rv$bbox[3], ", ", rv$bbox[4], ")"
)
}
})
# Store final query results
result_data <- reactiveVal(NULL)
# Main "Run Query" button
observeEvent(input$run_query, {
req(input$years)
shiny::validate(need(length(input$years) > 0, "Please select at least one year."))
yrs <- as.numeric(input$years)
# Region logic
place_id_val <- NULL
swlat_val <- NULL
swlng_val <- NULL
nelat_val <- NULL
nelng_val <- NULL
if (input$region_mode == "place") {
place_id_val <- input$place_id
} else {
shiny::validate(need(!is.null(rv$bbox), "Please click twice on the map to define bounding box."))
swlat_val <- rv$bbox[1]
swlng_val <- rv$bbox[2]
nelat_val <- rv$bbox[3]
nelng_val <- rv$bbox[4]
}
# Query type logic
iconic_val <- NULL
species_val <- NULL
if (input$query_type == "iconic") {
iconic_val <- input$iconic_taxon
} else {
species_val <- input$species_name
}
# Fetch data
withProgress(message = 'Fetching data from iNaturalist (Weekly)...', value = 0, {
incProgress(0.4)
query_res <- getDeadVertebrates_weeklyLoop(
years = yrs,
place_id = place_id_val,
swlat = swlat_val,
swlng = swlng_val,
nelat = nelat_val,
nelng = nelng_val,
iconic_taxa = iconic_val,
taxon_name = species_val
)
result_data(query_res)
incProgress(1)
})
})
# Output plots
output$dailyPlot <- renderPlot({
req(result_data())
result_data()$daily_plot
})
output$speciesPlot <- renderPlot({
req(result_data())
result_data()$top_species_plot
})
output$hotspotMap <- renderPlot({
req(result_data())
result_data()$map_hotspots_gg
})
# Output data table (top-90% subset)
output$dataTable <- DT::renderDataTable({
req(result_data())
df <- result_data()$merged_df # top 90% subset
if (nrow(df) == 0) {
return(DT::datatable(
data.frame(Message = "No records found"),
options = list(pageLength = 20) # Show 20 records
))
}
df <- df %>%
mutate(
inat_link = paste0(
"<a href='https://www.inaturalist.org/observations/",
id, "' target='_blank'>", id, "</a>"
)
)
photo_col <- "taxon.default_photo.square_url"
if (photo_col %in% names(df)) {
df$image_thumb <- ifelse(
!is.na(df[[photo_col]]) & df[[photo_col]] != "",
paste0("<img src='", df[[photo_col]], "' width='50'/>"),
"No Img"
)
} else {
df$image_thumb <- "No Img"
}
show_cols <- c(
"inat_link", "image_thumb", "taxon.name", "created_at_details.date",
setdiff(names(df), c("inat_link", "image_thumb", "taxon.name", "created_at_details.date"))
)
DT::datatable(
df[, show_cols, drop = FALSE],
escape = FALSE,
options = list(pageLength = 20, autoWidth = TRUE) # show 20 rows per page
)
})
# Download handlers
output$downloadTop90 <- downloadHandler(
filename = function() {
paste0("inat_dead_top90_", Sys.Date(), ".csv")
},
content = function(file) {
req(result_data())
readr::write_csv(result_data()$merged_df, file)
}
)
output$downloadAll <- downloadHandler(
filename = function() {
paste0("inat_dead_ALL_", Sys.Date(), ".csv")
},
content = function(file) {
req(result_data())
readr::write_csv(result_data()$merged_df_all, file)
}
)
}
shinyApp(ui = ui, server = server)
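# To run: save this script as app.R (with all_logos.png in a www/
# subdirectory alongside it), then launch from that directory, e.g.:
# shiny::runApp()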