# Provenance (file-viewer page residue): uploaded by LeonceNsh, "Update app.py", commit d722ce7 (verified), 7.15 kB.
import pandas as pd
import geopandas as gpd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# ========================
# Data Loading
# ========================
# Load the health and demographic data
conus_data = pd.read_csv("conus27.csv")

# Load the county geometries (GeoJSON)
county_geojson = gpd.read_file("county.geojson")

# Load the county embeddings
county_embeddings = pd.read_csv("county_embeddings.csv")

# Load the unemployment data
county_unemployment = pd.read_csv("county_unemployment.csv")

# Load the poverty data
zcta_poverty = pd.read_csv("zcta_poverty.csv")

# Load the ZCTA geometries (GeoJSON)
zcta_geojson = gpd.read_file("zcta.geojson")

# Reshape unemployment from wide (one column per date label) to long
# (one row per place/date pair), then attach it to the county geometries.
county_unemployment_melted = county_unemployment.melt(
    id_vars=['place'],
    var_name='date',
    value_name='unemployment_rate',
)
county_unemployment_melted['place'] = county_unemployment_melted['place'].astype(str)
# Cast the join key on the geometry side as well, mirroring the ZCTA merge
# below, so both sides of the merge always share the same (string) key type.
county_geojson['place'] = county_geojson['place'].astype(str)
county_geojson_unemployment = county_geojson.merge(
    county_unemployment_melted,
    left_on='place',
    right_on='place',
    how='left',
)

# Reshape poverty from wide (one column per year label) to long and attach
# it to the ZCTA geometries, using string join keys on both sides.
zcta_poverty_melted = zcta_poverty.melt(
    id_vars=['place'],
    var_name='year',
    value_name='poverty_rate',
)
zcta_poverty_melted['place'] = zcta_poverty_melted['place'].astype(str)
zcta_geojson['place'] = zcta_geojson['place'].astype(str)
zcta_geojson_poverty = zcta_geojson.merge(
    zcta_poverty_melted,
    left_on='place',
    right_on='place',
    how='left',
)

# Health metrics are the columns of conus_data named 'Percent_Person_*'
health_metrics = [col for col in conus_data.columns if col.startswith('Percent_Person_')]

# Short display names (marker text removed) and the lookup from each
# display name back to its full column name
simplified_metrics = [col.replace('Percent_Person_', '') for col in health_metrics]
metric_mapping = dict(zip(simplified_metrics, health_metrics))
# ========================
# Utility Functions
# ========================
def plot_health_metric(metric):
    """
    Draw a map of one health metric and return the matplotlib figure.

    `metric` is a simplified metric name; it is translated to the full
    column name through `metric_mapping` (an unknown name raises KeyError).
    """
    # NOTE(review): `gdf_health` is not defined in the visible part of this
    # file; calling this function will raise NameError unless it is created
    # elsewhere. `markersize` suggests point geometries -- TODO confirm.
    column_name = metric_mapping[metric]
    legend_options = {'label': f"{metric} (%)"}

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    gdf_health.plot(
        ax=ax,
        column=column_name,
        cmap='OrRd',
        markersize=50,
        alpha=0.7,
        edgecolor='k',
        legend=True,
        legend_kwds=legend_options,
    )

    # Map styling: title only, no axis frame or ticks.
    ax.set_title(f'Geographical Distribution of {metric}', fontsize=15)
    ax.axis('off')
    plt.tight_layout()
    return fig
def plot_correlation_matrix(selected_metrics):
    """
    Plots the correlation matrix for selected health metrics.

    `selected_metrics` is a list of simplified metric names; each one is
    translated to its full column name through `metric_mapping` (an unknown
    name raises KeyError). Returns a matplotlib figure.

    When nothing is selected (empty list or None) a placeholder figure with
    a short message is returned instead of a heatmap.
    """
    selected_columns = [metric_mapping[metric] for metric in (selected_metrics or [])]

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.set_title('Correlation Matrix of Selected Health Metrics', fontsize=15)

    if not selected_columns:
        # An empty selection yields an empty correlation matrix, which
        # seaborn's heatmap cannot draw; show a hint instead of failing.
        ax.text(
            0.5,
            0.5,
            'Select at least one health metric',
            ha='center',
            va='center',
            transform=ax.transAxes,
        )
        ax.axis('off')
        plt.tight_layout()
        return fig

    corr = conus_data[selected_columns].corr()
    sns.heatmap(corr, annot=True, cmap='coolwarm', square=True, ax=ax)
    plt.tight_layout()
    return fig
def plot_unemployment_map(date):
    """
    Draw the county unemployment-rate map for one date and return the figure.

    `date` is converted to a string and compared against the 'date' column
    of `county_geojson_unemployment`; only matching rows are drawn.
    """
    date_label = str(date)
    is_selected = county_geojson_unemployment['date'] == date_label
    subset = county_geojson_unemployment[is_selected]

    # Rows without a rate are drawn in light grey rather than left out.
    missing_style = {"color": "lightgrey", "label": "Missing values"}

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    subset.plot(
        ax=ax,
        column='unemployment_rate',
        cmap='Blues',
        linewidth=0.8,
        edgecolor='0.8',
        legend=True,
        missing_kwds=missing_style,
    )

    ax.set_title(f'Unemployment Rate by County ({date_label})', fontsize=15)
    ax.axis('off')
    plt.tight_layout()
    return fig
def plot_poverty_map(year):
    """
    Plots the poverty rate map for a selected year.

    `year` may be an int, a string, or an integral float (for example 2019.0
    coming from a numeric widget). It is turned into a string label and
    compared against the 'year' column of `zcta_geojson_poverty`; only
    matching rows are drawn. Returns a matplotlib figure.
    """
    # str(2019.0) is '2019.0', which would never equal a '2019' label, so
    # collapse integral floats to int before building the label.
    if isinstance(year, float) and year.is_integer():
        year = int(year)
    year = str(year)

    data = zcta_geojson_poverty[zcta_geojson_poverty['year'] == year]
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    data.plot(
        column='poverty_rate',
        cmap='Reds',
        linewidth=0.8,
        ax=ax,
        edgecolor='0.8',
        legend=True,
        # Rows without a rate are drawn in light grey rather than left out.
        missing_kwds={"color": "lightgrey", "label": "Missing values"},
    )
    ax.set_title(f'Poverty Rate by ZCTA ({year})', fontsize=15)
    ax.axis('off')
    plt.tight_layout()
    return fig
def summarize_health_metrics(metric):
    """
    Build a two-column table of descriptive statistics for one health metric.

    `metric` is a simplified metric name, resolved through `metric_mapping`.
    The result has the columns 'Statistic' and 'Value', one row per entry
    produced by pandas' `describe()` on that column of `conus_data`.
    """
    column_name = metric_mapping[metric]
    stats = conus_data[column_name].describe()

    # Move the statistic names out of the index into a regular column.
    table = stats.to_frame().reset_index()
    table.columns = ['Statistic', 'Value']
    return table
# ========================
# Gradio Interface Functions
# ========================
def health_metric_interface(metric):
    """Return the (map figure, summary table) pair for the chosen metric."""
    metric_map = plot_health_metric(metric)
    stats_table = summarize_health_metrics(metric)
    return metric_map, stats_table
def correlation_interface(metrics):
    """Return the correlation-matrix figure for the chosen metrics."""
    return plot_correlation_matrix(metrics)
def unemployment_interface(date):
    """Return the unemployment map figure for the chosen date."""
    return plot_unemployment_map(date)
def poverty_interface(year):
    """Return the poverty map figure for the chosen year."""
    return plot_poverty_map(year)
# ========================
# Gradio App Setup
# ========================
# Dashboard layout: one tab per view; each input's change event re-renders
# that tab's outputs through the matching *_interface function.
with gr.Blocks(title="US Population Health Dashboard") as demo:
    gr.Markdown("# US Population Health Dashboard")
    gr.Markdown("Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.")

    with gr.Tab("Health Metrics Map"):
        gr.Markdown("### Geographical Distribution of Health Metrics")
        health_metric = gr.Dropdown(label="Select a Health Metric", choices=simplified_metrics, value=simplified_metrics[0])
        health_plot = gr.Plot()
        health_summary = gr.Dataframe(headers=["Statistic", "Value"])
        health_metric.change(health_metric_interface, inputs=health_metric, outputs=[health_plot, health_summary])

    with gr.Tab("Health Metrics Correlation"):
        gr.Markdown("### Correlation Matrix of Health Metrics")
        correlation_metrics = gr.CheckboxGroup(label="Select Health Metrics", choices=simplified_metrics, value=simplified_metrics[:5])
        correlation_plot = gr.Plot()
        correlation_metrics.change(correlation_interface, inputs=correlation_metrics, outputs=correlation_plot)

    with gr.Tab("Unemployment Rate Map"):
        gr.Markdown("### Geographical Distribution of Unemployment Rates")
        # The date labels are the melted column names of the unemployment
        # CSV, i.e. strings. A numeric slider (min/max/step=1) cannot range
        # over them, and the numbers it emits would never equal a label, so
        # offer the labels themselves in a dropdown.
        dates = [str(label) for label in county_unemployment_melted['date'].unique().tolist()]
        unemployment_date = gr.Dropdown(label="Select a Date", choices=dates, value=dates[0] if dates else None)
        unemployment_plot = gr.Plot()
        unemployment_date.change(unemployment_interface, inputs=unemployment_date, outputs=unemployment_plot)

    with gr.Tab("Poverty Rate Map"):
        gr.Markdown("### Geographical Distribution of Poverty Rates")
        # Year labels are converted to int here so a numeric slider can
        # range over them.
        years = zcta_poverty_melted['year'].unique().astype(int).tolist()
        poverty_year = gr.Slider(label="Select a Year", minimum=min(years), maximum=max(years), step=1, value=years[0])
        poverty_plot = gr.Plot()
        poverty_year.change(poverty_interface, inputs=poverty_year, outputs=poverty_plot)
# ========================
# Launch the App
# ========================
if __name__ == "__main__":
    # Start the Gradio app only when this file is run as a script.
    demo.launch()