Spaces:
Sleeping
Sleeping
import pandas as pd | |
import geopandas as gpd | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
# ======================== | |
# Data Loading | |
# ======================== | |
# Load the health and demographic data
conus_data = pd.read_csv("conus27.csv")
# Load the county boundaries
county_geojson = gpd.read_file("county.geojson")
# Load the county embeddings
county_embeddings = pd.read_csv("county_embeddings.csv")
# Load the unemployment data
county_unemployment = pd.read_csv("county_unemployment.csv")
# Load the poverty data
zcta_poverty = pd.read_csv("zcta_poverty.csv")
# Load the ZCTA boundaries
zcta_geojson = gpd.read_file("zcta.geojson")

# Reshape unemployment from wide (one column per date label) to long
# (one row per place/date pair) so it can be filtered by date later.
county_unemployment_melted = county_unemployment.melt(
    id_vars=['place'],
    var_name='date',
    value_name='unemployment_rate',
)
# Cast the join key to str on BOTH sides, exactly as the ZCTA merge below
# does. Casting only one side leaves the merge exposed to a dtype mismatch
# between the CSV and the GeoJSON 'place' columns.
county_unemployment_melted['place'] = county_unemployment_melted['place'].astype(str)
county_geojson['place'] = county_geojson['place'].astype(str)
county_geojson_unemployment = county_geojson.merge(
    county_unemployment_melted,
    left_on='place',
    right_on='place',
    how='left',
)

# Reshape poverty from wide (one column per year label) to long and attach
# it to the ZCTA geometries; how='left' keeps every geometry row.
zcta_poverty_melted = zcta_poverty.melt(id_vars=['place'], var_name='year', value_name='poverty_rate')
zcta_poverty_melted['place'] = zcta_poverty_melted['place'].astype(str)
zcta_geojson['place'] = zcta_geojson['place'].astype(str)
zcta_geojson_poverty = zcta_geojson.merge(zcta_poverty_melted, left_on='place', right_on='place', how='left')

# Health metrics are the conus_data columns carrying the 'Percent_Person_' prefix.
health_metrics = [col for col in conus_data.columns if col.startswith('Percent_Person_')]
# Short display names (prefix removed) and the lookup back to the real column names.
simplified_metrics = [col.replace('Percent_Person_', '') for col in health_metrics]
metric_mapping = dict(zip(simplified_metrics, health_metrics))
# ======================== | |
# Utility Functions | |
# ======================== | |
def plot_health_metric(metric):
    """
    Plots the geographical distribution of a selected health metric.

    Args:
        metric: Simplified metric name; must be a key of ``metric_mapping``.

    Returns:
        The matplotlib figure containing the map.

    Raises:
        KeyError: If ``metric`` is not a key of ``metric_mapping``.
    """
    # NOTE(review): ``gdf_health`` is not defined anywhere in the visible
    # source, so this call raises NameError unless it is created elsewhere.
    # Confirm where the health GeoDataFrame is supposed to be built.
    metric_full_name = metric_mapping[metric]
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    try:
        gdf_health.plot(
            column=metric_full_name,
            cmap='OrRd',
            markersize=50,
            legend=True,
            legend_kwds={'label': f"{metric} (%)"},
            ax=ax,
            alpha=0.7,
            edgecolor='k'
        )
        ax.set_title(f'Geographical Distribution of {metric}', fontsize=15)
        ax.axis('off')
        # Lay out this specific figure rather than pyplot's global "current
        # figure", which another concurrent callback may have replaced.
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated calls do not pile up
        # open figures; the Figure object itself remains usable by the caller.
        plt.close(fig)
    return fig
def plot_correlation_matrix(selected_metrics):
    """
    Plots the correlation matrix for selected health metrics.

    Args:
        selected_metrics: Iterable of simplified metric names (keys of
            ``metric_mapping``). May be empty or None, in which case a
            placeholder figure with a hint is returned instead of a heatmap.

    Returns:
        The matplotlib figure containing the heatmap or the placeholder.

    Raises:
        KeyError: If a name is not a key of ``metric_mapping``.
    """
    selected_columns = [metric_mapping[metric] for metric in (selected_metrics or [])]
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        if selected_columns:
            corr = conus_data[selected_columns].corr()
            sns.heatmap(corr, annot=True, cmap='coolwarm', square=True, ax=ax)
            ax.set_title('Correlation Matrix of Selected Health Metrics', fontsize=15)
        else:
            # An empty selection produces an empty correlation matrix, which
            # the heatmap cannot draw; show a hint instead of failing.
            ax.text(
                0.5,
                0.5,
                'Select at least one health metric',
                ha='center',
                va='center',
                transform=ax.transAxes,
            )
            ax.axis('off')
        # Lay out this specific figure rather than pyplot's global "current
        # figure", which another concurrent callback may have replaced.
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated calls do not pile up
        # open figures; the Figure object itself remains usable by the caller.
        plt.close(fig)
    return fig
def plot_unemployment_map(date):
    """
    Plots the unemployment rate map for a selected date.

    Args:
        date: Date label to display. It is converted with ``str`` and compared
            against the 'date' column of ``county_geojson_unemployment``.

    Returns:
        The matplotlib figure containing the county map.
    """
    date = str(date)
    data = county_geojson_unemployment[county_geojson_unemployment['date'] == date]
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    try:
        data.plot(
            column='unemployment_rate',
            cmap='Blues',
            linewidth=0.8,
            ax=ax,
            edgecolor='0.8',
            legend=True,
            missing_kwds={"color": "lightgrey", "label": "Missing values"},
        )
        ax.set_title(f'Unemployment Rate by County ({date})', fontsize=15)
        ax.axis('off')
        # Lay out this specific figure rather than pyplot's global "current
        # figure", which another concurrent callback may have replaced.
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated calls do not pile up
        # open figures; the Figure object itself remains usable by the caller.
        plt.close(fig)
    return fig
def plot_poverty_map(year):
    """
    Plots the poverty rate map for a selected year.

    Args:
        year: Year to display, as an int, float or string. Numeric values are
            normalised to an integer label (2015.0 -> '2015') before being
            compared against the 'year' column of ``zcta_geojson_poverty``.

    Returns:
        The matplotlib figure containing the ZCTA map.
    """
    # A slider may deliver a float; str(2015.0) is '2015.0', which would never
    # match a '2015' label. Fall back to plain str() for non-numeric input.
    try:
        year = str(int(float(year)))
    except (TypeError, ValueError):
        year = str(year)
    data = zcta_geojson_poverty[zcta_geojson_poverty['year'] == year]
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    try:
        data.plot(
            column='poverty_rate',
            cmap='Reds',
            linewidth=0.8,
            ax=ax,
            edgecolor='0.8',
            legend=True,
            missing_kwds={"color": "lightgrey", "label": "Missing values"},
        )
        ax.set_title(f'Poverty Rate by ZCTA ({year})', fontsize=15)
        ax.axis('off')
        # Lay out this specific figure rather than pyplot's global "current
        # figure", which another concurrent callback may have replaced.
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated calls do not pile up
        # open figures; the Figure object itself remains usable by the caller.
        plt.close(fig)
    return fig
def summarize_health_metrics(metric):
    """
    Builds a two-column table of descriptive statistics for one health metric.

    Args:
        metric: Simplified metric name; must be a key of ``metric_mapping``.

    Returns:
        A DataFrame with columns 'Statistic' and 'Value', holding the output
        of ``describe()`` for the metric's column in ``conus_data``.
    """
    column_name = metric_mapping[metric]
    stats = conus_data[column_name].describe()
    # Move the statistic names out of the index into a regular column.
    table = stats.to_frame().reset_index()
    table.columns = ['Statistic', 'Value']
    return table
# ======================== | |
# Gradio Interface Functions | |
# ======================== | |
def health_metric_interface(metric):
    """Return the map figure and the summary table for the selected metric."""
    return plot_health_metric(metric), summarize_health_metrics(metric)
def correlation_interface(metrics):
    """Return the correlation-matrix figure for the selected metrics."""
    return plot_correlation_matrix(metrics)
def unemployment_interface(date):
    """Return the unemployment map figure for the selected date."""
    return plot_unemployment_map(date)
def poverty_interface(year):
    """Return the poverty map figure for the selected year."""
    return plot_poverty_map(year)
# ======================== | |
# Gradio App Setup | |
# ======================== | |
# Build the dashboard: one tab per view, each wiring an input control to the
# callback that regenerates its plot whenever the control's value changes.
with gr.Blocks(title="US Population Health Dashboard") as demo:
    gr.Markdown("# US Population Health Dashboard")
    gr.Markdown("Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.")

    with gr.Tab("Health Metrics Map"):
        gr.Markdown("### Geographical Distribution of Health Metrics")
        health_metric = gr.Dropdown(
            label="Select a Health Metric",
            choices=simplified_metrics,
            # Guard against a dataset with no 'Percent_Person_' columns.
            value=simplified_metrics[0] if simplified_metrics else None,
        )
        health_plot = gr.Plot()
        health_summary = gr.Dataframe(headers=["Statistic", "Value"])
        health_metric.change(health_metric_interface, inputs=health_metric, outputs=[health_plot, health_summary])

    with gr.Tab("Health Metrics Correlation"):
        gr.Markdown("### Correlation Matrix of Health Metrics")
        correlation_metrics = gr.CheckboxGroup(label="Select Health Metrics", choices=simplified_metrics, value=simplified_metrics[:5])
        correlation_plot = gr.Plot()
        correlation_metrics.change(correlation_interface, inputs=correlation_metrics, outputs=correlation_plot)

    with gr.Tab("Unemployment Rate Map"):
        gr.Markdown("### Geographical Distribution of Unemployment Rates")
        # The date values are the column labels produced by melt(), i.e.
        # strings, so they cannot serve as numeric slider bounds. Offer the
        # exact labels in a dropdown instead; the callback compares by string.
        dates = [str(label) for label in county_unemployment_melted['date'].unique().tolist()]
        unemployment_date = gr.Dropdown(
            label="Select a Date",
            choices=dates,
            value=dates[0] if dates else None,
        )
        unemployment_plot = gr.Plot()
        unemployment_date.change(unemployment_interface, inputs=unemployment_date, outputs=unemployment_plot)

    with gr.Tab("Poverty Rate Map"):
        gr.Markdown("### Geographical Distribution of Poverty Rates")
        # Year labels are cast to int so they can bound a numeric slider.
        years = zcta_poverty_melted['year'].unique().astype(int).tolist()
        poverty_year = gr.Slider(label="Select a Year", minimum=min(years), maximum=max(years), step=1, value=years[0])
        poverty_plot = gr.Plot()
        poverty_year.change(poverty_interface, inputs=poverty_year, outputs=poverty_plot)
# ======================== | |
# Launch the App | |
# ======================== | |
# Start the Gradio server only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()