Spaces:

LeonceNsh
/

healthcare-networks-gpus

Sleeping

App Files Files Community

LeonceNsh committed on Dec 6, 2024

Commit

4b3015b

verified ·

1 Parent(s): 95e341b

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -66

app.py CHANGED Viewed

@@ -4,101 +4,151 @@ import gradio as gr
 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
 # ========================
 # Data Loading
 # ========================
-# Load the health and demographic data
 conus_data = pd.read_csv("conus27.csv")
-# Load the county shapefile
 county_geojson = gpd.read_file("county.geojson")
-# Load the county embeddings
 county_embeddings = pd.read_csv("county_embeddings.csv")
-# Load the unemployment data
 county_unemployment = pd.read_csv("county_unemployment.csv")
-# Load the poverty data
 zcta_poverty = pd.read_csv("zcta_poverty.csv")
-# Load the ZCTA shapefile
 zcta_geojson = gpd.read_file("zcta.geojson")
-# Merge unemployment data with county_geojson
-county_unemployment_melted = county_unemployment.melt(id_vars=['place'],
-                                                      var_name='date',
-                                                      value_name='unemployment_rate')
 county_unemployment_melted['place'] = county_unemployment_melted['place'].astype(str)
-county_geojson_unemployment = county_geojson.merge(county_unemployment_melted, left_on='place', right_on='place', how='left')
 # Prepare poverty data
-zcta_poverty_melted = zcta_poverty.melt(id_vars=['place'], var_name='year', value_name='poverty_rate')
 zcta_poverty_melted['place'] = zcta_poverty_melted['place'].astype(str)
 zcta_geojson['place'] = zcta_geojson['place'].astype(str)
-zcta_geojson_poverty = zcta_geojson.merge(zcta_poverty_melted, left_on='place', right_on='place', how='left')
-# List of health metrics available
 health_metrics = [col for col in conus_data.columns if col.startswith('Percent_Person_')]
-# Simplify metric names
 simplified_metrics = [col.replace('Percent_Person_', '') for col in health_metrics]
 metric_mapping = dict(zip(simplified_metrics, health_metrics))
 # ========================
 # Utility Functions
 # ========================
 def plot_health_metric(metric):
     """
-    Plots the geographical distribution of a selected health metric.
     """
     metric_full_name = metric_mapping[metric]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
-    gdf_health.plot(
         column=metric_full_name,
-        cmap='OrRd',
         markersize=50,
         legend=True,
         legend_kwds={'label': f"{metric} (%)"},
         ax=ax,
         alpha=0.7,
-        edgecolor='k'
     )
     ax.set_title(f'Geographical Distribution of {metric}', fontsize=15)
     ax.axis('off')
     plt.tight_layout()
     return fig
 def plot_correlation_matrix(selected_metrics):
     """
-    Plots the correlation matrix for selected health metrics.
     """
-    selected_columns = [metric_mapping[metric] for metric in selected_metrics]
     corr = conus_data[selected_columns].corr()
     fig, ax = plt.subplots(figsize=(10, 8))
-    sns.heatmap(corr, annot=True, cmap='coolwarm', square=True, ax=ax)
-    ax.set_title('Correlation Matrix of Selected Health Metrics', fontsize=15)
     plt.tight_layout()
     return fig
 def plot_unemployment_map(date):
     """
-    Plots the unemployment rate map for a selected date.
     """
     date = str(date)
     data = county_geojson_unemployment[county_geojson_unemployment['date'] == date]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
     data.plot(
         column='unemployment_rate',
-        cmap='Blues',
-        linewidth=0.8,
         ax=ax,
         edgecolor='0.8',
         legend=True,
@@ -111,15 +161,15 @@ def plot_unemployment_map(date):
 def plot_poverty_map(year):
     """
-    Plots the poverty rate map for a selected year.
     """
     year = str(year)
     data = zcta_geojson_poverty[zcta_geojson_poverty['year'] == year]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
     data.plot(
         column='poverty_rate',
-        cmap='Reds',
-        linewidth=0.8,
         ax=ax,
         edgecolor='0.8',
         legend=True,
@@ -130,25 +180,20 @@ def plot_poverty_map(year):
     plt.tight_layout()
     return fig
-def summarize_health_metrics(metric):
-    """
-    Generates summary statistics for a selected health metric.
-    """
-    metric_full_name = metric_mapping[metric]
-    summary = conus_data[metric_full_name].describe().to_frame().reset_index()
-    summary.columns = ['Statistic', 'Value']
-    return summary
 # ========================
 # Gradio Interface Functions
 # ========================
 def health_metric_interface(metric):
-    fig = plot_health_metric(metric)
     summary = summarize_health_metrics(metric)
-    return fig, summary
 def correlation_interface(metrics):
     fig = plot_correlation_matrix(metrics)
     return fig
@@ -166,25 +211,46 @@ def poverty_interface(year):
 with gr.Blocks(title="US Population Health Dashboard") as demo:
     gr.Markdown("# US Population Health Dashboard")
-    gr.Markdown("Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.")
-    with gr.Tab("Health Metrics Map"):
-        gr.Markdown("### Geographical Distribution of Health Metrics")
         health_metric = gr.Dropdown(label="Select a Health Metric", choices=simplified_metrics, value=simplified_metrics[0])
-        health_plot = gr.Plot()
-        health_summary = gr.Dataframe(headers=["Statistic", "Value"])
-        health_metric.change(health_metric_interface, inputs=health_metric, outputs=[health_plot, health_summary])
     with gr.Tab("Health Metrics Correlation"):
-        gr.Markdown("### Correlation Matrix of Health Metrics")
-        correlation_metrics = gr.CheckboxGroup(label="Select Health Metrics", choices=simplified_metrics, value=simplified_metrics[:5])
-        correlation_plot = gr.Plot()
         correlation_metrics.change(correlation_interface, inputs=correlation_metrics, outputs=correlation_plot)
-# ========================
-# Launch the App
-# ========================
 if __name__ == "__main__":
-    demo.launch()

 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
+from scipy.cluster.hierarchy import linkage, leaves_list
 # ========================
 # Data Loading
 # ========================
 # Source tables (CSV) and the boundary layers (GeoJSON) they are joined onto.
 conus_data = pd.read_csv("conus27.csv")
 county_geojson = gpd.read_file("county.geojson")
 county_embeddings = pd.read_csv("county_embeddings.csv")
 county_unemployment = pd.read_csv("county_unemployment.csv")
 zcta_poverty = pd.read_csv("zcta_poverty.csv")
 zcta_geojson = gpd.read_file("zcta.geojson")

 # Unemployment: every column other than 'place' becomes a (date, rate) row,
 # the join key is cast to str, and the long table is left-joined onto the
 # county geometries.
 county_unemployment_melted = pd.melt(
     county_unemployment,
     id_vars=['place'],
     var_name='date',
     value_name='unemployment_rate',
 )
 county_unemployment_melted = county_unemployment_melted.astype({'place': str})
 county_geojson_unemployment = county_geojson.merge(
     county_unemployment_melted,
     left_on='place',
     right_on='place',
     how='left',
 )

 # Poverty: same wide-to-long reshape, keyed by 'place' on the ZCTA layer.
 # Here both sides of the join have their key cast to str.
 zcta_poverty_melted = pd.melt(
     zcta_poverty,
     id_vars=['place'],
     var_name='year',
     value_name='poverty_rate',
 )
 zcta_poverty_melted = zcta_poverty_melted.astype({'place': str})
 zcta_geojson['place'] = zcta_geojson['place'].astype(str)
 zcta_geojson_poverty = zcta_geojson.merge(
     zcta_poverty_melted,
     left_on='place',
     right_on='place',
     how='left',
 )

 # Health metrics are the conus_data columns carrying the 'Percent_Person_'
 # prefix; the UI shows the name with that text removed and maps it back.
 health_metrics = [name for name in conus_data.columns if name.startswith('Percent_Person_')]
 simplified_metrics = [name.replace('Percent_Person_', '') for name in health_metrics]
 metric_mapping = {short: full for short, full in zip(simplified_metrics, health_metrics)}

 # County geometries joined with the health table, used for the choropleth.
 # 'place' is preferred as the key; 'GEOID' is the fallback when both frames
 # have it. Anything else is a hard failure at import time.
 if 'place' in conus_data.columns:
     merged_health = county_geojson.merge(conus_data, on='place', how='left')
 elif 'GEOID' in county_geojson.columns and 'GEOID' in conus_data.columns:
     merged_health = county_geojson.merge(conus_data, on='GEOID', how='left')
 else:
     raise ValueError("No matching key found to merge health data with geodata.")
 # ========================
 # Utility Functions
 # ========================
 def plot_health_metric(metric):
     """
     Plot a choropleth map of one health metric over the merged county layer.

     Args:
         metric: Simplified metric name; must be a key of ``metric_mapping``.

     Returns:
         The matplotlib Figure holding the map.

     Raises:
         KeyError: If ``metric`` is not a key of ``metric_mapping``.
     """
     metric_full_name = metric_mapping[metric]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
     merged_health.plot(
         column=metric_full_name,
         cmap='viridis',
         markersize=50,
         legend=True,
         legend_kwds={'label': f"{metric} (%)"},
         ax=ax,
         alpha=0.7,
         edgecolor='black',
         linewidth=0.5,
         # Rows with no value for this metric are drawn in grey instead of
         # being dropped from the map.
         missing_kwds={"color": "lightgrey", "label": "No Data"}
     )
     ax.set_title(f'Geographical Distribution of {metric}', fontsize=15)
     ax.axis('off')
     fig.tight_layout()
     # pyplot keeps every figure it creates alive until it is closed. This
     # function runs on each dropdown change, so unregister the figure here;
     # the returned Figure object itself remains usable by the caller.
     plt.close(fig)
     return fig
def plot_health_histogram(metric):
    """
    Plot a histogram with a KDE overlay of one health metric.

    Missing values are dropped before plotting.

    Args:
        metric: Simplified metric name; must be a key of ``metric_mapping``.

    Returns:
        The matplotlib Figure holding the histogram.

    Raises:
        KeyError: If ``metric`` is not a key of ``metric_mapping``.
    """
    metric_full_name = metric_mapping[metric]
    data = conus_data[metric_full_name].dropna()
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(data, kde=True, color='teal', ax=ax)
    ax.set_title(f'Distribution of {metric} (%)', fontsize=15)
    ax.set_xlabel(f'{metric} (%)')
    ax.set_ylabel('Count')
    fig.tight_layout()
    # pyplot keeps every figure it creates alive until it is closed. This
    # function runs on each dropdown change, so unregister the figure here;
    # the returned Figure object itself remains usable by the caller.
    plt.close(fig)
    return fig
def summarize_health_metrics(metric):
    """
    Build a two-column summary table for one health metric.

    The table holds the rows produced by ``Series.describe()`` on the
    non-missing values, followed by two appended rows: 'Median' and 'IQR'
    (75th percentile minus 25th percentile).

    Args:
        metric: Simplified metric name; must be a key of ``metric_mapping``.

    Returns:
        A DataFrame with columns 'Statistic' and 'Value'.
    """
    values = conus_data[metric_mapping[metric]].dropna()

    # describe() output turned into rows: the statistic name in one column,
    # its value in the other.
    described = values.describe().rename_axis('Statistic').reset_index(name='Value')

    quartiles = values.quantile([0.25, 0.75])
    spread = quartiles.iloc[1] - quartiles.iloc[0]

    # The two extra rows go at the end of the table, after the describe() rows.
    appended = pd.DataFrame({
        'Statistic': ['Median', 'IQR'],
        'Value': [values.median(), spread],
    })
    return pd.concat([described, appended], ignore_index=True)
 def plot_correlation_matrix(selected_metrics):
     """
     Plot the correlation matrix of the selected health metrics as a heatmap,
     with rows and columns reordered by average-linkage hierarchical clustering.

     Args:
         selected_metrics: Simplified metric names; each must be a key of
             ``metric_mapping``.

     Returns:
         The matplotlib Figure holding the heatmap.

     Raises:
         KeyError: If a name is not a key of ``metric_mapping``.
     """
     # Local import: the file-level scipy import only brings in linkage and
     # leaves_list.
     from scipy.spatial.distance import squareform

     selected_columns = [metric_mapping[m] for m in selected_metrics]
     corr = conus_data[selected_columns].corr()

     # With one or two metrics there is nothing to reorder, and linkage()
     # cannot run on fewer than two observations.
     if len(corr) > 2:
         # Use 1 - r as the dissimilarity. A NaN correlation is treated as
         # "uncorrelated" (distance 1) so that linkage() receives only finite
         # values; the heatmap itself still shows the original corr values.
         distance = (1 - corr).fillna(1).to_numpy()
         # linkage() reads a 2-D array as observation vectors, not as pairwise
         # distances, so the square matrix is condensed first. checks=False
         # skips the symmetry/zero-diagonal validation, which floating-point
         # noise on the diagonal could otherwise trip.
         condensed = squareform(distance, checks=False)
         idx = leaves_list(linkage(condensed, method='average'))
         corr = corr.iloc[idx, :].iloc[:, idx]

     fig, ax = plt.subplots(figsize=(10, 8))
     sns.heatmap(
         corr, annot=True, cmap='coolwarm', square=True, ax=ax,
         xticklabels=corr.columns, yticklabels=corr.columns,
         cbar_kws={"shrink": .8}
     )
     ax.set_title('Correlation Matrix (Hierarchically Clustered)', fontsize=15)
     # Rotate the labels on the heatmap axes explicitly rather than on
     # whichever axes pyplot currently considers active.
     plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
     plt.setp(ax.get_yticklabels(), rotation=0)
     fig.tight_layout()
     # Unregister the figure from pyplot so repeated calls do not accumulate
     # open figures; the returned Figure object remains usable by the caller.
     plt.close(fig)
     return fig
 def plot_unemployment_map(date):
     """
+    Plots the unemployment rate map for a selected date with an improved colormap.
     """
     date = str(date)
     data = county_geojson_unemployment[county_geojson_unemployment['date'] == date]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
     data.plot(
         column='unemployment_rate',
+        cmap='YlGnBu',
+        linewidth=0.5,
         ax=ax,
         edgecolor='0.8',
         legend=True,
 def plot_poverty_map(year):
     """
+    Plots the poverty rate map for a selected year with improved colormap.
     """
     year = str(year)
     data = zcta_geojson_poverty[zcta_geojson_poverty['year'] == year]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
     data.plot(
         column='poverty_rate',
+        cmap='YlOrRd',
+        linewidth=0.5,
         ax=ax,
         edgecolor='0.8',
         legend=True,
     plt.tight_layout()
     return fig
 # ========================
 # Gradio Interface Functions
 # ========================
 def health_metric_interface(metric):
     """
     Produce the three outputs shown for a selected health metric.

     Returns a tuple (map figure, summary table, histogram figure), built in
     that order by plot_health_metric, summarize_health_metrics and
     plot_health_histogram.
     """
     return (
         plot_health_metric(metric),
         summarize_health_metrics(metric),
         plot_health_histogram(metric),
     )
 def correlation_interface(metrics):
     """
     Return the clustered correlation heatmap for the selected metrics.

     Args:
         metrics: Simplified metric names chosen in the UI (may be empty or None).

     Returns:
         The matplotlib Figure produced by plot_correlation_matrix.

     Raises:
         gr.Error: If fewer than two metrics are selected.
     """
     # A correlation matrix needs at least two variables. This function feeds
     # a gr.Plot output, which cannot display a plain string, so the hint is
     # raised as a Gradio error instead of being returned.
     if not metrics or len(metrics) < 2:
         raise gr.Error("Please select at least two metrics to see a correlation matrix.")
     return plot_correlation_matrix(metrics)
 # Dashboard layout: one tab per view. Each tab wires its selector's .change
 # event to the matching *_interface callback.
 # NOTE(review): outputs are only wired to .change; confirm whether an initial
 # render on page load is wanted.
 with gr.Blocks(title="US Population Health Dashboard") as demo:
     gr.Markdown("# US Population Health Dashboard")
     gr.Markdown(
         "\n"
         "    Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.\n"
         "    Use the tabs below to select different datasets and visualizations.\n"
         "    "
     )

     # Tab 1: map, summary table and histogram for a single metric.
     with gr.Tab("Health Metrics"):
         gr.Markdown("### Explore a Selected Health Metric")
         gr.Markdown("Select a health metric to view its geographical distribution, summary statistics, and distribution histogram.")
         health_metric = gr.Dropdown(
             label="Select a Health Metric",
             choices=simplified_metrics,
             value=simplified_metrics[0],
         )
         health_plot = gr.Plot(label="Health Metric Map")
         health_summary = gr.Dataframe(
             label="Summary Statistics",
             headers=["Statistic", "Value"],
         )
         health_hist = gr.Plot(label="Metric Distribution Histogram")
         health_metric.change(
             health_metric_interface,
             inputs=health_metric,
             outputs=[health_plot, health_summary, health_hist],
         )

     # Tab 2: correlation heatmap across several metrics; the first five
     # metrics are pre-selected.
     with gr.Tab("Health Metrics Correlation"):
         gr.Markdown("### Correlation Between Health Metrics")
         gr.Markdown("Select multiple health metrics to see how they correlate with each other. The matrix is reordered using hierarchical clustering.")
         correlation_metrics = gr.CheckboxGroup(
             label="Select Health Metrics",
             choices=simplified_metrics,
             value=simplified_metrics[:5],
         )
         correlation_plot = gr.Plot(label="Correlation Matrix")
         correlation_metrics.change(
             correlation_interface,
             inputs=correlation_metrics,
             outputs=correlation_plot,
         )

     # Tab 3: county unemployment map for one date column.
     # unemployment_interface is not visible in this excerpt; presumably it is
     # defined earlier in the file.
     with gr.Tab("Unemployment Rates Over Time"):
         gr.Markdown("### View Unemployment Rates by County")
         gr.Markdown("Select a date to see the unemployment rate distribution across counties.")
         unique_dates = sorted(county_unemployment_melted['date'].unique())
         unemployment_date = gr.Dropdown(
             label="Select a Date",
             choices=unique_dates,
             value=unique_dates[0],
         )
         unemployment_plot = gr.Plot(label="Unemployment Rate Map")
         unemployment_date.change(
             unemployment_interface,
             inputs=unemployment_date,
             outputs=unemployment_plot,
         )

     # Tab 4: ZCTA poverty map for one year column.
     # poverty_interface is not visible in this excerpt; presumably it is
     # defined earlier in the file.
     with gr.Tab("Poverty Rates Over Time"):
         gr.Markdown("### View Poverty Rates by ZCTA")
         gr.Markdown("Select a year to see the poverty rate distribution across ZIP Code Tabulation Areas.")
         unique_years = sorted(zcta_poverty_melted['year'].unique())
         poverty_year = gr.Dropdown(
             label="Select a Year",
             choices=unique_years,
             value=unique_years[0],
         )
         poverty_plot = gr.Plot(label="Poverty Rate Map")
         poverty_year.change(
             poverty_interface,
             inputs=poverty_year,
             outputs=poverty_plot,
         )

 # Start the app only when the file is executed as a script.
 if __name__ == "__main__":
     demo.launch()