Spaces:

LeonceNsh
/

healthcare-networks-gpus

Sleeping

App Files Files Community

LeonceNsh committed on Dec 4, 2024

Commit

d722ce7

verified ·

1 Parent(s): d7dbe6c

Update app.py

Browse files

Files changed (1) hide show

app.py +191 -73

app.py CHANGED Viewed

@@ -1,83 +1,201 @@
-import gradio as gr
 import pandas as pd
 import geopandas as gpd
 import matplotlib.pyplot as plt
 import seaborn as sns
-from timesfm import TimesFm, TimesFmHparams, TimesFmCheckpoint
-from sklearn.ensemble import GradientBoostingRegressor
 import numpy as np
-# GPU-optimized TimesFM setup
-timesfm_backend = "gpu"
-timesfm_model_config = TimesFmHparams(
-    context_len=512,
-    horizon_len=128,
-    per_core_batch_size=128,
-    backend=timesfm_backend,
-)
-timesfm_model = TimesFm(
-    hparams=timesfm_model_config,
-    checkpoint=TimesFmCheckpoint(huggingface_repo_id="google/timesfm-1.0-200m-pytorch")
-)
-# Function to load embeddings and calculate HHI
-def calculate_hhi(file, market_col, id_col, weight_col):
-    df = pd.read_csv(file.name)
-    df['denominator'] = df.groupby(market_col)[weight_col].transform('sum')
-    df['numerator'] = df.groupby([market_col, id_col])[weight_col].transform('sum')
-    df['market_share'] = 100 * (df['numerator'] / df['denominator'])
-    df['market_share_sq'] = df['market_share'] ** 2
-    hhi = df.groupby(market_col).apply(lambda x: x['market_share_sq'].sum())
-    return hhi.reset_index(name='hhi')
-# Function to visualize HHI map
-def plot_hhi_map(hhi_csv, shapefile):
-    hhi_df = pd.read_csv(hhi_csv.name)
-    gdf = gpd.read_file(shapefile.name)
-    gdf = gdf.merge(hhi_df, left_on='fips_code', right_on='market_col', how='left')
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
-    gdf.plot(column='hhi', cmap='RdBu', legend=True, ax=ax, missing_kwds={"color": "lightgrey"})
-    ax.set_title("HHI by County")
     return fig
-# Function to forecast using TimesFM
-def forecast(file, history_steps, forecast_steps):
-    df = pd.read_csv(file.name).set_index('place')
-    history = df[history_steps]
-    forecast = timesfm_model.forecast(inputs=history.values)
-    return pd.DataFrame(forecast, index=history.index)
-# Gradio app interface
-def gradio_interface():
-    with gr.Blocks() as demo:
-        gr.Markdown("### Healthcare Network Analysis and Forecasting")
-        with gr.Tab("Upload Embeddings"):
-            file_upload = gr.File(label="Upload Embeddings (CSV)")
-            hhi_results = gr.DataFrame(label="HHI Results")
-            calculate_button = gr.Button("Calculate HHI")
-            calculate_button.click(
-                calculate_hhi,
-                inputs=[file_upload, "market_col", "id_col", "weight_col"],
-                outputs=hhi_results
-            )
-        with gr.Tab("Visualize Map"):
-            hhi_csv = gr.File(label="Upload HHI CSV")
-            shapefile = gr.File(label="Upload Shapefile")
-            map_plot = gr.Plot(label="HHI Map")
-            plot_button = gr.Button("Generate Map")
-            plot_button.click(plot_hhi_map, inputs=[hhi_csv, shapefile], outputs=map_plot)
-        with gr.Tab("Forecasting"):
-            forecast_file = gr.File(label="Upload Historical Data (CSV)")
-            forecast_steps = gr.Slider(minimum=1, maximum=24, step=1, label="Forecast Steps")
-            forecast_results = gr.DataFrame(label="Forecasted Data")
-            forecast_button = gr.Button("Forecast")
-            forecast_button.click(forecast, inputs=[forecast_file, forecast_steps], outputs=forecast_results)
-    return demo
-# Run app
 if __name__ == "__main__":
-    gradio_interface().launch()

 import pandas as pd
 import geopandas as gpd
+import gradio as gr
 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
+# ========================
+# Data Loading
+# ========================
+# Every input below is read from the working directory when the module is
+# imported, so a missing file fails at startup.
+# NOTE(review): plot_health_metric reads a name `gdf_health` that is not
+# created anywhere in the visible code; confirm where it is supposed to be built.
+# Load the health and demographic data
+conus_data = pd.read_csv("conus27.csv")
+# Load the county shapefile
+county_geojson = gpd.read_file("county.geojson")
+# Load the county embeddings
+county_embeddings = pd.read_csv("county_embeddings.csv")
+# Load the unemployment data
+county_unemployment = pd.read_csv("county_unemployment.csv")
+# Load the poverty data
+zcta_poverty = pd.read_csv("zcta_poverty.csv")
+# Load the ZCTA shapefile
+zcta_geojson = gpd.read_file("zcta.geojson")
+# Reshape unemployment from wide (one column per date) to long form, then
+# attach it to the county geometries
+county_unemployment_melted = county_unemployment.melt(id_vars=['place'],
+                                                      var_name='date',
+                                                      value_name='unemployment_rate')
+county_unemployment_melted['place'] = county_unemployment_melted['place'].astype(str)
+# Cast the join key on the geometry side as well, mirroring the ZCTA path
+# below, so both sides of the merge share the same dtype.
+county_geojson['place'] = county_geojson['place'].astype(str)
+county_geojson_unemployment = county_geojson.merge(county_unemployment_melted, on='place', how='left')
+# Prepare poverty data: wide (one column per year) to long, then join to geometries
+zcta_poverty_melted = zcta_poverty.melt(id_vars=['place'], var_name='year', value_name='poverty_rate')
+zcta_poverty_melted['place'] = zcta_poverty_melted['place'].astype(str)
+zcta_geojson['place'] = zcta_geojson['place'].astype(str)
+zcta_geojson_poverty = zcta_geojson.merge(zcta_poverty_melted, on='place', how='left')
+# List of health metrics available (columns carrying the 'Percent_Person_' prefix)
+health_metrics = [col for col in conus_data.columns if col.startswith('Percent_Person_')]
+# Simplify metric names by dropping the prefix; metric_mapping goes from the
+# short display name back to the full column name
+simplified_metrics = [col.replace('Percent_Person_', '') for col in health_metrics]
+metric_mapping = dict(zip(simplified_metrics, health_metrics))
+# ========================
+# Utility Functions
+# ========================
+def plot_health_metric(metric):
+    """
+    Draw a map of one health metric.
+
+    Args:
+        metric: Simplified metric name; it is translated to the full column
+            name through ``metric_mapping``.
+
+    Returns:
+        The matplotlib figure holding the map.
+    """
+    # NOTE(review): gdf_health is not defined in the visible part of this file;
+    # confirm it is created before this function is called.
+    column_name = metric_mapping[metric]
+    figure, axis = plt.subplots(1, 1, figsize=(12, 8))
+    plot_options = {
+        'column': column_name,
+        'cmap': 'OrRd',
+        'markersize': 50,
+        'legend': True,
+        'legend_kwds': {'label': f"{metric} (%)"},
+        'ax': axis,
+        'alpha': 0.7,
+        'edgecolor': 'k',
+    }
+    gdf_health.plot(**plot_options)
+    axis.set_title(f'Geographical Distribution of {metric}', fontsize=15)
+    axis.axis('off')
+    plt.tight_layout()
+    return figure
+def plot_correlation_matrix(selected_metrics):
+    """
+    Plots the correlation matrix for selected health metrics.
+
+    Args:
+        selected_metrics: Simplified metric names (keys of ``metric_mapping``).
+            May be empty or ``None``, e.g. when every checkbox is cleared.
+
+    Returns:
+        A matplotlib figure with the annotated heatmap, or a figure showing a
+        short prompt when no metric is selected.
+    """
+    fig, ax = plt.subplots(figsize=(10, 8))
+    if not selected_metrics:
+        # With nothing selected the correlation matrix is empty and there is
+        # nothing for the heatmap to draw; show a prompt instead of failing.
+        ax.text(0.5, 0.5, 'Select at least one health metric',
+                ha='center', va='center', fontsize=14, transform=ax.transAxes)
+        ax.axis('off')
+        return fig
+    selected_columns = [metric_mapping[metric] for metric in selected_metrics]
+    corr = conus_data[selected_columns].corr()
+    sns.heatmap(corr, annot=True, cmap='coolwarm', square=True, ax=ax)
+    ax.set_title('Correlation Matrix of Selected Health Metrics', fontsize=15)
+    plt.tight_layout()
+    return fig
+def plot_unemployment_map(date):
+    """
+    Draw the county unemployment-rate map for one date.
+
+    Args:
+        date: Date label to show; it is converted to ``str`` and compared
+            against the ``date`` column of ``county_geojson_unemployment``.
+
+    Returns:
+        The matplotlib figure holding the map.
+    """
+    date = str(date)
+    matches_date = county_geojson_unemployment['date'] == date
+    counties_on_date = county_geojson_unemployment[matches_date]
     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
+    map_style = {
+        'cmap': 'Blues',
+        'linewidth': 0.8,
+        'edgecolor': '0.8',
+        'legend': True,
+        # Rows without a rate are drawn in grey rather than left out
+        'missing_kwds': {"color": "lightgrey", "label": "Missing values"},
+    }
+    counties_on_date.plot(column='unemployment_rate', ax=ax, **map_style)
+    ax.set_title(f'Unemployment Rate by County ({date})', fontsize=15)
+    ax.axis('off')
+    plt.tight_layout()
+    return fig
+def plot_poverty_map(year):
+    """
+    Draw the ZCTA poverty-rate map for one year.
+
+    Args:
+        year: Year label to show; it is converted to ``str`` and compared
+            against the ``year`` column of ``zcta_geojson_poverty``.
+
+    Returns:
+        The matplotlib figure holding the map.
+    """
+    year = str(year)
+    matches_year = zcta_geojson_poverty['year'] == year
+    zctas_in_year = zcta_geojson_poverty[matches_year]
+    figure, axis = plt.subplots(1, 1, figsize=(12, 8))
+    map_style = {
+        'cmap': 'Reds',
+        'linewidth': 0.8,
+        'edgecolor': '0.8',
+        'legend': True,
+        # Rows without a rate are drawn in grey rather than left out
+        'missing_kwds': {"color": "lightgrey", "label": "Missing values"},
+    }
+    zctas_in_year.plot(column='poverty_rate', ax=axis, **map_style)
+    axis.set_title(f'Poverty Rate by ZCTA ({year})', fontsize=15)
+    axis.axis('off')
+    plt.tight_layout()
+    return figure
+def summarize_health_metrics(metric):
+    """
+    Build a two-column table of descriptive statistics for one health metric.
+
+    Args:
+        metric: Simplified metric name, looked up in ``metric_mapping``.
+
+    Returns:
+        A DataFrame with columns ``Statistic`` and ``Value`` holding the
+        output of ``describe()`` for the metric's column in ``conus_data``.
+    """
+    column_name = metric_mapping[metric]
+    described = conus_data[column_name].describe()
+    # describe() returns a Series indexed by statistic name; move that index
+    # into an ordinary column so it is displayed alongside the values.
+    table = described.to_frame().reset_index()
+    table.columns = ['Statistic', 'Value']
+    return table
+# ========================
+# Gradio Interface Functions
+# ========================
+def health_metric_interface(metric):
+    """Return the map figure and the summary table for the chosen metric."""
+    metric_figure = plot_health_metric(metric)
+    metric_summary = summarize_health_metrics(metric)
+    return metric_figure, metric_summary
+def correlation_interface(metrics):
+    """Return the correlation heatmap figure for the chosen metrics."""
+    return plot_correlation_matrix(metrics)
+def unemployment_interface(date):
+    """Return the unemployment map figure for the chosen date."""
+    return plot_unemployment_map(date)
+def poverty_interface(year):
+    """Return the poverty map figure for the chosen year."""
+    poverty_figure = plot_poverty_map(year)
+    return poverty_figure
+# ========================
+# Gradio App Setup
+# ========================
+# Build the four-tab dashboard. Each tab wires one input control to one of the
+# interface functions through the control's change event.
+with gr.Blocks(title="US Population Health Dashboard") as demo:
+    gr.Markdown("# US Population Health Dashboard")
+    gr.Markdown("Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.")
+    with gr.Tab("Health Metrics Map"):
+        gr.Markdown("### Geographical Distribution of Health Metrics")
+        health_metric = gr.Dropdown(label="Select a Health Metric", choices=simplified_metrics, value=simplified_metrics[0])
+        health_plot = gr.Plot()
+        health_summary = gr.Dataframe(headers=["Statistic", "Value"])
+        health_metric.change(health_metric_interface, inputs=health_metric, outputs=[health_plot, health_summary])
+    with gr.Tab("Health Metrics Correlation"):
+        gr.Markdown("### Correlation Matrix of Health Metrics")
+        correlation_metrics = gr.CheckboxGroup(label="Select Health Metrics", choices=simplified_metrics, value=simplified_metrics[:5])
+        correlation_plot = gr.Plot()
+        correlation_metrics.change(correlation_interface, inputs=correlation_metrics, outputs=correlation_plot)
+    with gr.Tab("Unemployment Rate Map"):
+        gr.Markdown("### Geographical Distribution of Unemployment Rates")
+        dates = county_unemployment_melted['date'].unique().tolist()
+        # The date labels are the CSV's column names, i.e. strings, so they
+        # cannot serve as numeric slider bounds; offer them as discrete choices.
+        unemployment_date = gr.Dropdown(label="Select a Date", choices=dates, value=dates[0])
+        unemployment_plot = gr.Plot()
+        unemployment_date.change(unemployment_interface, inputs=unemployment_date, outputs=unemployment_plot)
+    with gr.Tab("Poverty Rate Map"):
+        gr.Markdown("### Geographical Distribution of Poverty Rates")
+        # Year labels are converted to integers so they can bound a numeric slider
+        years = zcta_poverty_melted['year'].unique().astype(int).tolist()
+        poverty_year = gr.Slider(label="Select a Year", minimum=min(years), maximum=max(years), step=1, value=years[0])
+        poverty_plot = gr.Plot()
+        poverty_year.change(poverty_interface, inputs=poverty_year, outputs=poverty_plot)
+# ========================
+# Launch the App
+# ========================
 if __name__ == "__main__":
+    demo.launch()