LeonceNsh committed on
Commit
4b3015b
·
verified ·
1 Parent(s): 95e341b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -66
app.py CHANGED
@@ -4,101 +4,151 @@ import gradio as gr
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import numpy as np
 
7
 
8
  # ========================
9
  # Data Loading
10
  # ========================
11
-
12
- # Load the health and demographic data
13
  conus_data = pd.read_csv("conus27.csv")
14
- # Load the county shapefile
15
  county_geojson = gpd.read_file("county.geojson")
16
- # Load the county embeddings
17
  county_embeddings = pd.read_csv("county_embeddings.csv")
18
- # Load the unemployment data
19
  county_unemployment = pd.read_csv("county_unemployment.csv")
20
- # Load the poverty data
21
  zcta_poverty = pd.read_csv("zcta_poverty.csv")
22
- # Load the ZCTA shapefile
23
  zcta_geojson = gpd.read_file("zcta.geojson")
24
 
25
- # Merge unemployment data with county_geojson
26
- county_unemployment_melted = county_unemployment.melt(id_vars=['place'],
27
- var_name='date',
28
- value_name='unemployment_rate')
29
  county_unemployment_melted['place'] = county_unemployment_melted['place'].astype(str)
30
-
31
-
32
- county_geojson_unemployment = county_geojson.merge(county_unemployment_melted, left_on='place', right_on='place', how='left')
33
 
34
  # Prepare poverty data
35
- zcta_poverty_melted = zcta_poverty.melt(id_vars=['place'], var_name='year', value_name='poverty_rate')
36
-
37
-
38
  zcta_poverty_melted['place'] = zcta_poverty_melted['place'].astype(str)
39
-
40
-
41
  zcta_geojson['place'] = zcta_geojson['place'].astype(str)
 
 
 
42
 
43
- zcta_geojson_poverty = zcta_geojson.merge(zcta_poverty_melted, left_on='place', right_on='place', how='left')
44
-
45
-
46
- # List of health metrics available
47
  health_metrics = [col for col in conus_data.columns if col.startswith('Percent_Person_')]
48
- # Simplify metric names
49
  simplified_metrics = [col.replace('Percent_Person_', '') for col in health_metrics]
50
  metric_mapping = dict(zip(simplified_metrics, health_metrics))
51
 
 
 
 
 
 
 
 
 
 
 
 
52
  # ========================
53
  # Utility Functions
54
  # ========================
55
 
56
  def plot_health_metric(metric):
57
  """
58
- Plots the geographical distribution of a selected health metric.
59
  """
60
-
61
  metric_full_name = metric_mapping[metric]
62
-
63
  fig, ax = plt.subplots(1, 1, figsize=(12, 8))
64
- gdf_health.plot(
65
  column=metric_full_name,
66
- cmap='OrRd',
67
  markersize=50,
68
  legend=True,
69
  legend_kwds={'label': f"{metric} (%)"},
70
  ax=ax,
71
  alpha=0.7,
72
- edgecolor='k'
 
 
73
  )
74
  ax.set_title(f'Geographical Distribution of {metric}', fontsize=15)
75
  ax.axis('off')
76
  plt.tight_layout()
77
  return fig
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def plot_correlation_matrix(selected_metrics):
80
  """
81
- Plots the correlation matrix for selected health metrics.
82
  """
83
- selected_columns = [metric_mapping[metric] for metric in selected_metrics]
84
  corr = conus_data[selected_columns].corr()
 
 
 
 
 
 
85
  fig, ax = plt.subplots(figsize=(10, 8))
86
- sns.heatmap(corr, annot=True, cmap='coolwarm', square=True, ax=ax)
87
- ax.set_title('Correlation Matrix of Selected Health Metrics', fontsize=15)
 
 
 
 
 
 
88
  plt.tight_layout()
89
  return fig
90
 
91
  def plot_unemployment_map(date):
92
  """
93
- Plots the unemployment rate map for a selected date.
94
  """
95
  date = str(date)
96
  data = county_geojson_unemployment[county_geojson_unemployment['date'] == date]
97
  fig, ax = plt.subplots(1, 1, figsize=(12, 8))
98
  data.plot(
99
  column='unemployment_rate',
100
- cmap='Blues',
101
- linewidth=0.8,
102
  ax=ax,
103
  edgecolor='0.8',
104
  legend=True,
@@ -111,15 +161,15 @@ def plot_unemployment_map(date):
111
 
112
  def plot_poverty_map(year):
113
  """
114
- Plots the poverty rate map for a selected year.
115
  """
116
  year = str(year)
117
  data = zcta_geojson_poverty[zcta_geojson_poverty['year'] == year]
118
  fig, ax = plt.subplots(1, 1, figsize=(12, 8))
119
  data.plot(
120
  column='poverty_rate',
121
- cmap='Reds',
122
- linewidth=0.8,
123
  ax=ax,
124
  edgecolor='0.8',
125
  legend=True,
@@ -130,25 +180,20 @@ def plot_poverty_map(year):
130
  plt.tight_layout()
131
  return fig
132
 
133
- def summarize_health_metrics(metric):
134
- """
135
- Generates summary statistics for a selected health metric.
136
- """
137
- metric_full_name = metric_mapping[metric]
138
- summary = conus_data[metric_full_name].describe().to_frame().reset_index()
139
- summary.columns = ['Statistic', 'Value']
140
- return summary
141
-
142
  # ========================
143
  # Gradio Interface Functions
144
  # ========================
145
 
146
  def health_metric_interface(metric):
147
- fig = plot_health_metric(metric)
148
  summary = summarize_health_metrics(metric)
149
- return fig, summary
 
150
 
151
  def correlation_interface(metrics):
 
 
 
152
  fig = plot_correlation_matrix(metrics)
153
  return fig
154
 
@@ -166,25 +211,46 @@ def poverty_interface(year):
166
 
167
  with gr.Blocks(title="US Population Health Dashboard") as demo:
168
  gr.Markdown("# US Population Health Dashboard")
169
- gr.Markdown("Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.")
170
-
171
- with gr.Tab("Health Metrics Map"):
172
- gr.Markdown("### Geographical Distribution of Health Metrics")
 
 
 
 
173
  health_metric = gr.Dropdown(label="Select a Health Metric", choices=simplified_metrics, value=simplified_metrics[0])
174
- health_plot = gr.Plot()
175
- health_summary = gr.Dataframe(headers=["Statistic", "Value"])
176
- health_metric.change(health_metric_interface, inputs=health_metric, outputs=[health_plot, health_summary])
 
177
 
178
  with gr.Tab("Health Metrics Correlation"):
179
- gr.Markdown("### Correlation Matrix of Health Metrics")
180
- correlation_metrics = gr.CheckboxGroup(label="Select Health Metrics", choices=simplified_metrics, value=simplified_metrics[:5])
181
- correlation_plot = gr.Plot()
 
 
 
 
 
182
  correlation_metrics.change(correlation_interface, inputs=correlation_metrics, outputs=correlation_plot)
183
 
184
-
185
- # ========================
186
- # Launch the App
187
- # ========================
 
 
 
 
 
 
 
 
 
 
 
188
 
189
  if __name__ == "__main__":
190
- demo.launch()
 
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import numpy as np
7
+ from scipy.cluster.hierarchy import linkage, leaves_list
8
 
9
  # ========================
10
  # Data Loading
11
  # ========================
 
 
12
# Load the tabular datasets and the geometries they are joined onto.
conus_data = pd.read_csv("conus27.csv")
county_geojson = gpd.read_file("county.geojson")
county_embeddings = pd.read_csv("county_embeddings.csv")
county_unemployment = pd.read_csv("county_unemployment.csv")
zcta_poverty = pd.read_csv("zcta_poverty.csv")
zcta_geojson = gpd.read_file("zcta.geojson")

# Prepare unemployment data
county_unemployment_melted = county_unemployment.melt(
    id_vars=['place'], var_name='date', value_name='unemployment_rate'
)
county_unemployment_melted['place'] = county_unemployment_melted['place'].astype(str)
# Normalise the county join key to str on the geometry side as well, exactly
# as is done for the ZCTA geometries below; otherwise the merge depends on
# whatever dtype the readers happened to infer for 'place'.
county_geojson['place'] = county_geojson['place'].astype(str)
county_geojson_unemployment = county_geojson.merge(
    county_unemployment_melted, on='place', how='left'
)

# Prepare poverty data
zcta_poverty_melted = zcta_poverty.melt(
    id_vars=['place'], var_name='year', value_name='poverty_rate'
)
zcta_poverty_melted['place'] = zcta_poverty_melted['place'].astype(str)
zcta_geojson['place'] = zcta_geojson['place'].astype(str)
zcta_geojson_poverty = zcta_geojson.merge(
    zcta_poverty_melted, on='place', how='left'
)

# Identify health metrics: every 'Percent_Person_*' column, exposed to the UI
# under its name with that prefix stripped.
health_metrics = [col for col in conus_data.columns if col.startswith('Percent_Person_')]
simplified_metrics = [col.replace('Percent_Person_', '') for col in health_metrics]
metric_mapping = dict(zip(simplified_metrics, health_metrics))

# Create a merged geodataframe for health metrics visualization.
# NOTE(review): assumes conus_data is keyed by the same 'place' (or 'GEOID')
# identifiers as county_geojson - confirm against the data files.
if 'place' in conus_data.columns:
    # Cast the key on a copy so conus_data itself is left unmodified.
    merged_health = county_geojson.merge(
        conus_data.assign(place=conus_data['place'].astype(str)),
        on='place',
        how='left',
    )
elif 'GEOID' in county_geojson.columns and 'GEOID' in conus_data.columns:
    # Fallback key when the health table carries no 'place' column.
    merged_health = county_geojson.merge(conus_data, on='GEOID', how='left')
else:
    raise ValueError("No matching key found to merge health data with geodata.")
53
+
54
  # ========================
55
  # Utility Functions
56
  # ========================
57
 
58
def plot_health_metric(metric):
    """
    Plots the geographical distribution of a selected health metric.

    Args:
        metric: Simplified metric name; must be a key of ``metric_mapping``.

    Returns:
        The matplotlib figure holding the choropleth map. Geometries with no
        value for the metric are drawn in light grey.

    Raises:
        KeyError: If ``metric`` is not a key of ``metric_mapping``.
    """
    metric_full_name = metric_mapping[metric]
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    try:
        merged_health.plot(
            column=metric_full_name,
            cmap='viridis',
            markersize=50,
            legend=True,
            legend_kwds={'label': f"{metric} (%)"},
            ax=ax,
            alpha=0.7,
            edgecolor='black',
            linewidth=0.5,
            missing_kwds={"color": "lightgrey", "label": "No Data"},
        )
        ax.set_title(f'Geographical Distribution of {metric}', fontsize=15)
        ax.axis('off')
        # Lay out this figure explicitly instead of pyplot's "current" figure.
        fig.tight_layout()
    finally:
        # Drop the figure from pyplot's global registry so repeated callbacks
        # do not accumulate open figures; the Figure object stays usable.
        plt.close(fig)
    return fig
80
 
81
def plot_health_histogram(metric):
    """
    Plots the distribution (histogram with KDE overlay) of a selected health metric.

    Args:
        metric: Simplified metric name; must be a key of ``metric_mapping``.

    Returns:
        The matplotlib figure holding the histogram of the metric's non-null
        values in ``conus_data``.

    Raises:
        KeyError: If ``metric`` is not a key of ``metric_mapping``.
    """
    metric_full_name = metric_mapping[metric]
    data = conus_data[metric_full_name].dropna()
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.histplot(data, kde=True, color='teal', ax=ax)
        ax.set_title(f'Distribution of {metric} (%)', fontsize=15)
        ax.set_xlabel(f'{metric} (%)')
        ax.set_ylabel('Count')
        # Lay out this figure explicitly instead of pyplot's "current" figure.
        fig.tight_layout()
    finally:
        # Drop the figure from pyplot's global registry so repeated callbacks
        # do not accumulate open figures; the Figure object stays usable.
        plt.close(fig)
    return fig
94
+
95
def summarize_health_metrics(metric):
    """
    Builds a two-column summary table for a selected health metric.

    The table holds the rows produced by ``describe()`` on the metric's
    non-null values, followed by two appended rows: 'Median' and 'IQR'
    (75th percentile minus 25th percentile).

    Returns a DataFrame with columns 'Statistic' and 'Value'.
    """
    values = conus_data[metric_mapping[metric]].dropna()

    # describe() rows first, reshaped into the two output columns.
    described = (
        values.describe()
        .rename_axis('Statistic')
        .reset_index(name='Value')
    )

    # Extra rows go at the end of the table. Note that describe() already
    # reports the median under the label '50%'.
    middle = values.median()
    quartiles = values.quantile([0.25, 0.75])
    spread = quartiles.iloc[1] - quartiles.iloc[0]
    appended = pd.DataFrame(
        [('Median', middle), ('IQR', spread)],
        columns=['Statistic', 'Value'],
    )

    return pd.concat([described, appended], ignore_index=True)
116
+
117
def plot_correlation_matrix(selected_metrics):
    """
    Plots a correlation matrix for selected health metrics, with rows and
    columns reordered by average-linkage hierarchical clustering.

    Args:
        selected_metrics: Iterable of simplified metric names; each must be a
            key of ``metric_mapping``.

    Returns:
        The matplotlib figure holding the annotated heatmap.

    Raises:
        KeyError: If any name is not a key of ``metric_mapping``.
    """
    # Local import: squareform is not among the file's top-level imports.
    from scipy.spatial.distance import squareform

    selected_columns = [metric_mapping[m] for m in selected_metrics]
    corr = conus_data[selected_columns].corr()

    # Clustering needs at least two variables; with fewer, keep the order.
    if corr.shape[0] >= 2:
        # Correlation distance. Undefined correlations (NaN, e.g. a constant
        # column) are treated as 0 for ordering only, because linkage()
        # rejects non-finite distances; the plotted values are untouched.
        dist = 1.0 - corr.fillna(0.0).to_numpy(dtype=float)
        dist = np.clip((dist + dist.T) / 2.0, 0.0, None)
        np.fill_diagonal(dist, 0.0)
        # linkage() interprets a 2-D array as observation vectors, so the
        # square matrix must be converted to condensed form to be used as
        # pairwise distances.
        linkage_matrix = linkage(squareform(dist, checks=False), method='average')
        idx = leaves_list(linkage_matrix)
        corr = corr.iloc[idx, idx]

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.heatmap(
            corr, annot=True, cmap='coolwarm', square=True, ax=ax,
            xticklabels=corr.columns, yticklabels=corr.columns,
            cbar_kws={"shrink": .8}
        )
        ax.set_title('Correlation Matrix (Hierarchically Clustered)', fontsize=15)
        # Rotate labels on this axes explicitly rather than the "current" one.
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        plt.setp(ax.get_yticklabels(), rotation=0)
        fig.tight_layout()
    finally:
        # Drop the figure from pyplot's global registry so repeated callbacks
        # do not accumulate open figures; the Figure object stays usable.
        plt.close(fig)
    return fig
140
 
141
  def plot_unemployment_map(date):
142
  """
143
+ Plots the unemployment rate map for a selected date with an improved colormap.
144
  """
145
  date = str(date)
146
  data = county_geojson_unemployment[county_geojson_unemployment['date'] == date]
147
  fig, ax = plt.subplots(1, 1, figsize=(12, 8))
148
  data.plot(
149
  column='unemployment_rate',
150
+ cmap='YlGnBu',
151
+ linewidth=0.5,
152
  ax=ax,
153
  edgecolor='0.8',
154
  legend=True,
 
161
 
162
  def plot_poverty_map(year):
163
  """
164
+ Plots the poverty rate map for a selected year with improved colormap.
165
  """
166
  year = str(year)
167
  data = zcta_geojson_poverty[zcta_geojson_poverty['year'] == year]
168
  fig, ax = plt.subplots(1, 1, figsize=(12, 8))
169
  data.plot(
170
  column='poverty_rate',
171
+ cmap='YlOrRd',
172
+ linewidth=0.5,
173
  ax=ax,
174
  edgecolor='0.8',
175
  legend=True,
 
180
  plt.tight_layout()
181
  return fig
182
 
 
 
 
 
 
 
 
 
 
183
  # ========================
184
  # Gradio Interface Functions
185
  # ========================
186
 
187
def health_metric_interface(metric):
    """
    Gradio callback for the health-metric dropdown.

    Returns, in order: the map figure, the summary statistics table and the
    histogram figure for the selected metric.
    """
    # Tuple elements are evaluated left to right: map, summary, histogram.
    return (
        plot_health_metric(metric),
        summarize_health_metrics(metric),
        plot_health_histogram(metric),
    )
192
 
193
def correlation_interface(metrics):
    """
    Gradio callback for the correlation-metrics checkbox group.

    Args:
        metrics: List of selected simplified metric names (may be empty or None).

    Returns:
        The correlation-matrix figure for the selected metrics.

    Raises:
        gr.Error: If fewer than two metrics are selected. The output component
            is a plot and cannot display a plain string, so the message is
            surfaced through Gradio's error mechanism instead of being returned.
    """
    # Require at least two metrics to show correlation
    if not metrics or len(metrics) < 2:
        raise gr.Error("Please select at least two metrics to see a correlation matrix.")
    return plot_correlation_matrix(metrics)
199
 
 
211
 
212
# Dashboard layout: one tab per dataset; each selector re-renders its own
# outputs through the matching *_interface callback when its value changes.
with gr.Blocks(title="US Population Health Dashboard") as demo:
    gr.Markdown("# US Population Health Dashboard")
    gr.Markdown("""
    Explore health metrics, socioeconomic data, and their geospatial distributions across the United States.
    Use the tabs below to select different datasets and visualizations.
    """)

    with gr.Tab("Health Metrics"):
        gr.Markdown("### Explore a Selected Health Metric")
        gr.Markdown("Select a health metric to view its geographical distribution, summary statistics, and distribution histogram.")
        health_metric = gr.Dropdown(
            label="Select a Health Metric",
            choices=simplified_metrics,
            value=simplified_metrics[0],
        )
        health_plot = gr.Plot(label="Health Metric Map")
        health_summary = gr.Dataframe(
            label="Summary Statistics",
            headers=["Statistic", "Value"],
        )
        health_hist = gr.Plot(label="Metric Distribution Histogram")
        health_metric.change(
            fn=health_metric_interface,
            inputs=health_metric,
            outputs=[health_plot, health_summary, health_hist],
        )

    with gr.Tab("Health Metrics Correlation"):
        gr.Markdown("### Correlation Between Health Metrics")
        gr.Markdown("Select multiple health metrics to see how they correlate with each other. The matrix is reordered using hierarchical clustering.")
        correlation_metrics = gr.CheckboxGroup(
            label="Select Health Metrics",
            choices=simplified_metrics,
            value=simplified_metrics[:5],
        )
        correlation_plot = gr.Plot(label="Correlation Matrix")
        correlation_metrics.change(
            fn=correlation_interface,
            inputs=correlation_metrics,
            outputs=correlation_plot,
        )

    with gr.Tab("Unemployment Rates Over Time"):
        gr.Markdown("### View Unemployment Rates by County")
        gr.Markdown("Select a date to see the unemployment rate distribution across counties.")
        # Selectable dates are the distinct melted column names, sorted.
        unique_dates = sorted(county_unemployment_melted['date'].unique())
        unemployment_date = gr.Dropdown(
            label="Select a Date",
            choices=unique_dates,
            value=unique_dates[0],
        )
        unemployment_plot = gr.Plot(label="Unemployment Rate Map")
        unemployment_date.change(
            fn=unemployment_interface,
            inputs=unemployment_date,
            outputs=unemployment_plot,
        )

    with gr.Tab("Poverty Rates Over Time"):
        gr.Markdown("### View Poverty Rates by ZCTA")
        gr.Markdown("Select a year to see the poverty rate distribution across ZIP Code Tabulation Areas.")
        # Selectable years are the distinct melted column names, sorted.
        unique_years = sorted(zcta_poverty_melted['year'].unique())
        poverty_year = gr.Dropdown(
            label="Select a Year",
            choices=unique_years,
            value=unique_years[0],
        )
        poverty_plot = gr.Plot(label="Poverty Rate Map")
        poverty_year.change(
            fn=poverty_interface,
            inputs=poverty_year,
            outputs=poverty_plot,
        )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()