lyimo committed on
Commit
ab05130
·
verified ·
1 Parent(s): 8a7ba4a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +448 -0
app.py ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.express as px
5
+ import plotly.graph_objs as go
6
+ import folium
7
+ from streamlit_folium import st_folium
8
+ from datetime import timedelta
9
+
10
+ # ----------------------------------------------------
11
+ # 1. Load data
12
+ # ----------------------------------------------------
13
@st.cache_data
def load_data():
    """Read the daily and monthly CSV files into pandas DataFrames.

    The daily file is read with its ``date`` column parsed as datetimes.
    The monthly file is read without any date parsing; callers that need a
    plotting date for monthly rows have to build one themselves.

    Returns:
        tuple: ``(daily_df, monthly_df)``.
    """
    # Both files are read from paths relative to the working directory.
    daily_path = "daily_data_2013_2024.csv"
    monthly_path = "monthly_data_2013_2024.csv"
    return (
        pd.read_csv(daily_path, parse_dates=["date"]),
        pd.read_csv(monthly_path),
    )
21
+
22
# Load both datasets up front; every section below reads from these frames.
daily_data, monthly_data = load_data()

# Latitude/longitude per location name. The keys feed the location filter
# and the values position the map markers.
LOCATIONS = {
    "Karagwe": dict(lat=-1.7718, lon=30.9876),
    "Masasi": dict(lat=-10.7167, lon=38.8000),
    "Igunga": dict(lat=-4.2833, lon=33.8833),
}
30
+
31
# ----------------------------------------------------
# 2. Streamlit UI Layout
# ----------------------------------------------------
st.title("Malaria & Dengue Outbreak Analysis (2013–2024)")

sidebar = st.sidebar
sidebar.header("Filters & Options")

# Disease whose risk/outbreak columns drive the charts below.
disease_choice = sidebar.radio("Select Disease", ["Malaria", "Dengue"], index=0)

# Monthly vs. daily dataset.
data_choice = sidebar.radio("Data Granularity", ["Monthly", "Daily"], index=0)

# Location filter; every known location is selected by default.
location_list = list(LOCATIONS)
selected_locations = sidebar.multiselect(
    "Select Location(s)",
    location_list,
    default=location_list,
)

if data_choice != "Monthly":
    # Daily data: pick a date range bounded by the dates present in the data.
    date_min = daily_data["date"].min()
    date_max = daily_data["date"].max()
    date_range = sidebar.date_input(
        "Select Date Range",
        [date_min, date_max],
        min_value=date_min,
        max_value=date_max,
    )
else:
    # Monthly data: pick a year range bounded by the years present in the data.
    year_min = int(monthly_data["year"].min())
    year_max = int(monthly_data["year"].max())
    year_range = sidebar.slider(
        "Select Year Range",
        min_value=year_min,
        max_value=year_max,
        value=(year_min, year_max),
        step=1,
    )
69
+
70
# ----------------------------------------------------
# 3. Filter data based on user input
# ----------------------------------------------------
if data_choice == "Monthly":
    # Keep rows for the chosen locations within the inclusive year range.
    monthly_mask = (
        monthly_data["location"].isin(selected_locations)
        & (monthly_data["year"] >= year_range[0])
        & (monthly_data["year"] <= year_range[1])
    )
    # Copy AFTER filtering so the column assignment below writes to an
    # independent frame rather than a slice of another frame.
    df = monthly_data[monthly_mask].copy()

    # Create a "date" column (first day of each month) for monthly plotting.
    df["date"] = pd.to_datetime(
        df["year"].astype(str) + "-" + df["month"].astype(str) + "-01"
    )

else:
    # While the user is still picking a range, st.date_input yields fewer than
    # two dates; indexing [1] on that would raise IndexError, so wait instead.
    picked_dates = (
        tuple(date_range)
        if isinstance(date_range, (list, tuple))
        else (date_range,)
    )
    if len(picked_dates) < 2:
        st.info("Select both a start and an end date to display the data.")
        st.stop()

    range_start = pd.to_datetime(picked_dates[0])
    range_end = pd.to_datetime(picked_dates[1])

    # Keep rows for the chosen locations within the inclusive date range.
    daily_mask = (
        daily_data["location"].isin(selected_locations)
        & (daily_data["date"] >= range_start)
        & (daily_data["date"] <= range_end)
    )
    df = daily_data[daily_mask].copy()
87
+
88
# ----------------------------------------------------
# 4. Interactive Plotly Time-Series (Original)
# ----------------------------------------------------
st.subheader(f"{data_choice} {disease_choice} Risk & Climate Parameters")

# Risk column matching the selected disease.
risk_col = {"Malaria": "malaria_risk"}.get(disease_choice, "dengue_risk")

# Titles and the rainfall column are the only things that differ between the
# monthly and daily charts, so choose them first and plot once.
showing_monthly = data_choice == "Monthly"
if showing_monthly:
    risk_title = f"{disease_choice} Risk Over Time ({data_choice})"
    temp_title = "Average Temperature (°C)"
    rain_field = "monthly_rainfall_mm"  # total monthly rainfall
    rain_title = "Monthly Rainfall (mm)"
else:
    risk_title = f"{disease_choice} Daily Risk Over Time (2013–2024)"
    temp_title = "Daily Avg Temperature (°C)"
    rain_field = "daily_rainfall_mm"
    rain_title = "Daily Rainfall (mm)"

# Risk over time, one line per location.
fig = px.line(df, x="date", y=risk_col, color="location", title=risk_title)
fig.update_layout(yaxis_title="Risk (0–1)")
st.plotly_chart(fig, use_container_width=True)

# Temperature and rainfall side by side.
col1, col2 = st.columns(2)
with col1:
    fig_temp = px.line(
        df, x="date", y="temp_avg", color="location", title=temp_title
    )
    st.plotly_chart(fig_temp, use_container_width=True)
with col2:
    fig_rain = px.line(
        df, x="date", y=rain_field, color="location", title=rain_title
    )
    st.plotly_chart(fig_rain, use_container_width=True)

# The outbreak table is shown for monthly data only.
if showing_monthly:
    flag_col = (
        "malaria_outbreak" if disease_choice == "Malaria" else "dengue_outbreak"
    )
    outbreak_months = df[df[flag_col].eq(True)]
    if outbreak_months.empty:
        st.write(
            f"No months meet the {disease_choice} outbreak criteria in this selection."
        )
    else:
        st.write(f"**Months with likely {disease_choice} outbreak:**")
        table_columns = [
            "location", "year", "month", "temp_avg",
            "humidity", "monthly_rainfall_mm", flag_col,
        ]
        st.dataframe(outbreak_months[table_columns])
178
+
179
# ----------------------------------------------------
# 5. Correlation Heatmap (Original)
# ----------------------------------------------------
st.subheader(f"Correlation Heatmap - {data_choice} Data")

# Correlation method, chosen by the user (also reused by the lag section).
corr_method = st.selectbox("Correlation Method", ["pearson", "spearman"], index=0)

# Same weather/risk columns for both granularities; only the rainfall column
# name depends on whether the monthly or daily frame is in use.
rainfall_field = (
    "monthly_rainfall_mm" if data_choice == "Monthly" else "daily_rainfall_mm"
)
subset_cols = ["temp_avg", "humidity", rainfall_field, "malaria_risk", "dengue_risk"]

corr_df = df[subset_cols].corr(method=corr_method)
heatmap_title = f"Correlation Matrix of Weather & Risk ({corr_method.capitalize()})"
fig_corr = px.imshow(corr_df, text_auto=True, aspect="auto", title=heatmap_title)
st.plotly_chart(fig_corr, use_container_width=True)
201
+
202
# ----------------------------------------------------
# 6. Interactive Map (Original)
# ----------------------------------------------------
st.subheader("Interactive Map")
st.markdown(
    """
**Note**: We only have 3 locations. Each marker popup shows some aggregated
stats for the displayed data range.
"""
)

# Base map with a fixed starting centre and zoom level.
m = folium.Map(location=[-6.0, 35.0], zoom_start=6)

# Monthly and daily popups differ only in the rainfall column that is averaged
# and the label shown next to it.
if data_choice == "Monthly":
    rain_column, rain_caption = "monthly_rainfall_mm", "Avg Rainfall (mm)"
else:
    rain_column, rain_caption = "daily_rainfall_mm", "Avg Rain (mm/day)"

for loc in selected_locations:
    loc_info = LOCATIONS[loc]
    loc_df = df[df["location"] == loc]
    if loc_df.empty:
        # Nothing left for this location after filtering: no marker.
        continue

    # Means over the rows currently in the filtered selection.
    avg_risk = loc_df[risk_col].mean()
    avg_temp = loc_df["temp_avg"].mean()
    avg_rain = loc_df[rain_column].mean()

    # Popup body (HTML).
    popup_html = f"""
    <b>{loc}</b><br/>
    Disease: {disease_choice}<br/>
    Avg Risk (in selection): {avg_risk:.2f}<br/>
    Avg Temp (°C): {avg_temp:.2f}<br/>
    {rain_caption}: {avg_rain:.2f}<br/>
    """
    marker = folium.Marker(
        location=[loc_info["lat"], loc_info["lon"]],
        popup=popup_html,
        tooltip=f"{loc} ({disease_choice})",
    )
    marker.add_to(m)

# Render the Folium map in Streamlit.
st_data = st_folium(m, width=700, height=500)
267
+
268
# ----------------------------------------------------
# 7. Additional Explorations (New Features)
# ----------------------------------------------------
st.header("Additional Explorations")

###############################################################################
# 7.1 Compare Malaria & Dengue Risk Side-by-Side (same chart) for the same data
###############################################################################
st.subheader("Compare Malaria & Dengue Risk Over Time")
compare_both = st.checkbox("Compare Both Diseases on One Plot")

if compare_both:
    # Reshape to long format (one row per date/location/disease) so both risk
    # series can share one chart. The reshape is the same for monthly and
    # daily data because both frames carry the same columns used here.
    df_long = pd.melt(
        df,
        id_vars=["date", "location", "temp_avg", "humidity"],
        value_vars=["malaria_risk", "dengue_risk"],
        var_name="disease",
        value_name="risk",
    )

    # df is already limited to the selected locations, so plot it directly:
    # colour distinguishes location, dash style distinguishes disease.
    title_str = "Malaria vs. Dengue Risk"
    fig_compare = px.line(
        df_long,
        x="date",
        y="risk",
        color="location",
        line_dash="disease",
        title=title_str,
    )
    fig_compare.update_layout(yaxis_title="Risk (0–1)")
    st.plotly_chart(fig_compare, use_container_width=True)
315
+
316
##################################################
# 7.2 Scatter Matrix (Pairwise relationships)
##################################################
st.subheader("Scatter Matrix of Risk & Weather Parameters")

# Let the user choose which columns to include in the scatter matrix.
scatter_cols = st.multiselect(
    "Choose additional columns to include in Scatter Matrix (besides risk & weather).",
    ["temp_avg","humidity","monthly_rainfall_mm","daily_rainfall_mm","malaria_risk","dengue_risk"],
    default=["temp_avg","humidity","malaria_risk","dengue_risk"]
)

# The option list mixes monthly-only and daily-only rainfall columns, so a
# selection can name a column the current frame lacks. Indexing df with such
# a name raises KeyError, so split the selection before touching df.
present_cols = [col for col in scatter_cols if col in df.columns]
absent_cols = [col for col in scatter_cols if col not in df.columns]
if absent_cols:
    st.info(
        f"Skipping columns not available in {data_choice} data: "
        + ", ".join(absent_cols)
    )

if len(present_cols) < 2:
    st.warning("Please select at least two columns to generate a scatter matrix.")
else:
    # Drop columns that exist but contain no values at all.
    sm_df = df[present_cols].dropna(axis=1, how="all")

    fig_sm = px.scatter_matrix(
        sm_df,
        dimensions=sm_df.columns,
        title="Scatter Matrix",
        color_discrete_sequence=["#636EFA"],  # single colour for all points
    )
    fig_sm.update_layout(width=800, height=800)
    st.plotly_chart(fig_sm, use_container_width=True)
347
+
348
##################################################
# 7.3 Simple Time-Lag Correlation (Example)
##################################################
st.subheader("Time-Lag Correlation (Experimental)")

st.markdown("""
Here, you can experiment with a simple lag analysis. For example, check how
temperature or rainfall in previous weeks/months correlates with **current**
Malaria/Dengue risk.
""")

time_lag = st.slider("Select Lag (days) to shift weather parameters", min_value=0, max_value=60, value=0, step=5)

# Work on a copy so the lag columns never leak into df.
df_lag = df.copy()

# Translate the slider value into a number of rows to shift by.
# Daily data: one step per day selected. Monthly data: days are converted to
# whole months at roughly 30 days each, so lags under 30 days give no shift.
if data_choice == "Daily":
    lag_steps, rain_source = time_lag, "daily_rainfall_mm"
elif data_choice == "Monthly":
    lag_steps, rain_source = time_lag // 30, "monthly_rainfall_mm"
else:
    lag_steps, rain_source = 0, None

if lag_steps > 0:
    # Sort by date, then shift within each location so values from one
    # location are never carried into another.
    df_lag = df_lag.sort_values("date")
    shifted = df_lag.groupby("location")[["temp_avg", rain_source]].shift(lag_steps)
    df_lag["temp_avg_lag"] = shifted["temp_avg"]
    df_lag["rain_lag"] = shifted[rain_source]
    # Rows without a lagged value cannot be correlated with the current risk.
    df_lag = df_lag.dropna(subset=["temp_avg_lag", "rain_lag"])

# Correlate the current risk with the lagged weather columns, if they exist.
lag_ready = {"temp_avg_lag", "rain_lag"}.issubset(df_lag.columns)
if not lag_ready:
    st.write("No lag columns or lag is set to 0. Increase the lag to see results.")
else:
    risk_series = df_lag[risk_col]
    lag_corr_temp = risk_series.corr(df_lag["temp_avg_lag"], method=corr_method)
    lag_corr_rain = risk_series.corr(df_lag["rain_lag"], method=corr_method)

    st.write(f"**Correlation between {disease_choice} Risk and lagged Temperature**: {lag_corr_temp:.3f}")
    st.write(f"**Correlation between {disease_choice} Risk and lagged Rainfall**: {lag_corr_rain:.3f}")
393
+
394
##################################################
# 7.4 Outbreak Statistics
##################################################
st.subheader("Outbreak Statistics")

st.markdown("""
This section gives you the **count** of outbreak periods based on user selection
and some summary statistics.
""")

# Outbreak flag column matching the selected disease.
outbreak_flag_col = (
    "malaria_outbreak" if disease_choice == "Malaria" else "dengue_outbreak"
)

if outbreak_flag_col not in df.columns:
    st.write(f"No outbreak flag column found for {disease_choice}.")
else:
    is_outbreak = df[outbreak_flag_col].eq(True)
    is_not_outbreak = df[outbreak_flag_col].eq(False)

    # Count flagged rows per location.
    outbreak_count_by_loc = (
        df[is_outbreak]
        .groupby("location")
        .size()
        .reset_index(name="outbreak_count")
    )
    st.write("**Number of outbreak instances (in current selection) by location:**")
    st.dataframe(outbreak_count_by_loc)

    # Mean temperature, humidity and rainfall for flagged vs. unflagged rows.
    with st.expander("Compare Weather Averages During Outbreak vs. Non-Outbreak"):
        outbreak_df = df[is_outbreak]
        non_outbreak_df = df[is_not_outbreak]

        if outbreak_df.empty:
            st.write(f"No {disease_choice} outbreaks found in the current selection.")
        else:
            # The rainfall column name depends on the data granularity.
            rain_key = (
                "daily_rainfall_mm" if data_choice == "Daily" else "monthly_rainfall_mm"
            )

            avg_temp_outbreak = outbreak_df["temp_avg"].mean()
            avg_hum_outbreak = outbreak_df["humidity"].mean()
            avg_rain_outbreak = outbreak_df[rain_key].mean()

            avg_temp_non = non_outbreak_df["temp_avg"].mean()
            avg_hum_non = non_outbreak_df["humidity"].mean()
            avg_rain_non = non_outbreak_df[rain_key].mean()

            st.write(f"**Outbreak Periods** ({disease_choice}):")
            st.write(f"- Avg Temperature: {avg_temp_outbreak:.2f} °C")
            st.write(f"- Avg Humidity: {avg_hum_outbreak:.2f}%")
            st.write(f"- Avg Rainfall: {avg_rain_outbreak:.2f} mm")

            st.write(f"**Non-Outbreak Periods** ({disease_choice}):")
            st.write(f"- Avg Temperature: {avg_temp_non:.2f} °C")
            st.write(f"- Avg Humidity: {avg_hum_non:.2f}%")
            st.write(f"- Avg Rainfall: {avg_rain_non:.2f} mm")