Shrek29 commited on
Commit
ba61270
·
1 Parent(s): 8457ed3

Added: app.py

Browse files
Files changed (3) hide show
  1. Nuisance_Complaints.csv +0 -0
  2. app.py +224 -0
  3. requirements.txt +76 -0
Nuisance_Complaints.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import folium
7
+ from folium.plugins import HeatMap
8
+ from streamlit_folium import st_folium
9
+ import plotly.express as px
10
+ from datetime import datetime
11
+
12
# --- Streamlit page setup -------------------------------------------------
# Must run before any other st.* call in the script.
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Dashboard heading and team credits.
st.title("Nuisance Complaints Analysis Dashboard")

_TEAM_CREDITS = """
**Team Members:**
* Lu Chang ([email protected])
* Qiming Li ([email protected])
* Ruchita Alate ([email protected])
* Shreyas Kulkarni ([email protected])
* Vishal Devulapalli ([email protected])
"""
st.markdown(_TEAM_CREDITS)
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
26
+
27
# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load 'Nuisance_Complaints.csv' and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows missing 'File Number'
      * parse the three date columns ('File Close Date' coerced, since open
        cases legitimately have no close date)
      * impute missing 'Date Notice Mailed or Given' with the median
        report-to-notice delay
      * fill missing 'Type of Complaint' with 'Unknown'
      * fill missing 'Disposition' with the most common disposition for the
        same complaint type ('Pending' when a type has no mode)
      * derive 'Processing Time' in days for closed files only
      * impute 'Method Submitted' ('Online' for online submissions, else mode)
      * drop rows missing 'Submitted Online?' or 'Mapped Location'
      * extract 'Latitude'/'Longitude' from 'Mapped Location'

    Returns:
        pandas.DataFrame: the cleaned complaints data.

    Raises:
        Exception: re-raised after surfacing a Streamlit error banner.
    """
    try:
        # Load data
        data = pd.read_csv('Nuisance_Complaints.csv')

        # Rows without a file number cannot be tracked; drop them.
        data = data.dropna(subset=['File Number'])

        # Convert dates and handle date-related columns.
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates with the median report->notice delay.
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        missing_notice = data['Date Notice Mailed or Given'].isna()
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

        # Unknown complaint types become an explicit 'Unknown' category.
        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        # Per-type most common disposition ('Pending' when a type has no mode).
        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
            lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
        )
        # Vectorized map+fillna replaces the original row-wise apply:
        # same result, one pass over the column instead of a Python loop.
        data['Disposition'] = data['Disposition'].fillna(
            data['Type of Complaint'].map(most_common_disposition)
        )

        # Processing time (days) is defined only for closed files.
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # Online submissions with no recorded method are 'Online'.
        # .eq(True) yields a clean boolean mask even if the column still
        # contains NaN here (a NaN-bearing mask would make .loc raise).
        online_no_method = data['Submitted Online?'].eq(True) & data['Method Submitted'].isna()
        data.loc[online_no_method, 'Method Submitted'] = 'Online'
        data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

        # Rows missing these critical fields cannot be analysed or mapped.
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like '(lat, lon)' — TODO confirm against the
        # CSV; pull the two floats out with regexes.
        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

        # Ensure Year Reported is integer for clean filtering/sorting.
        data['Year Reported'] = data['Year Reported'].astype(int)

        return data

    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        # Bare raise preserves the original traceback ('raise e' resets it).
        raise
89
+
90
# Load the dataset up front; the dashboard cannot render without it.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as exc:
    st.error(f"Error loading data: {str(exc)}")
    st.stop()  # halt this script run — nothing below makes sense without data
97
+
98
# --- Sidebar: year filter and visualization picker ------------------------
st.sidebar.header("Dashboard Controls")

# Year choices: 'All Time' first, then every reported year in ascending
# order ('Year Reported' is already int after cleaning).
year_options = ['All Time'] + sorted(int(y) for y in data['Year Reported'].unique())

selected_year = st.sidebar.selectbox("Select Year", options=year_options)

viz_type = st.sidebar.selectbox(
    "Select Visualization",
    [
        "Complaint Types",
        "Geographic Distribution",
        "Resolution Status",
        "Submission Methods",
        "Complaints by Disposition",
    ],
)
116
+
117
+
118
# Restrict the working data to the chosen year; 'All Time' keeps everything.
# The header mirrors the same choice, so both branches set it together.
if selected_year == 'All Time':
    filtered_data = data
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for Year {selected_year}")
129
# --- Main content: headline metrics across three columns ------------------
total_col, time_col, type_col = st.columns(3)

with total_col:
    st.metric("Total Complaints", len(filtered_data))

with time_col:
    # Mean is NaN when no row has a processing time; show N/A then.
    mean_days = filtered_data['Processing Time'].mean()
    label = f"{mean_days:.1f} days" if pd.notna(mean_days) else "N/A"
    st.metric("Average Processing Time", label)

with type_col:
    if filtered_data.empty:
        st.metric("Most Common Type", "N/A")
    else:
        st.metric("Most Common Type",
                  filtered_data['Type of Complaint'].value_counts().index[0])
148
if viz_type == "Complaint Types":
    # Interactive donut chart of complaint-type shares.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']

    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Geographic Distribution":
    # Heatmap of complaint locations on a folium map.
    st.subheader("Clustered Heatmap of Complaints")
    coords = filtered_data[['Latitude', 'Longitude']].dropna()
    if coords.empty:
        # Guard: an empty selection would give a NaN map centre and
        # crash folium.Map.
        st.write("No data available for the selected year.")
    else:
        map_center = [coords['Latitude'].mean(), coords['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(coords.values.tolist()).add_to(m)
        st_folium(m, width=700, height=500)  # return value is unused

elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    resolution_counts = filtered_data['Disposition'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    # Draw onto the explicit axes instead of relying on pyplot's
    # implicit "current axes" state.
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    submission_counts = filtered_data['Method Submitted'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    # Explicit ax for the same reason as above.
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()

    if not disposition_counts.empty:
        fig, ax = plt.subplots(figsize=(10, 6))
        # hue + legend=False keeps the per-bar viridis colours while
        # avoiding seaborn 0.13's deprecation of palette-without-hue.
        sns.barplot(x=disposition_counts.values, y=disposition_counts.index,
                    hue=disposition_counts.index, palette="viridis",
                    legend=False, ax=ax)
        ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
        ax.set_xlabel('Number of Complaints', fontsize=12)
        ax.set_ylabel('Disposition', fontsize=12)
        st.pyplot(fig)
    else:
        st.write("No data available for the selected year.")
206
+
207
# --- Key insights: top complaint types and disposition shares -------------
st.header("Key Insights")
left, right = st.columns(2)

with left:
    st.subheader("Top 3 Complaint Types")
    st.write(filtered_data['Type of Complaint'].value_counts().head(3))

with right:
    st.subheader("Resolution Efficiency")
    # Percentage of complaints per disposition, rounded to two decimals.
    share = filtered_data['Disposition'].value_counts() / len(filtered_data) * 100
    st.write(share.round(2))
221
+
222
# ------------------------------------------------------------- footer ----
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")
requirements.txt ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.5.0
2
+ asttokens==3.0.0
3
+ attrs==24.2.0
4
+ blinker==1.9.0
5
+ branca==0.8.0
6
+ cachetools==5.5.0
7
+ certifi==2024.8.30
8
+ charset-normalizer==3.4.0
9
+ click==8.1.7
10
+ comm==0.2.2
11
+ contourpy==1.3.1
12
+ cycler==0.12.1
13
+ debugpy==1.8.9
14
+ decorator==5.1.1
15
+ executing==2.1.0
16
+ folium==0.18.0
17
+ fonttools==4.55.1
18
+ gitdb==4.0.11
19
+ GitPython==3.1.43
20
+ idna==3.10
21
+ ipykernel==6.29.5
22
+ ipython==8.30.0
23
+ jedi==0.19.2
24
+ Jinja2==3.1.4
25
+ jsonschema==4.23.0
26
+ jsonschema-specifications==2024.10.1
27
+ jupyter_client==8.6.3
28
+ jupyter_core==5.7.2
29
+ kiwisolver==1.4.7
30
+ markdown-it-py==3.0.0
31
+ MarkupSafe==3.0.2
32
+ matplotlib==3.9.3
33
+ matplotlib-inline==0.1.7
34
+ mdurl==0.1.2
35
+ narwhals==1.15.2
36
+ nest-asyncio==1.6.0
37
+ numpy==2.1.3
38
+ packaging==24.2
39
+ pandas==2.2.3
40
+ parso==0.8.4
41
+ pexpect==4.9.0
42
+ pillow==11.0.0
43
+ platformdirs==4.3.6
44
+ plotly==5.24.1
45
+ prompt_toolkit==3.0.48
46
+ protobuf==5.29.1
47
+ psutil==6.1.0
48
+ ptyprocess==0.7.0
49
+ pure_eval==0.2.3
50
+ pyarrow==18.1.0
51
+ pydeck==0.9.1
52
+ Pygments==2.18.0
53
+ pyparsing==3.2.0
54
+ python-dateutil==2.9.0.post0
55
+ pytz==2024.2
56
+ pyzmq==26.2.0
57
+ referencing==0.35.1
58
+ requests==2.32.3
59
+ rich==13.9.4
60
+ rpds-py==0.22.3
61
+ seaborn==0.13.2
62
+ six==1.17.0
63
+ smmap==5.0.1
64
+ stack-data==0.6.3
65
+ streamlit==1.40.2
66
+ streamlit_folium==0.23.2
67
+ tenacity==9.0.0
68
+ toml==0.10.2
69
+ tornado==6.4.2
70
+ traitlets==5.14.3
71
+ typing_extensions==4.12.2
72
+ tzdata==2024.2
73
+ urllib3==2.2.3
74
+ watchdog==6.0.0
75
+ wcwidth==0.2.13
76
+ xyzservices==2024.9.0