hyzhang00 committed on
Commit
b940652
·
verified ·
1 Parent(s): 6eade16

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import numpy as np
6
+
7
def load_and_preprocess_data(file_path):
    """Load the crash CSV and return a cleaned frame with driver age groups.

    Processing order:
      1. Read the CSV and drop the ``X`` / ``Y`` columns.
      2. Drop rows missing any of the key fields (incident id, timestamp,
         year, latitude, longitude).
      3. Fill missing driver ages with the column median and missing
         categorical values with the literal ``'Unknown'``.
      4. Keep only rows where both driver ages are within 16..90 inclusive.
      5. Add ``Age_Group_Drv1`` / ``Age_Group_Drv2`` categorical columns.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path or file-like
            object).

    Returns:
        A new ``pandas.DataFrame`` containing the cleaned rows plus the two
        age-group columns.

    Raises:
        KeyError: If one of the columns referenced below is absent from the
            CSV.
    """
    df = pd.read_csv(file_path)

    # Drop redundant columns
    df = df.drop(columns=['X', 'Y'])

    # Rows without these key fields are discarded outright.
    df = df.dropna(
        subset=['Incidentid', 'DateTime', 'Year', 'Latitude', 'Longitude']
    )

    # Assign the filled column back instead of calling
    # ``df[col].fillna(..., inplace=True)``: the in-place form operates on a
    # temporary selection and is a silent no-op under pandas Copy-on-Write.
    for col in ('Age_Drv1', 'Age_Drv2'):
        df[col] = df[col].fillna(df[col].median())

    categorical = [
        'Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
        'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
        'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet',
    ]
    for col in categorical:
        df[col] = df[col].fillna('Unknown')

    # Remove invalid ages (both bounds inclusive). ``.copy()`` makes the
    # result an independent frame so the column assignments below do not
    # write into a slice of the unfiltered data.
    valid_ages = df['Age_Drv1'].between(16, 90) & df['Age_Drv2'].between(16, 90)
    df = df[valid_ages].copy()

    # Bins are right-closed: (15, 25], (25, 35], ... (65, 90]. Note that an
    # age of exactly 65 therefore lands in '56-65', not '65+'.
    age_bins = [15, 25, 35, 45, 55, 65, 90]
    age_labels = ['16-25', '26-35', '36-45', '46-55', '56-65', '65+']
    for driver in ('Drv1', 'Drv2'):
        df[f'Age_Group_{driver}'] = pd.cut(
            df[f'Age_{driver}'],
            bins=age_bins,
            labels=age_labels,
        )

    return df
53
+
54
def create_severity_violation_chart(df, selected_age_group=None):
    """Build a stacked bar chart of incident counts per violation and severity.

    Args:
        df: Crash DataFrame providing ``Violation1_Drv1``,
            ``Violation1_Drv2``, ``Injuryseverity`` and the two
            ``Age_Group_Drv*`` columns.
        selected_age_group: Optional age-group label. When truthy, only rows
            where either driver is in that group are counted.

    Returns:
        The plotly figure produced by ``px.bar`` with the layout applied.
    """
    if selected_age_group:
        drv1_match = df['Age_Group_Drv1'] == selected_age_group
        drv2_match = df['Age_Group_Drv2'] == selected_age_group
        df = df[drv1_match | drv2_match]

    # Tally (violation, severity) pairs separately for each driver, giving
    # both tallies the same column names so they can be stacked.
    per_driver = []
    for violation_col in ('Violation1_Drv1', 'Violation1_Drv2'):
        tally = (
            df.groupby([violation_col, 'Injuryseverity'])
            .size()
            .reset_index(name='count')
        )
        tally.columns = ['Violation', 'Severity', 'count']
        per_driver.append(tally)

    # Sum the two drivers' tallies into one count per pair.
    totals = (
        pd.concat(per_driver)
        .groupby(['Violation', 'Severity'])['count']
        .sum()
        .reset_index()
    )

    chart = px.bar(
        totals,
        x='Violation',
        y='count',
        color='Severity',
        title=f'Distribution of Crash Severity by Violation Type {selected_age_group if selected_age_group else ""}',
        labels={'count': 'Number of Incidents', 'Violation': 'Violation Type'},
        height=600,
    )

    chart.update_layout(
        xaxis_tickangle=-45,
        legend_title='Severity',
        barmode='stack',
        showlegend=True,
    )

    return chart
94
+
95
def _filter_by_age_group(df, selected_age_group):
    """Return rows where either driver is in the group; 'All' keeps every row."""
    if selected_age_group == 'All':
        return df
    return df[
        (df['Age_Group_Drv1'] == selected_age_group)
        | (df['Age_Group_Drv2'] == selected_age_group)
    ]


def main():
    """Render the Streamlit crash dashboard.

    Loads the crash CSV, offers an age-group filter in the sidebar, draws the
    severity-by-violation chart for the selection, and prints the crash total
    and the most frequent violations for the same selection.
    """
    st.title('Traffic Crash Analysis Dashboard')

    # Load data
    df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')

    # Offer every age group seen for EITHER driver, in bin order. Using only
    # driver 1's ``unique()`` would hide groups that occur solely for driver 2
    # and list the options in order of first appearance in the data.
    st.sidebar.header('Filters')
    seen_groups = (
        set(df['Age_Group_Drv1'].dropna()) | set(df['Age_Group_Drv2'].dropna())
    )
    age_groups = ['All'] + [
        group
        for group in df['Age_Group_Drv1'].cat.categories
        if group in seen_groups
    ]
    selected_age_group = st.sidebar.selectbox('Select Age Group', age_groups)

    # Create and display the chart
    if selected_age_group == 'All':
        fig = create_severity_violation_chart(df)
    else:
        fig = create_severity_violation_chart(df, selected_age_group)

    st.plotly_chart(fig, use_container_width=True)

    # The statistics below all describe the same selection, so filter once.
    filtered_df = _filter_by_age_group(df, selected_age_group)

    st.subheader('Analysis Insights')
    st.write(f"Total number of crashes: {len(filtered_df):,}")

    # Show top violations. ``groupby(level=0).sum()`` returns the counts
    # ordered by violation name, so sort by count before taking the head;
    # otherwise the alphabetically first entries are shown instead of the
    # most frequent ones.
    st.subheader('Top Violations')
    violations = (
        pd.concat([
            filtered_df['Violation1_Drv1'].value_counts(),
            filtered_df['Violation1_Drv2'].value_counts(),
        ])
        .groupby(level=0)
        .sum()
        .sort_values(ascending=False)
    )

    st.write(violations.head())
147
# Run the dashboard only when this file is executed as a script.
if __name__ == "__main__":
    main()