hyzhang00 committed on
Commit
ac65f38
·
verified ·
1 Parent(s): 5a6524d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -87
app.py CHANGED
@@ -1,30 +1,23 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
4
- import plotly.graph_objects as go
5
- import numpy as np
6
 
7
  def load_and_preprocess_data(file_path):
8
  # Read the data
9
  df = pd.read_csv(file_path)
10
 
11
- # Drop redundant columns
12
  df = df.drop(['X', 'Y'], axis=1)
13
-
14
- # Handle missing values
15
  df.dropna(subset=['Incidentid', 'DateTime', 'Year', 'Latitude', 'Longitude'], inplace=True)
16
 
17
- # Fill numeric values
18
  numeric = ['Age_Drv1', 'Age_Drv2']
19
  for col in numeric:
20
  df[col].fillna(df[col].median(), inplace=True)
21
-
22
- # Fill categorical values
23
- categorical = [
24
- 'Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
25
- 'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
26
- 'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet'
27
- ]
28
  for col in categorical:
29
  df[col].fillna('Unknown', inplace=True)
30
 
@@ -36,113 +29,73 @@ def load_and_preprocess_data(file_path):
36
  (df['Age_Drv2'] >= 16)
37
  ]
38
 
39
- # Create age groups for both drivers
40
- df['Age_Group_Drv1'] = pd.cut(
41
- df['Age_Drv1'],
42
- bins=[15, 25, 35, 45, 55, 65, 90],
43
- labels=['16-25', '26-35', '36-45', '46-55', '56-65', '65+']
44
- )
45
 
46
- df['Age_Group_Drv2'] = pd.cut(
47
- df['Age_Drv2'],
48
- bins=[15, 25, 35, 45, 55, 65, 90],
49
- labels=['16-25', '26-35', '36-45', '46-55', '56-65', '65+']
50
- )
51
 
52
  return df
53
 
54
- def create_severity_violation_chart(df, selected_age_group=None):
55
- # Filter by age group if selected
56
- if selected_age_group:
57
- df = df[
58
- (df['Age_Group_Drv1'] == selected_age_group) |
59
- (df['Age_Group_Drv2'] == selected_age_group)
60
- ]
61
-
62
- # Create violation categories for both drivers
63
- violations_drv1 = df.groupby(['Violation1_Drv1', 'Injuryseverity']).size().reset_index(name='count')
64
- violations_drv2 = df.groupby(['Violation1_Drv2', 'Injuryseverity']).size().reset_index(name='count')
65
 
66
  # Combine violations from both drivers
67
- violations_drv1.columns = ['Violation', 'Severity', 'count']
68
- violations_drv2.columns = ['Violation', 'Severity', 'count']
69
- violations_combined = pd.concat([violations_drv1, violations_drv2])
 
 
70
 
71
- # Aggregate the combined violations
72
- violations_agg = violations_combined.groupby(['Violation', 'Severity'])['count'].sum().reset_index()
73
 
74
- # Create the stacked bar chart
75
  fig = px.bar(
76
- violations_agg,
77
  x='Violation',
78
  y='count',
79
  color='Severity',
80
- title=f'Distribution of Crash Severity by Violation Type {selected_age_group if selected_age_group else ""}',
81
  labels={'count': 'Number of Incidents', 'Violation': 'Violation Type'},
82
  height=600
83
  )
84
 
85
- # Customize the layout
86
  fig.update_layout(
87
  xaxis_tickangle=-45,
88
- legend_title='Severity',
89
- barmode='stack',
90
- showlegend=True
91
  )
92
 
93
  return fig
94
 
95
  def main():
96
- st.title('Traffic Crash Analysis Dashboard')
97
 
98
  # Load data
99
  df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')
100
 
101
- # Create age group selector
102
- st.sidebar.header('Filters')
103
- age_groups = ['All'] + list(df['Age_Group_Drv1'].unique())
104
- selected_age_group = st.sidebar.selectbox('Select Age Group', age_groups)
105
-
106
- # Create and display the chart
107
- if selected_age_group == 'All':
108
- fig = create_severity_violation_chart(df)
109
- else:
110
- fig = create_severity_violation_chart(df, selected_age_group)
111
 
 
 
112
  st.plotly_chart(fig, use_container_width=True)
113
 
114
- # Add additional insights
115
- st.subheader('Analysis Insights')
116
-
117
- # Calculate and display some statistics
118
- if selected_age_group == 'All':
119
- total_crashes = len(df)
120
  else:
121
- total_crashes = len(df[
122
- (df['Age_Group_Drv1'] == selected_age_group) |
123
- (df['Age_Group_Drv2'] == selected_age_group)
124
  ])
125
 
126
- st.write(f"Total number of crashes: {total_crashes:,}")
127
-
128
- # Show top violations
129
- st.subheader('Top Violations')
130
- if selected_age_group == 'All':
131
- violations = pd.concat([
132
- df['Violation1_Drv1'].value_counts(),
133
- df['Violation1_Drv2'].value_counts()
134
- ]).groupby(level=0).sum()
135
- else:
136
- filtered_df = df[
137
- (df['Age_Group_Drv1'] == selected_age_group) |
138
- (df['Age_Group_Drv2'] == selected_age_group)
139
- ]
140
- violations = pd.concat([
141
- filtered_df['Violation1_Drv1'].value_counts(),
142
- filtered_df['Violation1_Drv2'].value_counts()
143
- ]).groupby(level=0).sum()
144
-
145
- st.write(violations.head())
146
 
147
  if __name__ == "__main__":
148
  main()
 
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
 
 
4
 
5
  def load_and_preprocess_data(file_path):
6
  # Read the data
7
  df = pd.read_csv(file_path)
8
 
9
+ # Basic preprocessing
10
  df = df.drop(['X', 'Y'], axis=1)
 
 
11
  df.dropna(subset=['Incidentid', 'DateTime', 'Year', 'Latitude', 'Longitude'], inplace=True)
12
 
13
+ # Fill missing values
14
  numeric = ['Age_Drv1', 'Age_Drv2']
15
  for col in numeric:
16
  df[col].fillna(df[col].median(), inplace=True)
17
+
18
+ categorical = ['Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
19
+ 'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
20
+ 'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet']
 
 
 
21
  for col in categorical:
22
  df[col].fillna('Unknown', inplace=True)
23
 
 
29
  (df['Age_Drv2'] >= 16)
30
  ]
31
 
32
+ # Create age groups
33
+ bins = [15, 25, 35, 45, 55, 65, 90]
34
+ labels = ['16-25', '26-35', '36-45', '46-55', '56-65', '65+']
 
 
 
35
 
36
+ df['Age_Group_Drv1'] = pd.cut(df['Age_Drv1'], bins=bins, labels=labels)
37
+ df['Age_Group_Drv2'] = pd.cut(df['Age_Drv2'], bins=bins, labels=labels)
 
 
 
38
 
39
  return df
40
 
41
def create_severity_violation_chart(df, age_group=None):
    """Build a stacked bar chart of injury severity per violation type.

    Violations recorded for driver 1 and driver 2 are counted separately
    per (violation, severity) pair and then summed, so a single incident
    can contribute to two bars.

    Args:
        df: Crash DataFrame. Must contain 'Violation1_Drv1',
            'Violation1_Drv2' and 'Injuryseverity'; when a specific age
            group is requested it must also contain 'Age_Group_Drv1' and
            'Age_Group_Drv2'.
        age_group: Age-group label to restrict the chart to. ``None`` (the
            default) and 'All Ages' both mean "do not filter".

    Returns:
        The plotly figure produced by ``px.bar``.
    """
    # The default of None must behave like 'All Ages'. Comparing the age
    # columns against None would silently drop every row and produce an
    # empty chart titled "... - None".
    show_all = age_group is None or age_group == 'All Ages'
    if not show_all:
        involved = (df['Age_Group_Drv1'] == age_group) | (df['Age_Group_Drv2'] == age_group)
        df = df[involved]
    title_label = 'All Ages' if show_all else age_group

    # Combine violations from both drivers
    per_driver = [
        df.groupby([violation_col, 'Injuryseverity'])
        .size()
        .reset_index(name='count')
        .rename(columns={violation_col: 'Violation', 'Injuryseverity': 'Severity'})
        for violation_col in ('Violation1_Drv1', 'Violation1_Drv2')
    ]
    violations = (
        pd.concat(per_driver)
        .groupby(['Violation', 'Severity'])['count']
        .sum()
        .reset_index()
    )

    # Create visualization
    fig = px.bar(
        violations,
        x='Violation',
        y='count',
        color='Severity',
        title=f'Crash Severity Distribution by Violation Type - {title_label}',
        labels={'count': 'Number of Incidents', 'Violation': 'Violation Type'},
        height=600
    )

    fig.update_layout(
        xaxis_tickangle=-45,
        legend_title='Severity Level',
        barmode='stack'
    )

    return fig
74
 
75
  def main():
76
+ st.title('Traffic Crash Analysis')
77
 
78
  # Load data
79
  df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')
80
 
81
+ # Create simple dropdown for age groups
82
+ age_groups = ['All Ages', '16-25', '26-35', '36-45', '46-55', '56-65', '65+']
83
+ selected_age = st.selectbox('Select Age Group:', age_groups)
 
 
 
 
 
 
 
84
 
85
+ # Create and display chart
86
+ fig = create_severity_violation_chart(df, selected_age)
87
  st.plotly_chart(fig, use_container_width=True)
88
 
89
+ # Display basic statistics
90
+ if selected_age == 'All Ages':
91
+ total_incidents = len(df)
 
 
 
92
  else:
93
+ total_incidents = len(df[
94
+ (df['Age_Group_Drv1'] == selected_age) |
95
+ (df['Age_Group_Drv2'] == selected_age)
96
  ])
97
 
98
+ st.write(f"Total incidents for {selected_age}: {total_incidents:,}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  if __name__ == "__main__":
101
  main()