File size: 4,793 Bytes
b940652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

def load_and_preprocess_data(file_path):
    """Load the crash CSV and return a cleaned DataFrame with driver age groups.

    Processing steps, in order:

    1. Read ``file_path`` with ``pd.read_csv`` and drop the ``X``/``Y`` columns.
    2. Drop rows missing any of ``Incidentid``, ``DateTime``, ``Year``,
       ``Latitude`` or ``Longitude``.
    3. Fill missing driver ages with that column's median and missing values
       in the categorical columns with ``'Unknown'``.
    4. Keep only rows where both driver ages lie in 16-90 inclusive.
    5. Add ``Age_Group_Drv1`` / ``Age_Group_Drv2`` columns built by ``pd.cut``.

    Args:
        file_path: CSV location, passed straight to ``pd.read_csv``.

    Returns:
        The cleaned ``pandas.DataFrame``; the input file is not modified.

    Raises:
        KeyError: If any column named above (or in the lists below) is absent.
    """
    required = ['Incidentid', 'DateTime', 'Year', 'Latitude', 'Longitude']
    numeric = ['Age_Drv1', 'Age_Drv2']
    categorical = [
        'Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
        'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
        'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet',
    ]
    age_bins = [15, 25, 35, 45, 55, 65, 90]
    age_labels = ['16-25', '26-35', '36-45', '46-55', '56-65', '65+']

    df = pd.read_csv(file_path)
    df = df.drop(columns=['X', 'Y'])
    df = df.dropna(subset=required)

    # Build the filled columns and attach them with assign() rather than
    # calling fillna(inplace=True) on df[col]: the chained in-place form
    # writes to a temporary object and does nothing under Copy-on-Write.
    filled = {col: df[col].fillna(df[col].median()) for col in numeric}
    filled.update({col: df[col].fillna('Unknown') for col in categorical})
    df = df.assign(**filled)

    # between() is inclusive on both ends; NaN ages compare False and drop out.
    plausible_age = (
        df['Age_Drv1'].between(16, 90) & df['Age_Drv2'].between(16, 90)
    )
    valid = df[plausible_age]

    # pd.cut bins are right-closed here, so 25 lands in '16-25' and 65 in
    # '56-65'. assign() returns a new frame, which avoids writing columns
    # onto the filtered slice.
    return valid.assign(
        Age_Group_Drv1=pd.cut(valid['Age_Drv1'], bins=age_bins, labels=age_labels),
        Age_Group_Drv2=pd.cut(valid['Age_Drv2'], bins=age_bins, labels=age_labels),
    )

def create_severity_violation_chart(df, selected_age_group=None):
    """Build a stacked bar chart of injury severity per violation type.

    Args:
        df: Crash DataFrame containing ``Violation1_Drv1``,
            ``Violation1_Drv2``, ``Injuryseverity`` and, when filtering,
            ``Age_Group_Drv1`` / ``Age_Group_Drv2``.
        selected_age_group: Optional age-group label. When truthy, only rows
            where either driver's age group equals it are charted, and the
            label is appended to the chart title.

    Returns:
        The Plotly figure produced by ``px.bar``.
    """
    # Narrow to crashes where at least one driver is in the chosen group.
    if selected_age_group:
        drv1_match = df['Age_Group_Drv1'] == selected_age_group
        drv2_match = df['Age_Group_Drv2'] == selected_age_group
        df = df[drv1_match | drv2_match]

    # Count (violation, severity) pairs separately for each driver, using a
    # shared column layout so the two tables can be stacked.
    per_driver_counts = []
    for violation_column in ('Violation1_Drv1', 'Violation1_Drv2'):
        counts = (
            df.groupby([violation_column, 'Injuryseverity'])
            .size()
            .reset_index(name='count')
        )
        counts.columns = ['Violation', 'Severity', 'count']
        per_driver_counts.append(counts)

    # Sum the two drivers' counts per (violation, severity) pair.
    totals = (
        pd.concat(per_driver_counts)
        .groupby(['Violation', 'Severity'])['count']
        .sum()
        .reset_index()
    )

    chart_title = f'Distribution of Crash Severity by Violation Type {selected_age_group if selected_age_group else ""}'
    axis_labels = {'count': 'Number of Incidents', 'Violation': 'Violation Type'}
    fig = px.bar(
        totals,
        x='Violation',
        y='count',
        color='Severity',
        title=chart_title,
        labels=axis_labels,
        height=600,
    )

    # Tilt the violation names so long labels stay readable.
    layout_options = {
        'xaxis_tickangle': -45,
        'legend_title': 'Severity',
        'barmode': 'stack',
        'showlegend': True,
    }
    fig.update_layout(**layout_options)

    return fig

def main():
    """Render the Streamlit crash-analysis dashboard.

    Loads and cleans the crash CSV, offers an age-group filter in the
    sidebar, draws the severity-by-violation chart, and reports the crash
    total and the most frequent violations for the current selection.
    """
    st.title('Traffic Crash Analysis Dashboard')

    # Load data
    df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')

    # Create age group selector
    st.sidebar.header('Filters')
    # Offer every group observed for either driver, since the filter below
    # matches on both columns. sort_values() follows the category order of
    # the pd.cut result, so the options appear youngest to oldest.
    observed_groups = (
        pd.concat([df['Age_Group_Drv1'], df['Age_Group_Drv2']])
        .dropna()
        .drop_duplicates()
        .sort_values()
    )
    age_groups = ['All'] + observed_groups.tolist()
    selected_age_group = st.sidebar.selectbox('Select Age Group', age_groups)

    # Compute the filtered view once and reuse it for every statistic below.
    if selected_age_group == 'All':
        selection = df
        fig = create_severity_violation_chart(df)
    else:
        in_group = (
            (df['Age_Group_Drv1'] == selected_age_group)
            | (df['Age_Group_Drv2'] == selected_age_group)
        )
        selection = df[in_group]
        fig = create_severity_violation_chart(df, selected_age_group)

    st.plotly_chart(fig, use_container_width=True)

    # Add additional insights
    st.subheader('Analysis Insights')
    st.write(f"Total number of crashes: {len(selection):,}")

    # Show top violations
    st.subheader('Top Violations')
    violations = pd.concat([
        selection['Violation1_Drv1'].value_counts(),
        selection['Violation1_Drv2'].value_counts(),
    ]).groupby(level=0).sum()
    # groupby() returns the totals ordered by violation name; sort by count
    # so head() really shows the most frequent violations.
    top_violations = violations.sort_values(ascending=False).head()

    st.write(top_violations)

# Run the dashboard only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()