Spaces:
Sleeping
Sleeping
File size: 4,793 Bytes
b940652 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
def load_and_preprocess_data(file_path) -> pd.DataFrame:
    """Load the crash CSV and return a cleaned frame with driver age groups.

    Processing steps, in order:

    1. Read the CSV and drop the ``X`` and ``Y`` columns.
    2. Drop rows missing any of ``Incidentid``, ``DateTime``, ``Year``,
       ``Latitude`` or ``Longitude``.
    3. Fill missing driver ages with the column median and missing
       categorical fields with ``'Unknown'``.
    4. Keep only rows where both driver ages lie in 16-90 inclusive.
    5. Add ``Age_Group_Drv1`` and ``Age_Group_Drv2`` columns via ``pd.cut``.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path or file-like
            object).

    Returns:
        The cleaned DataFrame, as an independent copy.

    Raises:
        KeyError: If an expected column is absent from the CSV.
    """
    df = pd.read_csv(file_path)

    # Drop redundant columns
    df = df.drop(columns=['X', 'Y'])

    # Rows without these key fields cannot be used at all.
    df = df.dropna(subset=['Incidentid', 'DateTime', 'Year', 'Latitude', 'Longitude'])

    # Assign the filled column back explicitly. Calling
    # `df[col].fillna(..., inplace=True)` operates on a temporary selection,
    # which pandas deprecates and which does not update `df` under
    # Copy-on-Write.
    numeric = ['Age_Drv1', 'Age_Drv2']
    for col in numeric:
        # A missing age becomes the median, so the row survives the range
        # filter below instead of being discarded.
        df[col] = df[col].fillna(df[col].median())

    categorical = [
        'Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
        'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
        'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet',
    ]
    for col in categorical:
        df[col] = df[col].fillna('Unknown')

    # Remove invalid ages: both drivers must be between 16 and 90 inclusive.
    # `.copy()` detaches the result from the unfiltered frame so the column
    # assignments below write to a real frame, not a slice.
    valid_ages = df['Age_Drv1'].between(16, 90) & df['Age_Drv2'].between(16, 90)
    df = df[valid_ages].copy()

    # Bins are right-inclusive: (15, 25], (25, 35], ..., (65, 90].
    age_bins = [15, 25, 35, 45, 55, 65, 90]
    age_labels = ['16-25', '26-35', '36-45', '46-55', '56-65', '65+']
    for driver in ('Drv1', 'Drv2'):
        df[f'Age_Group_{driver}'] = pd.cut(
            df[f'Age_{driver}'],
            bins=age_bins,
            labels=age_labels,
        )

    return df
def create_severity_violation_chart(df, selected_age_group=None):
    """Build a stacked bar chart of incident counts per violation and severity.

    Args:
        df: Crash data with ``Violation1_Drv1``, ``Violation1_Drv2`` and
            ``Injuryseverity`` columns, plus ``Age_Group_Drv1`` and
            ``Age_Group_Drv2`` when an age group is given.
        selected_age_group: Optional age-group label. When truthy, only
            crashes where either driver is in that group are counted.

    Returns:
        The plotly figure produced by ``px.bar``.
    """
    # Narrow to crashes involving the requested age group (either driver).
    if selected_age_group:
        involves_group = (
            (df['Age_Group_Drv1'] == selected_age_group)
            | (df['Age_Group_Drv2'] == selected_age_group)
        )
        df = df[involves_group]

    # Tally (violation, severity) pairs for each driver under shared column
    # names so the two tables can be stacked and summed.
    per_driver_counts = []
    for violation_col in ('Violation1_Drv1', 'Violation1_Drv2'):
        tally = df.groupby([violation_col, 'Injuryseverity']).size().reset_index(name='count')
        tally.columns = ['Violation', 'Severity', 'count']
        per_driver_counts.append(tally)

    stacked = pd.concat(per_driver_counts)
    totals = stacked.groupby(['Violation', 'Severity'])['count'].sum().reset_index()

    chart = px.bar(
        totals,
        x='Violation',
        y='count',
        color='Severity',
        title=f'Distribution of Crash Severity by Violation Type {selected_age_group if selected_age_group else ""}',
        labels={'count': 'Number of Incidents', 'Violation': 'Violation Type'},
        height=600,
    )

    # Slant the x tick labels and stack the severity segments.
    chart.update_layout(
        xaxis_tickangle=-45,
        legend_title='Severity',
        barmode='stack',
        showlegend=True,
    )
    return chart
def _filter_by_age_group(df, age_group):
    """Return rows where either driver is in ``age_group``; all rows for 'All'."""
    if age_group == 'All':
        return df
    involves_group = (
        (df['Age_Group_Drv1'] == age_group)
        | (df['Age_Group_Drv2'] == age_group)
    )
    return df[involves_group]


def _top_violations(df, limit=5):
    """Return the ``limit`` most frequent violation values across both drivers."""
    combined = pd.concat([
        df['Violation1_Drv1'].value_counts(),
        df['Violation1_Drv2'].value_counts(),
    ]).groupby(level=0).sum()
    # groupby(level=0) orders the result by violation label, so sort by count
    # before truncating; otherwise the "top" list is merely alphabetical.
    return combined.sort_values(ascending=False).head(limit)


def main():
    """Render the Streamlit dashboard: age filter, severity chart and summary.

    Loads the crash CSV, offers an age-group selector in the sidebar, draws
    the severity-by-violation chart for the selection, then shows the number
    of matching crashes and the most frequent violations.
    """
    st.title('Traffic Crash Analysis Dashboard')

    # Load data
    df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')

    # Age group selector: drop missing values and sort so the options appear
    # in a stable order rather than in order of first appearance in the data.
    st.sidebar.header('Filters')
    present_groups = df['Age_Group_Drv1'].dropna().sort_values().unique()
    age_groups = ['All'] + list(present_groups)
    selected_age_group = st.sidebar.selectbox('Select Age Group', age_groups)

    # Create and display the chart ('All' means no age filter).
    chart_group = None if selected_age_group == 'All' else selected_age_group
    fig = create_severity_violation_chart(df, chart_group)
    st.plotly_chart(fig, use_container_width=True)

    # Compute the selection once and reuse it for every statistic below.
    selection = _filter_by_age_group(df, selected_age_group)

    st.subheader('Analysis Insights')
    total_crashes = len(selection)
    st.write(f"Total number of crashes: {total_crashes:,}")

    st.subheader('Top Violations')
    st.write(_top_violations(selection))
# Build the dashboard only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()