Spaces:
Sleeping
Sleeping
Commit
·
d7f893a
1
Parent(s):
acca66d
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pydeck as pdk
|
5 |
+
import plotly.express as px
|
6 |
+
|
7 |
+
# Name of the combined timestamp column that load_data() produces.
DATE_TIME = "date/time"
# CSV file read by load_data(); a bare filename, so it is resolved relative
# to the directory the app is started from.
DATA_URL = "Motor_Vehicle_Collisions_-_Crashes.csv"

st.title("Motor Vehicle Collisions in New York City")
st.markdown(
    "This application is a Streamlit dashboard that can be used "
    "to analyze motor vehicle collisions in NYC 🗽💥🚗"
)
|
15 |
+
|
16 |
+
|
17 |
+
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_data, and newer pandas releases deprecate the nested-list form of
# parse_dates -- confirm the pinned versions before upgrading either.
@st.cache(persist=True)
def load_data(nrows):
    """Load and normalise the collisions CSV.

    Reads at most ``nrows`` rows from ``DATA_URL``, parsing the
    ``CRASH_DATE`` and ``CRASH_TIME`` columns into one combined datetime
    column. Rows without a latitude or longitude are dropped, every column
    name is lower-cased, and the combined timestamp column is renamed to
    ``DATE_TIME``.

    Args:
        nrows: Maximum number of CSV rows to read.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    data = pd.read_csv(
        DATA_URL,
        nrows=nrows,
        parse_dates=[["CRASH_DATE", "CRASH_TIME"]],
    )
    # Rows without coordinates cannot be plotted on any of the maps.
    data = data.dropna(subset=["LATITUDE", "LONGITUDE"])
    data.columns = [str(column).lower() for column in data.columns]
    return data.rename(columns={"crash_date_crash_time": DATE_TIME})
|
26 |
+
|
27 |
+
# Read at most 100,000 rows of the CSV; the sections below filter this frame.
data = load_data(100000)

st.header("Where are the most people injured in NYC?")
# NOTE: the slider variable is referenced by name inside the query string
# below (``@injured_people``), so it must keep this exact name.
injured_people = st.slider("Number of persons injured in vehicle collisions", 0, 19)
injury_rows = data.query("injured_persons >= @injured_people")
injury_points = injury_rows[["latitude", "longitude"]].dropna(how="any")
st.map(injury_points)
|
33 |
+
|
34 |
+
st.header("How many collisions occur during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)
# Keep a handle on the unfiltered frame: the per-class street ranking later
# in the script queries every hour, not just the selected one.
original_data = data
data = data[data[DATE_TIME].dt.hour == hour]
next_hour = (hour + 1) % 24  # wrap 23 -> 0 for the label
st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour, next_hour))

# Centre the view on the selected hour's collisions. Averaging an empty
# selection yields NaN, which is not a usable map coordinate, so fall back to
# the centre of the unfiltered data when the hour has no rows.
center_source = original_data if data.empty else data
midpoint = (
    np.average(center_source["latitude"]),
    np.average(center_source["longitude"]),
)
st.write(
    pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state={
            "latitude": midpoint[0],
            "longitude": midpoint[1],
            "zoom": 11,
            "pitch": 50,
        },
        layers=[
            pdk.Layer(
                "HexagonLayer",
                data=data[[DATE_TIME, "latitude", "longitude"]],
                get_position=["longitude", "latitude"],
                auto_highlight=True,
                radius=100,
                extruded=True,
                pickable=True,
                elevation_scale=4,
                elevation_range=[0, 1000],
            ),
        ],
    )
)
if st.checkbox("Show raw data", False):
    st.subheader("Raw data by minute between %i:00 and %i:00" % (hour, next_hour))
    st.write(data)
|
66 |
+
|
67 |
+
st.subheader("Breakdown by minute between %i:00 and %i:00" % (hour, (hour + 1) % 24))
# Select the rows whose timestamp falls inside the chosen hour.
crash_hours = data[DATE_TIME].dt.hour
in_selected_hour = (crash_hours >= hour) & (crash_hours < (hour + 1))
filtered = data[in_selected_hour]

# One bin per minute of the hour; only the counts are needed, not the edges.
crash_minutes = filtered[DATE_TIME].dt.minute
hist, _bin_edges = np.histogram(crash_minutes, bins=60, range=(0, 60))
chart_data = pd.DataFrame({"minute": range(60), "crashes": hist})

fig = px.bar(
    chart_data,
    x='minute',
    y='crashes',
    hover_data=['minute', 'crashes'],
    height=400,
)
st.write(fig)
|
76 |
+
|
77 |
+
st.header("Top 5 dangerous streets by affected class")
select = st.selectbox('Affected class', ['Pedestrians', 'Cyclists', 'Motorists'])

# Map the chosen class to its injury-count column; any other value is treated
# as motorists, mirroring a plain if / elif / else chain.
injury_columns = {
    'Pedestrians': "injured_pedestrians",
    'Cyclists': "injured_cyclists",
}
injury_column = injury_columns.get(select, "injured_motorists")

# Rows with at least one injury in that class, highest counts first, limited
# to the first five rows that have both a street name and a count.
with_injuries = original_data.query(injury_column + " >= 1")
street_counts = with_injuries[["on_street_name", injury_column]]
ranked = street_counts.sort_values(by=[injury_column], ascending=False)
st.write(ranked.dropna(how="any")[:5])
|