AjithBharadwaj committed on
Commit
d7f893a
·
1 Parent(s): acca66d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pydeck as pdk
5
+ import plotly.express as px
6
+
# Name of the combined crash timestamp column that load_data() exposes.
DATE_TIME = "date/time"
# Relative path of the collisions CSV handed to pd.read_csv in load_data().
DATA_URL = (
    "Motor_Vehicle_Collisions_-_Crashes.csv"
)

st.title("Motor Vehicle Collisions in New York City")
# The trailing emoji are written as real Unicode characters; the previous
# text was the UTF-8 byte sequence decoded with the wrong code page.
st.markdown("This application is a Streamlit dashboard that can be used "
            "to analyze motor vehicle collisions in NYC 🗽💥🚗")
15
+
16
+
# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_data -- confirm the deployed Streamlit version before switching.
@st.cache(persist=True)
def load_data(nrows):
    """Load and normalise the first *nrows* rows of the collisions CSV.

    The CRASH_DATE and CRASH_TIME columns are merged into a single datetime
    column named by DATE_TIME and placed first, rows lacking LATITUDE or
    LONGITUDE are dropped, and every column name is lower-cased.

    Args:
        nrows: Maximum number of CSV rows to read from DATA_URL.

    Returns:
        The cleaned pandas.DataFrame.

    Raises:
        KeyError: If the CSV has no CRASH_DATE / CRASH_TIME column.
        ValueError: If pd.to_datetime cannot parse a combined timestamp.
    """
    data = pd.read_csv(DATA_URL, nrows=nrows)

    # Combine the two columns explicitly: the nested-list form
    # parse_dates=[["CRASH_DATE", "CRASH_TIME"]] is deprecated in pandas 2.2.
    timestamps = pd.to_datetime(data["CRASH_DATE"] + " " + data["CRASH_TIME"])
    data = data.drop(columns=["CRASH_DATE", "CRASH_TIME"])
    # Insert at position 0 to keep the column order the nested parse_dates
    # form used to produce (combined column first, source columns removed).
    data.insert(0, DATE_TIME, timestamps)

    data = data.dropna(subset=["LATITUDE", "LONGITUDE"])
    # DATE_TIME is already lower-case, so this leaves it untouched.
    data = data.rename(columns=lambda name: str(name).lower())
    return data
26
+
# Work on the first 100,000 rows of the CSV.
data = load_data(100000)


st.header("Where are the most people injured in NYC?")
injured_people = st.slider("Number of persons injured in vehicle collisions", 0, 19)
# The @ prefix makes DataFrame.query read the slider value from this scope.
injured_rows = data.query("injured_persons >= @injured_people")
injured_coordinates = injured_rows[["latitude", "longitude"]].dropna(how="any")
st.map(injured_coordinates)
33
+
st.header("How many collisions occur during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)
# Keep a handle on the unfiltered frame; from here on `data` only holds the
# collisions whose timestamp falls in the selected hour.
original_data = data
crash_hours = original_data[DATE_TIME].dt.hour
data = original_data[crash_hours == hour]
# Wrap around midnight so hour 23 is reported as "23:00 and 0:00".
st.markdown(f"Vehicle collisions between {hour}:00 and {(hour + 1) % 24}:00")
39
+
# Centre the map on the mean coordinate of the currently selected collisions.
midpoint = (np.average(data["latitude"]), np.average(data["longitude"]))

view_state = {
    "latitude": midpoint[0],
    "longitude": midpoint[1],
    "zoom": 11,
    "pitch": 50,
}

# Extruded hexagon layer built from the timestamp and coordinate columns.
hexagon_layer = pdk.Layer(
    "HexagonLayer",
    data=data[["date/time", "latitude", "longitude"]],
    get_position=["longitude", "latitude"],
    auto_highlight=True,
    radius=100,
    extruded=True,
    pickable=True,
    elevation_scale=4,
    elevation_range=[0, 1000],
)

deck = pdk.Deck(
    map_style="mapbox://styles/mapbox/light-v9",
    initial_view_state=view_state,
    layers=[hexagon_layer],
)
st.write(deck)
# Optional dump of the rows currently held in `data`.
show_raw = st.checkbox("Show raw data", False)
if show_raw:
    st.subheader(f"Raw data by minute between {hour}:00 and {(hour + 1) % 24}:00")
    st.write(data)

st.subheader(f"Breakdown by minute between {hour}:00 and {(hour + 1) % 24}:00")
# Keep rows whose timestamp hour lies in [hour, hour + 1).
at_or_after_start = data[DATE_TIME].dt.hour >= hour
before_end = data[DATE_TIME].dt.hour < (hour + 1)
filtered = data[at_or_after_start & before_end]

# One bin per minute of the hour; only the counts are needed, not the edges.
minute_of_crash = filtered[DATE_TIME].dt.minute
hist, _bin_edges = np.histogram(minute_of_crash, bins=60, range=(0, 60))
chart_data = pd.DataFrame({"minute": range(60), "crashes": hist})

fig = px.bar(
    chart_data,
    x="minute",
    y="crashes",
    hover_data=["minute", "crashes"],
    height=400,
)
st.write(fig)
76
+
st.header("Top 5 dangerous streets by affected class")
select = st.selectbox('Affected class', ['Pedestrians', 'Cyclists', 'Motorists'])

# Injury-count column for each class; any other choice falls back to the
# motorists column, as the final `else` branch did.
injury_column_by_class = {
    "Pedestrians": "injured_pedestrians",
    "Cyclists": "injured_cyclists",
}
injury_column = injury_column_by_class.get(select, "injured_motorists")

# Rows with at least one injury in that class, highest counts first, with
# rows missing a street name or count removed before taking the first five.
with_injuries = original_data.query(f"{injury_column} >= 1")
ranked = with_injuries[["on_street_name", injury_column]].sort_values(
    by=[injury_column], ascending=False
)
st.write(ranked.dropna(how="any")[:5])