-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
139 lines (114 loc) · 4.57 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Importing the required libraries
import streamlit as st
import pandas as pd
import numpy as np
import pydeck as pdk
import plotly.express as px
# Global Variables
DATE_TIME = "date/time"
DATA_URL = (
"./Data/Motor_Vehicle_Collisions_NYC.csv"
)
# Title of the Web app
st.title("Motor Vehicle Collisions in New York City")
# Displays the text as a markdown
st.markdown("This application is a Streamlit dashboard that can be used "
"to analyze motor vehicle collisions in NYC 🗽💥🚗")
# Decorating the function with streamlit cache to optimize performance
# by storing the result of the function in a local cache
@st.cache(persist=True)
def load_data(nrows):
"""
Function which loads the data, changes the column names to lowercase
and changes the name of column crash_date_crash_time as date/time
"""
# Loading the data
data = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[
['CRASH_DATE', 'CRASH_TIME']])
# Droping NA values
data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
# Converting the column names to lower case
def lowercase(x): return str(x).lower()
data.rename(lowercase, axis="columns", inplace=True)
# Renaming crash_date_crash_time column
data.rename(columns={"crash_date_crash_time": "date/time"}, inplace=True)
#data = data[['date/time', 'latitude', 'longitude']]
return data
# Loading the data with 100,000 rows
# data = load_data(70000)
data = load_data(100000)
# Most injured people in NYC
st.header("Where are the most people injured in NYC?")
# Adjustable slider
injured_people = st.slider(
"Number of persons injured in vehicle collisions", 0, 19)
# Values shown in map
st.map(data.query("injured_persons >= @injured_people")
[["latitude", "longitude"]].dropna(how="any"))
# Collision at a given time in a day
st.header("How many collisions occur during a given time of day?")
# Adjustable slider
hour = st.slider("Hour to look at", 0, 23)
original_data = data
data = data[data[DATE_TIME].dt.hour == hour]
st.markdown("Vehicle collisions between %i:00 and %i:00" %
(hour, (hour + 1) % 24))
# Midpoint of the latitude and longitude for setting initial view of map
midpoint = (np.average(data["latitude"]), np.average(data["longitude"]))
st.write(pdk.Deck(
map_style="mapbox://styles/mapbox/light-v9",
initial_view_state={
"latitude": midpoint[0],
"longitude": midpoint[1],
"zoom": 11,
"pitch": 50,
},
layers=[
pdk.Layer(
"HexagonLayer",
data=data[['date/time', 'latitude', 'longitude']],
get_position=["longitude", "latitude"],
auto_highlight=True,
radius=100,
extruded=True,
pickable=True,
elevation_scale=4,
elevation_range=[0, 1000],
),
],
))
# If this checkbox is clicked, raw data is shown
if st.checkbox("Show raw data", False):
st.subheader("Raw data by minute between %i:00 and %i:00" %
(hour, (hour + 1) % 24))
st.write(data)
# Breakdown of number of crashes between the time interval
# is shown in a bar graph
st.subheader("Breakdown by minute between %i:00 and %i:00" %
(hour, (hour + 1) % 24))
# Filtering the data to be inbetween the time interval
filtered = data[
(data[DATE_TIME].dt.hour >= hour) & (data[DATE_TIME].dt.hour < (hour + 1))
]
hist = np.histogram(filtered[DATE_TIME].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({"minute": range(60), "crashes": hist})
# Plotting a bar chart
fig = px.bar(chart_data, x='minute', y='crashes',
hover_data=['minute', 'crashes'], height=400)
st.write(fig)
# Top 5 dangerous streets for different classe
st.header("Top 5 dangerous streets by affected class")
select = st.selectbox(
'Affected class', ['Pedestrians', 'Cyclists', 'Motorists'])
# If the selected class is pedestrian
if select == 'Pedestrians':
st.write(original_data.query("injured_pedestrians >= 1")[["on_street_name", "injured_pedestrians"]].sort_values(
by=['injured_pedestrians'], ascending=False).dropna(how="any")[:5])
# If the selected class is cyclist
elif select == 'Cyclists':
st.write(original_data.query("injured_cyclists >= 1")[["on_street_name", "injured_cyclists"]].sort_values(
by=['injured_cyclists'], ascending=False).dropna(how="any")[:5])
# If the selected class is motorists
else:
st.write(original_data.query("injured_motorists >= 1")[["on_street_name", "injured_motorists"]].sort_values(
by=['injured_motorists'], ascending=False).dropna(how="any")[:5])