import
streamlit as st
import
pandas as pd
import
numpy as np
import
pydeck as pdk
import
plotly.express as px
# Path of the CSV file the app reads its accident records from.
DATA_URL = "accidents_2012_to_2014.csv"

# Page heading and a one-line description shown at the top of the app.
st.title("Accidents in United Kingdom")
st.markdown("This app analyzes accident data in United Kingdom from 2012 - 2014")

# NOTE: this explanation is deliberately a `#` comment. A bare triple-quoted
# string in the middle of a Streamlit script is not a docstring; Streamlit's
# "magic" would render it into the app as if it were st.write() output.
#
# load_data (defined below) is wrapped in Streamlit's cache decorator so the
# data is not loaded again every time the script is re-executed.
@st.cache(persist=True)
def load_data(nrows):
    """Read the first ``nrows`` rows of ``DATA_URL`` into a cleaned DataFrame.

    Steps, in order:
      1. Read the CSV, asking pandas to combine the ``Date`` and ``Time``
         columns into a single parsed column.
      2. Drop rows that have no ``Latitude`` or ``Longitude``.
      3. Lower-case every column name.
      4. Rename the combined ``date_time`` column to ``"date / time"``.

    The result is cached by the ``st.cache`` decorator (``persist=True``).
    """
    # NOTE(review): st.cache is deprecated in newer Streamlit releases in
    # favour of st.cache_data, and the nested-list form of parse_dates is
    # deprecated in recent pandas -- confirm the pinned versions before
    # upgrading either library.
    frame = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[['Date', 'Time']])

    # Rows without coordinates cannot be plotted on any of the maps.
    frame = frame.dropna(subset=['Latitude', 'Longitude'])

    # str() guards against non-string column labels before lower-casing.
    frame = frame.rename(columns=lambda name: str(name).lower())

    return frame.rename(columns={"date_time": "date / time"})
# Load the first 10,000 rows; every section below works from this frame.
data = load_data(10000)

st.header("Where are the most people casualties in accidents in UK?")

# The slider runs from 1 up to the largest casualty count present in the data.
max_casualties = int(data["number_of_casualties"].max())
casualties = st.slider("Number of persons died", 1, max_casualties)

# Plot every accident with at least the selected number of casualties.
# `@casualties` lets DataFrame.query read the Python variable of that name.
at_least_selected = data.query("number_of_casualties >= @casualties")
map_points = at_least_selected[["latitude", "longitude"]].dropna(how="any")
st.map(map_points)
st.header("How many accidents occur during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)

# Keep a handle on the unfiltered frame; `data` is narrowed to the chosen
# hour from here on, while later sections still need every row.
original_data = data
data = data[data['date / time'].dt.hour == hour]

next_hour = (hour + 1) % 24  # wraps 23 -> 0
st.markdown(f"Vehicle collisions between {hour}:00 and {next_hour}:00")

# Centre the map on the mean position of the accidents in the chosen hour.
# If that hour has no accidents, averaging an empty column would give NaN and
# an unusable view state, so fall back to the mean of the whole dataset.
midpoint_source = original_data if data.empty else data
midpoint = (
    np.average(midpoint_source["latitude"]),
    np.average(midpoint_source["longitude"]),
)

# 3-D hexagon-bin map of the accidents in the selected hour.
st.write(
    pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state={
            "latitude": midpoint[0],
            "longitude": midpoint[1],
            "zoom": 11,
            "pitch": 50,
        },
        layers=[
            pdk.Layer(
                "HexagonLayer",
                data=data[['date / time', 'latitude', 'longitude']],
                get_position=["longitude", "latitude"],
                auto_highlight=True,
                radius=100,
                extruded=True,
                pickable=True,
                elevation_scale=4,
                elevation_range=[0, 1000],
            ),
        ],
    )
)
st.subheader(
    "Breakdown by minute between %i:00 and %i:00" % (hour, (hour + 1) % 24)
)

# Select the rows whose hour lies in [hour, hour + 1).
accident_hours = data['date / time'].dt.hour
within_hour = (accident_hours >= hour) & (accident_hours < (hour + 1))
filtered = data[within_hour]

# One bin per minute of the hour; element [0] of np.histogram's result is
# the array of counts (element [1] would be the bin edges).
accident_minutes = filtered['date / time'].dt.minute
hist = np.histogram(accident_minutes, bins=60, range=(0, 60))[0]

chart_data = pd.DataFrame({"minute": range(60), "Accidents": hist})
fig = px.bar(
    chart_data,
    x='minute',
    y='Accidents',
    hover_data=['minute', 'Accidents'],
    height=400,
)
st.write(fig)
st.header("Condition of Road at the time of Accidents")
select = st.selectbox(
    'Weather ',
    [
        'Dry',
        'Wet / Damp',
        'Frost / ice',
        'Snow',
        'Flood (Over 3cm of water)',
    ],
)

# Every option is compared against 'road_surface_conditions' using exactly the
# option's own text, so one filter on `select` replaces a per-option if/elif
# chain of otherwise identical statements.
# `original_data` is used (not `data`) so the table covers all hours.
condition_columns = [
    "weather_conditions",
    "light_conditions",
    "speed_limit",
    "number_of_casualties",
]
matching_rows = original_data[original_data['road_surface_conditions'] == select]
st.write(
    matching_rows[condition_columns]
    .sort_values(by=['number_of_casualties'], ascending=False)
    .dropna(how="any")
)
# Optional raw table, hidden by default. At this point `data` is the frame
# already narrowed to the hour chosen on the slider above.
show_raw_data = st.checkbox("Show Raw Data", False)
if show_raw_data:
    st.subheader('Raw Data')
    st.write(data)