Upload 2 files
Browse files- app.py +195 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import altair as alt
|
4 |
+
import streamlit as st
|
5 |
+
|
6 |
+
@st.cache_data
def load_data():
    """Fetch the Chicago traffic-crash CSV and keep only geolocated rows.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    loaded table instead of downloading it again on every rerun.

    Returns:
        pandas.DataFrame: The table read from the hosted CSV, minus every row
        whose ``LATITUDE`` or ``LONGITUDE`` is missing.
    """
    crashes = pd.read_csv("https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Traffic_Crashes_-_Crashes_20250420.csv")
    # Rows without coordinates cannot be placed on the location heatmap.
    return crashes.dropna(subset=['LATITUDE', 'LONGITUDE'])
|
11 |
+
|
12 |
+
# Page header: wide layout, app title and author line.
st.set_page_config(layout="wide")

st.title('Streamlit App for IS445 FP2')
st.text('Group 8: Keyu (Chloe) Cai, Yutong Zheng')

traffic = load_data()

# Parse the crash timestamp so calendar parts can be derived from it.
traffic['CRASH_DATE'] = pd.to_datetime(traffic['CRASH_DATE'])

# Per the original authors' note, the raw dataset already carries hour and
# month columns, so only the year and the day of month are derived here.
traffic['YEAR'] = traffic['CRASH_DATE'].dt.year
traffic['DY'] = traffic['CRASH_DATE'].dt.day

# Drop the columns the analysis does not use. errors='ignore' keeps the
# behaviour tolerant of a column that is absent from the CSV.
traffic_analysis = traffic.drop(
    columns=[
        'CRASH_RECORD_ID', 'CRASH_DATE_EST_I',
        'REPORT_TYPE', 'INTERSECTION_RELATED_I', 'NOT_RIGHT_OF_WAY_I',
        'HIT_AND_RUN_I', 'PHOTOS_TAKEN_I', 'STATEMENTS_TAKEN_I',
        'DOORING_I', 'WORK_ZONE_I', 'WORK_ZONE_TYPE', 'WORKERS_PRESENT_I',
        'LOCATION',
    ],
    errors='ignore',
)

# Keep injury crashes from the years with complete records (2018-2024) and
# discard rows whose coordinates are the 0 placeholder. A single combined mask
# avoids filtering and copying the frame twice.
keep_rows = (
    traffic_analysis['YEAR'].isin(range(2018, 2025))
    & (traffic_analysis['INJURIES_TOTAL'] > 0)
    & (traffic_analysis['LONGITUDE'] != 0)
    & (traffic_analysis['LATITUDE'] != 0)
)
traffic_analysis = traffic_analysis[keep_rows].copy()

# Add Weekday/Weekend label for each record. Day codes 1 and 7 are labelled
# 'Weekend' (presumably 1 = Sunday and 7 = Saturday -- confirm against the
# dataset's data dictionary). Vectorized isin/map replaces a per-row lambda.
traffic_analysis['DAY_TYPE'] = (
    traffic_analysis['CRASH_DAY_OF_WEEK']
    .isin([1, 7])
    .map({True: 'Weekend', False: 'Weekday'})
)
|
37 |
+
|
38 |
+
# Driver plot: Heatmap of Injuries Total by Location in City of Chicago
# Lift Altair's default row limit so the whole crash table can be charted.
alt.data_transformers.disable_max_rows()

# Interval brush over both axes; the driven plots filter on this selection.
selection = alt.selection_interval(encodings=['x','y'])

# One bin definition shared by the axes AND the tooltip. Binned tooltip fields
# take part in the aggregation's grouping, so giving them different bin
# parameters (previously bin=True, i.e. the default bin count) can split the
# heatmap cells along a second, differently sized grid.
location_bin = alt.Bin(maxbins=20)

chart = alt.Chart(traffic_analysis).mark_rect().encode(
    x=alt.X('LONGITUDE:Q', bin=location_bin, title='Longitude (°)'),
    y=alt.Y('LATITUDE:Q', bin=location_bin, title='Latitude (°)'),
    color=alt.Color('sum(INJURIES_TOTAL):Q',
                    scale=alt.Scale(scheme='blues'),
                    title='Injuries Total',
                    legend=alt.Legend(orient='left', offset=20, titlePadding=15)),
    tooltip=[
        alt.Tooltip('count()', title='Crash Count'),
        alt.Tooltip('sum(INJURIES_TOTAL):Q', title='Injuries Total'),
        alt.Tooltip('LATITUDE:Q', bin=location_bin, title='Latitude bin'),
        alt.Tooltip('LONGITUDE:Q', bin=location_bin, title='Longitude bin'),
    ],
).add_params(
    selection
).properties(
    width=300,
    height=300,
    # To center the title instead, pass
    # alt.TitleParams(text=..., anchor='middle'); anchor='middle' is what
    # centers it.
    title='Heatmap of Injuries Total by Location in City of Chicago',
)
|
65 |
+
|
66 |
+
# Driven plot 1: Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend
# Data pipeline: restrict to the brushed heatmap region, count crashes per
# (hour, day type), then divide by the number of day codes in the group
# (5 for Weekday, 2 for Weekend) so the two lines are comparable.
hourly_counts = (
    alt.Chart(traffic_analysis)
    .transform_filter(selection)
    .transform_aggregate(
        crash_count='count()',
        groupby=['CRASH_HOUR', 'DAY_TYPE'],
    )
    .transform_calculate(
        adjusted_count="datum.DAY_TYPE == 'Weekday' ? datum.crash_count / 5 : datum.crash_count / 2"
    )
)

hourly_tooltip = [
    alt.Tooltip('CRASH_HOUR:O', title='Hour of Day'),
    alt.Tooltip('DAY_TYPE:N', title='Day Type'),
    alt.Tooltip('adjusted_count:Q', title='Average Count', format=',d'),
]

line = (
    hourly_counts
    .mark_line(point=True)
    .encode(
        x=alt.X('CRASH_HOUR:O', title='Hour of Day'),
        y=alt.Y('adjusted_count:Q', title='Average Number of Injury-Related Crashes'),
        color=alt.Color('DAY_TYPE:N', legend=alt.Legend(title='Day Type', titlePadding=15)),
        tooltip=hourly_tooltip,
    )
    .properties(
        width=300,
        height=300,
        title='Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend',
    )
)
|
88 |
+
|
89 |
+
# line = alt.Chart(traffic_analysis).mark_line(point=True).encode(
|
90 |
+
# x=alt.X('CRASH_HOUR:O', title='Hour of Day'),
|
91 |
+
# y=alt.Y('count()', title='Number of Injury-Related Crashes'),
|
92 |
+
# color=alt.Color('DAY_TYPE:N', legend=alt.Legend(title='Day Type'))
|
93 |
+
# ).transform_filter(
|
94 |
+
# selection
|
95 |
+
# ).properties(
|
96 |
+
# width=300,
|
97 |
+
# height=300,
|
98 |
+
# title='Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend'
|
99 |
+
# )
|
100 |
+
|
101 |
+
|
102 |
+
# Driven plot 2: Fatal Injury Rate of Different Lighting Conditions
# The "rate" is the mean of INJURIES_FATAL over the crashes inside the brushed
# heatmap region, per lighting condition; bars are sorted by descending height.
bar1 = alt.Chart(traffic_analysis).mark_bar().encode(
    x=alt.X('LIGHTING_CONDITION:N', sort='-y', title='Lighting Condition'),
    y=alt.Y('mean(INJURIES_FATAL):Q',
            scale=alt.Scale(domainMin=0),
            axis=alt.Axis(format='%'),
            title='Fatal Injury Rate'),
    color=alt.Color('LIGHTING_CONDITION:N',
                    # Fixed category-to-color mapping, darkest blue for
                    # darkness through to the lightest for unknown.
                    scale=alt.Scale(
                        domain=['DARKNESS', 'DARKNESS, LIGHTED ROAD', 'DAWN', 'DUSK', 'DAYLIGHT', 'UNKNOWN'],
                        range=['#084C88', '#2A6FB6', '#4FA3D9', '#7EC8E3', '#BFEFFF', '#E0F7FA']
                    ),
                    legend=alt.Legend(orient='left', title='Lighting Condition', titlePadding=15)),
    tooltip=[
        alt.Tooltip('LIGHTING_CONDITION:N', title='Lighting Condition'),
        # Percent format to match the y-axis; the previous '.2f' displayed the
        # rate as a raw fraction (e.g. 0.00 / 0.01), which hid small values.
        alt.Tooltip('mean(INJURIES_FATAL):Q', title='Fatal Injury Rate', format='.2%'),
    ],
).transform_filter(
    selection
).properties(
    width=300,
    height=300,
    title='Fatal Injury Rate of Different Lighting Conditions',
)
|
123 |
+
|
124 |
+
|
125 |
+
# bar2 = alt.Chart(traffic_analysis).mark_bar().encode(
|
126 |
+
# x=alt.X('WEATHER_CONDITION:N'),
|
127 |
+
# y=alt.Y('mean(INJURIES_FATAL):Q'),
|
128 |
+
# color=alt.Color('WEATHER_CONDITION:N',
|
129 |
+
# legend=alt.Legend(orient='right'))
|
130 |
+
# ).transform_filter(
|
131 |
+
# selection
|
132 |
+
# ).properties(
|
133 |
+
# width=400,
|
134 |
+
# height=400
|
135 |
+
# )
|
136 |
+
|
137 |
+
# Driven plot 3: Trends in Crash Damage Costs by Year (2018–2024)
# Grouped bars: one cluster per year, one bar per damage level (via xOffset),
# counting the crashes inside the brushed heatmap region.
damage_color = alt.Color(
    'DAMAGE:N',
    scale=alt.Scale(
        domain=['$500 OR LESS', '$501 - $1,500', 'OVER $1,500'],
        range=['#AEDFF7', '#4FA3D9', '#084C88'],
    ),
    title='Damage Level',
    legend=alt.Legend(title='Damage Level', titlePadding=15),
)

damage_tooltip = [
    alt.Tooltip('YEAR:O', title='Year'),
    alt.Tooltip('DAMAGE:N', title='Damage Level'),
    alt.Tooltip('count()', title='Count'),
]

grouped_bar = (
    alt.Chart(traffic_analysis)
    .mark_bar()
    .encode(
        x=alt.X('YEAR:O', title='Year'),
        y=alt.Y('count()', title='Count'),
        color=damage_color,
        xOffset='DAMAGE:N',
        tooltip=damage_tooltip,
    )
    .transform_filter(selection)
    .properties(
        width=300,
        height=300,
        title='Annual Distribution of Crash Damage Levels (2018–2024)',
    )
)
|
160 |
+
|
161 |
+
# Assemble the 2x2 dashboard: heatmap and hourly line on top, the two bar
# charts underneath.
top_row = alt.hconcat(chart, line)

# The two bar charts color by different fields, so their color scales are
# resolved independently.
bottom_row = alt.hconcat(bar1, grouped_bar).resolve_scale(color='independent')

final_chart = alt.vconcat(top_row, bottom_row)

# Render at the charts' own fixed sizes rather than stretching to the container.
st.altair_chart(final_chart, use_container_width=False)
|
171 |
+
|
172 |
+
# Explanatory write-up shown under the dashboard: data source, how the brush on
# the heatmap drives the other three plots, and what each plot shows.
overview_text = '''
### Dashboard Overview and Guidance
This dashboard presents an analysis based on the City of Chicago's injury-related crash data from 2018 to 2024. The original dataset (https://data.cityofchicago.org/Transportation/Traffic-Crashes-Crashes/85ca-t3if/data_preview) includes detailed information about crashes that resulted in injuries within the city during this time period.

The top-left figure displays a colored map (heatmap) showing the geographic distribution of total injuries from crashes, with darker areas indicating places where more people were injured. This heatmap serves as the driver plot of the dashboard. Users can click and drag to select a specific region of interest, and the three driven plots will automatically update to reflect data from the selected area.

The first driven plot (top right) shows the distribution of injury-related crashes across 24 hours, comparing patterns between weekdays and weekends. This chart reflects the number of crashes that caused injuries (not the total number of injuries like in the heatmap).

The second driven plot (bottom left) illustrates how different lighting conditions (such as daylight, dusk, or darkness) are associated with variations in fatal injury rates. The lighting conditions are sorted from highest to lowest fatal injury rate.

The third driven plot (bottom right) depicts the trend of crash counts across different damage cost levels from 2018 to 2024. The bar heights represent the number of crashes falling into each damage category for each year.

Overall, this dashboard highlights key insights from the crash dataset in terms of time, environment, and damage severity. It is designed to help anyone interested in traffic crash data better understand the dataset, and it may also offer valuable guidance for city planners or traffic management officials seeking to improve road safety.
'''
st.markdown(overview_text)
|
186 |
+
|
187 |
+
# Section describing the supplementary community-area boundaries dataset.
# The heading previously read "Contexual"; the typo is corrected here.
st.markdown('''
#### Contextual Dataset
We have found a contextual dataset, which can be accessed at https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas/igwz-8jzy. This dataset shows the boundaries of the 77 community areas in Chicago. Adding this dataset to our project will help us group traffic accidents by these areas instead of just using latitude and longitude. This makes it easier for people to understand where accidents happen more often and helps tell a clearer story about which neighborhoods have more traffic safety issues.
''')
|
191 |
+
|
192 |
+
# Section stating where the crash data and the contextual dataset are hosted.
hosting_text = '''
#### Hosting Datasets
We would continue our plan for hosting original dataset on HuggingFace like Part 1 (https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Traffic_Crashes_-_Crashes_20250420.csv). To ensure consistency across all datasets, we also decided to host the contextual dataset on Hugging Face as well (https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Boundaries_-_Community_Areas_20250424.csv).
'''
st.markdown(hosting_text)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
altair
|
4 |
+
numpy
|