Chloecky committed on
Commit
2b51b88
·
verified ·
1 Parent(s): 394c7d4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +195 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
@st.cache_data
def load_data():
    """Download the Chicago crash records and keep only geolocated rows.

    The CSV is read from the project's Hugging Face dataset repository.
    Rows missing either ``LATITUDE`` or ``LONGITUDE`` are discarded.

    Returns:
        pandas.DataFrame: the crash table restricted to rows that have both
        coordinates, with the original index labels preserved.
    """
    source_url = "https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Traffic_Crashes_-_Crashes_20250420.csv"
    crashes = pd.read_csv(source_url)
    # A row is usable only when both coordinates are present.
    has_coordinates = crashes['LATITUDE'].notna() & crashes['LONGITUDE'].notna()
    return crashes[has_coordinates]
11
+
12
st.set_page_config(layout="wide")

st.title('Streamlit App for IS445 FP2')
st.text('Group 8: Keyu (Chloe) Cai, Yutong Zheng')

traffic = load_data()

# Parse the crash timestamp so calendar fields can be derived from it.
traffic['CRASH_DATE'] = pd.to_datetime(traffic['CRASH_DATE'])

# Per the original author's note the raw data already carries hour and month
# columns, so only the year and day-of-month are derived here.
traffic['YEAR'] = traffic['CRASH_DATE'].dt.year
traffic['DY'] = traffic['CRASH_DATE'].dt.day

# Columns that none of the charts below use; they are left out of the
# analysis table.
excluded_columns = [
    'CRASH_RECORD_ID', 'CRASH_DATE_EST_I',
    'REPORT_TYPE', 'INTERSECTION_RELATED_I', 'NOT_RIGHT_OF_WAY_I',
    'HIT_AND_RUN_I', 'PHOTOS_TAKEN_I', 'STATEMENTS_TAKEN_I',
    'DOORING_I', 'WORK_ZONE_I', 'WORK_ZONE_TYPE', 'WORKERS_PRESENT_I',
    'LOCATION',
]
traffic_analysis = traffic.loc[:, ~traffic.columns.isin(excluded_columns)]

# Select years with complete records (2018 through 2024) and keep only
# crashes in which at least one injury was recorded.
in_complete_years = traffic_analysis['YEAR'].isin(range(2018, 2025))
has_injuries = traffic_analysis['INJURIES_TOTAL'] > 0
traffic_analysis = traffic_analysis[in_complete_years & has_injuries].copy()

# Drop placeholder coordinates recorded as exactly zero.
has_real_position = (traffic_analysis['LONGITUDE'] != 0) & (traffic_analysis['LATITUDE'] != 0)
traffic_analysis = traffic_analysis[has_real_position].copy()

# Add Weekday/Weekend label for each record. Day-of-week codes 1 and 7 are
# labelled 'Weekend'; everything else is 'Weekday'. Computed column-wise
# instead of calling a Python lambda once per row.
is_weekend = traffic_analysis['CRASH_DAY_OF_WEEK'].isin([1, 7])
traffic_analysis['DAY_TYPE'] = np.where(is_weekend, 'Weekend', 'Weekday')
37
+
38
# Driver plot: Heatmap of Injuries Total by Location in City of Chicago
# Lift Altair's default row limit so the whole analysis table can be embedded.
alt.data_transformers.disable_max_rows()

# Brush over both axes; the three driven plots filter on this selection.
selection = alt.selection_interval(encodings=['x','y'])

heatmap_x = alt.X('LONGITUDE:Q', bin=alt.Bin(maxbins=20), title='Longitude (°)')
heatmap_y = alt.Y('LATITUDE:Q', bin=alt.Bin(maxbins=20), title='Latitude (°)')
heatmap_color = alt.Color(
    'sum(INJURIES_TOTAL):Q',
    scale=alt.Scale(scheme='blues'),
    title='Injuries Total',
    legend=alt.Legend(orient='left', offset=20, titlePadding=15),
)
heatmap_tooltip = [
    alt.Tooltip('count()', title='Crash Count'),
    alt.Tooltip('sum(INJURIES_TOTAL):Q', title='Injuries Total'),
    alt.Tooltip('LATITUDE:Q', bin=True, title='Latitude bin'),
    alt.Tooltip('LONGITUDE:Q', bin=True, title='Longitude bin'),
]

# NOTE: to centre the title instead, pass
# alt.TitleParams(text=..., anchor='middle') as the title.
chart = (
    alt.Chart(traffic_analysis)
    .mark_rect()
    .encode(
        x=heatmap_x,
        y=heatmap_y,
        color=heatmap_color,
        tooltip=heatmap_tooltip,
    )
    .add_params(selection)
    .properties(
        width=300,
        height=300,
        title='Heatmap of Injuries Total by Location in City of Chicago',
    )
)
65
+
66
# Driven plot 1: Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend
# Pipeline: keep only crashes inside the heatmap brush, count them per
# (hour, day type), then divide the weekday count by 5 and the weekend count
# by 2 so the two lines are comparable per day of the week.
line = alt.Chart(traffic_analysis).transform_filter(
    selection
).transform_aggregate(
    crash_count='count()',
    groupby=['CRASH_HOUR', 'DAY_TYPE']
).transform_calculate(
    adjusted_count="datum.DAY_TYPE == 'Weekday' ? datum.crash_count / 5 : datum.crash_count / 2"
).mark_line(point=True).encode(
    x=alt.X('CRASH_HOUR:O', title='Hour of Day'),
    y=alt.Y('adjusted_count:Q', title='Average Number of Injury-Related Crashes'),
    color=alt.Color('DAY_TYPE:N', legend=alt.Legend(title='Day Type', titlePadding=15)),
    tooltip=[
        alt.Tooltip('CRASH_HOUR:O', title='Hour of Day'),
        alt.Tooltip('DAY_TYPE:N', title='Day Type'),
        # adjusted_count is a quotient and usually fractional, so show one
        # decimal place rather than an integer format that would round it.
        alt.Tooltip('adjusted_count:Q', title='Average Count', format=',.1f')
    ]
).properties(
    width=300,
    height=300,
    title='Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend'
)
88
+
89
+ # line = alt.Chart(traffic_analysis).mark_line(point=True).encode(
90
+ # x=alt.X('CRASH_HOUR:O', title='Hour of Day'),
91
+ # y=alt.Y('count()', title='Number of Injury-Related Crashes'),
92
+ # color=alt.Color('DAY_TYPE:N', legend=alt.Legend(title='Day Type'))
93
+ # ).transform_filter(
94
+ # selection
95
+ # ).properties(
96
+ # width=300,
97
+ # height=300,
98
+ # title='Hourly Distribution of Injury-Related Crashes: Weekday vs Weekend'
99
+ # )
100
+
101
+
102
# Driven plot 2: Fatal Injury Rate of Different Lighting Conditions
# Bars show mean(INJURIES_FATAL) per lighting condition for the crashes inside
# the heatmap brush, ordered from the highest bar to the lowest.
bar1 = alt.Chart(traffic_analysis).mark_bar().encode(
    x=alt.X('LIGHTING_CONDITION:N', sort='-y', title='Lighting Condition'),
    y=alt.Y('mean(INJURIES_FATAL):Q', scale=alt.Scale(domainMin=0), axis=alt.Axis(format='%'), title='Fatal Injury Rate'),
    color=alt.Color('LIGHTING_CONDITION:N',
                    scale=alt.Scale(
                        domain=['DARKNESS', 'DARKNESS, LIGHTED ROAD', 'DAWN', 'DUSK', 'DAYLIGHT', 'UNKNOWN'],
                        range=['#084C88', '#2A6FB6', '#4FA3D9', '#7EC8E3', '#BFEFFF', '#E0F7FA']
                    ),
                    legend=alt.Legend(orient='left', title='Lighting Condition', titlePadding=15)),
    tooltip=[
        alt.Tooltip('LIGHTING_CONDITION:N', title='Lighting Condition'),
        # Use a percent format so the tooltip agrees with the '%' y-axis.
        alt.Tooltip('mean(INJURIES_FATAL):Q', title='Fatal Injury Rate', format='.2%')
    ]
).transform_filter(
    selection
).properties(
    width=300,
    height=300,
    title='Fatal Injury Rate of Different Lighting Conditions'
)
123
+
124
+
125
+ # bar2 = alt.Chart(traffic_analysis).mark_bar().encode(
126
+ # x=alt.X('WEATHER_CONDITION:N'),
127
+ # y=alt.Y('mean(INJURIES_FATAL):Q'),
128
+ # color=alt.Color('WEATHER_CONDITION:N',
129
+ # legend=alt.Legend(orient='right'))
130
+ # ).transform_filter(
131
+ # selection
132
+ # ).properties(
133
+ # width=400,
134
+ # height=400
135
+ # )
136
+
137
# Driven plot 3: Trends in Crash Damage Costs by Year (2018–2024)
# One bar per (year, damage level), placed side by side within each year via
# xOffset and restricted to the crashes inside the heatmap brush.
damage_scale = alt.Scale(
    domain=['$500 OR LESS', '$501 - $1,500', 'OVER $1,500'],
    range=['#AEDFF7', '#4FA3D9', '#084C88'],
)
damage_color = alt.Color(
    'DAMAGE:N',
    scale=damage_scale,
    title='Damage Level',
    legend=alt.Legend(title='Damage Level', titlePadding=15),
)
damage_tooltip = [
    alt.Tooltip('YEAR:O', title='Year'),
    alt.Tooltip('DAMAGE:N', title='Damage Level'),
    alt.Tooltip('count()', title='Count'),
]

grouped_bar = (
    alt.Chart(traffic_analysis)
    .mark_bar()
    .encode(
        x=alt.X('YEAR:O', title='Year'),
        y=alt.Y('count()', title='Count'),
        color=damage_color,
        xOffset='DAMAGE:N',
        tooltip=damage_tooltip,
    )
    .transform_filter(selection)
    .properties(
        width=300,
        height=300,
        title='Annual Distribution of Crash Damage Levels (2018–2024)',
    )
)
160
+
161
# Arrange the dashboard as a 2x2 grid: the driver heatmap and hourly line chart
# on top, the two bar charts below. The bar charts colour by different fields,
# so their colour scales are resolved independently.
top_row = alt.hconcat(chart, line)
bottom_row = alt.hconcat(bar1, grouped_bar).resolve_scale(color='independent')
final_chart = alt.vconcat(top_row, bottom_row)

# Render at the charts' own fixed sizes rather than stretching to the container.
st.altair_chart(final_chart, use_container_width=False)
171
+
172
# Explanatory write-up shown beneath the charts: what the data is and how the
# driver heatmap and the three driven plots relate to one another.
overview_text = '''
### Dashboard Overview and Guidance
This dashboard presents an analysis based on the City of Chicago's injury-related crash data from 2018 to 2024. The original dataset (https://data.cityofchicago.org/Transportation/Traffic-Crashes-Crashes/85ca-t3if/data_preview) includes detailed information about crashes that resulted in injuries within the city during this time period.

The top-left figure displays a colored map (heatmap) showing the geographic distribution of total injuries from crashes, with darker areas indicating places where more people were injured. This heatmap serves as the driver plot of the dashboard. Users can click and drag to select a specific region of interest, and the three driven plots will automatically update to reflect data from the selected area.

The first driven plot (top right) shows the distribution of injury-related crashes across 24 hours, comparing patterns between weekdays and weekends. This chart reflects the number of crashes that caused injuries (not the total number of injuries like in the heatmap).

The second driven plot (bottom left) illustrates how different lighting conditions (such as daylight, dusk, or darkness) are associated with variations in fatal injury rates. The lighting conditions are sorted from highest to lowest fatal injury rate.

The third driven plot (bottom right) depicts the trend of crash counts across different damage cost levels from 2018 to 2024. The bar heights represent the number of crashes falling into each damage category for each year.

Overall, this dashboard highlights key insights from the crash dataset in terms of time, environment, and damage severity. It is designed to help anyone interested in traffic crash data better understand the dataset, and it may also offer valuable guidance for city planners or traffic management officials seeking to improve road safety.
'''
st.markdown(overview_text)
186
+
187
# Section describing the community-area boundaries dataset the project plans
# to join against. The heading previously read "Contexual"; corrected here.
st.markdown('''
#### Contextual Dataset
We have found a contextual dataset, which can be accessed at https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas/igwz-8jzy. This dataset shows the boundaries of the 77 community areas in Chicago. Adding this dataset to our project will help us group traffic accidents by these areas instead of just using latitude and longitude. This makes it easier for people to understand where accidents happen more often and helps tell a clearer story about which neighborhoods have more traffic safety issues.
''')

# Section recording where both datasets are hosted.
st.markdown('''
#### Hosting Datasets
We would continue our plan for hosting original dataset on HuggingFace like Part 1 (https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Traffic_Crashes_-_Crashes_20250420.csv). To ensure consistency across all datasets, we also decided to host the contextual dataset on Hugging Face as well (https://huggingface.co/datasets/Chloecky/traffic_crashes_chicago/resolve/main/Boundaries_-_Community_Areas_20250424.csv).
''')
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ altair
4
+ numpy