|
|
|
|
|
import pandas as pd |
|
import streamlit as st |
|
import altair as alt |
|
|
|
# Page header: title, a one-line course blurb, and a pointer to where the
# app is hosted.
APP_URL = "https://huggingface.co/spaces/Chloecky/is445_hw6"

st.title('UFO Sightings in Illinois after 2010')
st.text('My Streamlit App for IS 445 Data Viz Homework 6')
st.markdown(f"The URL for this app is: {APP_URL}")
|
|
|
# Location of the raw sightings CSV; it is echoed on the page before the
# download starts.
ufo_url = (
    "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/"
    "ufo-scrubbed-geocoded-time-standardized-00.csv"
)
st.markdown(f"The URL for the raw UFO data is: {ufo_url}")

# The file is read with header=None, so every row is treated as data and
# the column labels come from this list (in file order).
column_names = [
    'Date',
    'City',
    'State',
    'Country',
    'Shape',
    'Duration(s)',
    'Duration Time',
    'Description',
    'Report Date',
    'Latitude',
    'Longitude',
]
df = pd.read_csv(ufo_url, names=column_names, header=None)
|
|
|
|
|
# --- Clean the raw table and keep only Illinois sightings from 2010 on ---

# Parse the sighting timestamp. Values that do not match the format become
# NaT (errors='coerce'); their year is NaN, which fails the >= comparison
# below, so those rows are excluded from df_new.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %H:%M', errors='coerce')

is_recent = df['Date'].dt.year >= 2010
is_illinois = (df['Country'] == 'us') & (df['State'] == 'il')

# .copy() makes df_new an independent frame. Without it the column
# assignments below write into a slice of df and pandas emits
# SettingWithCopyWarning.
df_new = df[is_recent & is_illinois].copy()

# First day of each sighting's month; used as the x field of the monthly
# line chart and as the field the brush filters on.
df_new['YearMonth'] = df_new['Date'].dt.to_period('M').dt.to_timestamp()

# Fold missing / placeholder shapes into a single 'other' category.
df_new['Shape'] = df_new['Shape'].replace(['null', 'unknown'], 'other').fillna('other')

# NOTE(review): the raw duration column may contain non-numeric text (not
# verifiable from this file alone). Coercing guarantees that the chart's
# `datum["Duration(s)"] + 1` is arithmetic rather than string
# concatenation; unparseable entries become NaN.
df_new['Duration(s)'] = pd.to_numeric(df_new['Duration(s)'], errors='coerce')
|
|
|
|
|
# --- Plot 1: number of sightings per month, with a brushable x axis ---

# One row per month with the number of sightings reported in it.
monthly_counts = (
    df_new.groupby('YearMonth')
    .size()
    .reset_index(name='Counts')
)

# Horizontal (x-only) interval selection. empty=True means that while
# nothing is brushed the selection matches every row. The boolean form is
# used because the string form ('all' / 'none') is deprecated in the
# Altair 5 API that add_params() below belongs to.
brush = alt.selection_interval(encodings=['x'], empty=True)

line = (
    alt.Chart(monthly_counts)
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'YearMonth:T',
            axis=alt.Axis(format='%Y-%m', title='Date (Year-Month)'),
        ),
        y=alt.Y('Counts:Q', title='Number of UFO Sightings'),
        tooltip=[
            alt.Tooltip('YearMonth:T', title='Month-Year', format='%b-%Y'),
            'Counts',
        ],
    )
    .properties(
        width=1000,
        height=300,
        title='Monthly UFO Sightings after 2010',
    )
    .add_params(brush)
)
|
|
|
|
|
# --- Plot 2: sighting locations, restricted to the brushed time range ---
# Transforms run in the order listed: first derive log_duration, then keep
# only the rows that fall inside the brush drawn on the line chart.
scatter = (
    alt.Chart(df_new)
    .transform_calculate(log_duration='log(datum["Duration(s)"] + 1)')
    .transform_filter(brush)
    .mark_circle(size=50, opacity=0.5)
    .encode(
        # Fixed axis domains so the view does not rescale as the brush moves.
        x=alt.X(
            'Longitude:Q',
            title='Longitude',
            scale=alt.Scale(domain=[-92, -87]),
        ),
        y=alt.Y(
            'Latitude:Q',
            title='Latitude',
            scale=alt.Scale(domain=[37, 43]),
        ),
        color=alt.Color('Shape:N', scale=alt.Scale(scheme='category20')),
        size=alt.Size(
            'log_duration:Q',
            title='Log Duration(s)',
            scale=alt.Scale(range=[10, 300]),
        ),
        tooltip=['Date', 'City', 'Shape', 'Duration(s)'],
    )
    .properties(
        width=400,
        height=500,
        title='Location Distribution of UFO Sightings in IL after 2010 (Selected Time Range)',
    )
)
|
|
|
# Stack the line chart above the scatter plot (`&` is Altair's vertical
# concatenation operator) and give each its own colour / size legend.
stacked = line & scatter
chart = stacked.resolve_legend(size='independent', color='independent')

# Render the combined chart with Streamlit's theme applied.
st.altair_chart(chart, use_container_width=True, theme="streamlit")
|
|
|
# Write-up for plot 1. Adjacent string literals are concatenated by the
# parser, which avoids backslash continuations (those break silently if
# anything follows the backslash). Each fragment carries its own trailing
# space so sentences stay separated in the rendered paragraph.
st.markdown(
    "For the first visualization, plot 1 'Monthly UFO Sightings after 2010', "
    "I created a line chart highlighting the changes in UFO sightings in Illinois State over time. "
    "The temporal data 'Year-Month' was chosen as x-axis to help understand the monthly fluctuations. "
    "Points on the line and tooltips help audiences identify specific monthly counts easily. "
    "For colors and marks, I employed standard line and markers without additional distracting color encodings to keep it clean. "
    "A brush selection is applied to this graph, letting users choose the time interval and then update the corresponding data in plot 2 to display only the sightings reported within the chosen period. "
    "If I had more time, I would consider adding dropdowns to let users select the year and state they are interested in, allowing them to explore more on the UFO dataset. "
    "There is no overlap with the analysis done for HW5 in this plot."
)
|
|
|
# Write-up for plot 2. Adjacent string literals are concatenated by the
# parser; each fragment ends with a space so that sentences do not run
# together in the rendered paragraph (the earlier text was missing one
# after "coordinates.)").
st.markdown(
    "In the second visualization, the geographical distribution, shape, and duration time of these UFO sightings in Illinois after 2010 are shown. "
    "I chose scatter plot since the longitude and latitude can be clearly visualized within Illinois coordinates. (The scales for x axis and y axis are selected according to Illinois coordinates.) "
    "Each point represents a sighting, colored categorically by the shape (see legends). "
    "Category20 is used for scale in color since there are 20 different types of shapes. "
    "And the size of points represents the logarithmic duration of sightings. I took the log on duration because the distribution of duration time is highly skewed right (most are short, fewer exceptionally long, so a log scale is better for viz). "
    "With the interactive filtering in the first plot, users will only see sightings corresponding to the selected months. "
    "With more time, I would first integrate a geographic basemap that can help users navigate specific geographic regions. "
    "Additionally, I might want to apply a click selection to this graph that enables users to choose specific shapes or duration time directly on this scatter plot, "
    "letting them explore potential relationships between UFO shapes and sighting durations, as well as the locations. "
    "There is no overlap with the analysis done for HW5 in this plot."
)