# is445_hw6 / app.py
# Chloecky's picture
# fixing text format
# 3b34434
# put streamlit code here as needed
import pandas as pd
import streamlit as st
import altair as alt
# Page header: title, course blurb, and links to the app and the raw dataset.
st.title("UFO Sightings in Illinois after 2010")
st.text("My Streamlit App for IS 445 Data Viz Homework 6")
st.markdown("The URL for this app is: https://huggingface.co/spaces/Chloecky/is445_hw6")

ufo_url = (
    "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/"
    "ufo-scrubbed-geocoded-time-standardized-00.csv"
)
st.markdown(f"The URL for the raw UFO data is: {ufo_url}")

# The CSV is read with header=None, so the column labels are supplied here
# and assigned to the file's columns positionally.
column_names = [
    "Date",
    "City",
    "State",
    "Country",
    "Shape",
    "Duration(s)",
    "Duration Time",
    "Description",
    "Report Date",
    "Latitude",
    "Longitude",
]
df = pd.read_csv(ufo_url, names=column_names, header=None)
# Preprocessing
# Parse the sighting timestamp. Values that do not match the format become
# NaT (errors='coerce') and are then excluded by the year comparison below.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %H:%M', errors='coerce')

# Keep only US / Illinois sightings from 2010 onward. The explicit .copy()
# makes df_new an independent frame, so the column assignments below do not
# raise pandas' SettingWithCopyWarning (assigning into a filtered slice).
is_il_recent = (df['Date'].dt.year >= 2010) & (df['Country'] == 'us') & (df['State'] == 'il')
df_new = df[is_il_recent].copy()

# Timestamp of the first day of each sighting's month; used to group by month.
df_new['YearMonth'] = df_new['Date'].dt.to_period('M').dt.to_timestamp()

# Fold missing and placeholder shape labels into a single 'other' category.
df_new['Shape'] = df_new['Shape'].replace(['null', 'unknown'], 'other').fillna('other')
# Plot 1
# Number of sightings per month: one row per YearMonth value present in df_new.
monthly_counts = df_new.groupby('YearMonth').size().reset_index(name='Counts')

# Interval selection restricted to the x channel. With nothing brushed, every
# point counts as selected. Altair 5 expects a boolean for `empty`; the legacy
# string 'all' is deprecated and only accepted with a warning.
brush = alt.selection_interval(encodings=['x'], empty=True)

# Line chart of the monthly counts; the brush is attached to this chart.
line = alt.Chart(monthly_counts).mark_line(point=True).encode(
    x=alt.X('YearMonth:T', axis=alt.Axis(format='%Y-%m', title='Date (Year-Month)')),
    y=alt.Y('Counts:Q', title='Number of UFO Sightings'),
    tooltip=[alt.Tooltip('YearMonth:T', title='Month-Year', format='%b-%Y'), 'Counts'],
).properties(
    width=1000,
    height=300,
    title='Monthly UFO Sightings after 2010',
).add_params(brush)
# Plot 2
# Encoding channels for the longitude/latitude scatter, built up front.
scatter_x = alt.X('Longitude:Q', title='Longitude', scale=alt.Scale(domain=[-92, -87]))
scatter_y = alt.Y('Latitude:Q', title='Latitude', scale=alt.Scale(domain=[37, 43]))
scatter_color = alt.Color('Shape:N', scale=alt.Scale(scheme='category20'))
scatter_size = alt.Size(
    'log_duration:Q',
    title='Log Duration(s)',
    scale=alt.Scale(range=[10, 300]),
)

scatter = (
    alt.Chart(df_new)
    # Derived field: log of (duration + 1), used for the point size.
    .transform_calculate(log_duration='log(datum["Duration(s)"] + 1)')
    # Only keep rows that fall inside the brush selection.
    .transform_filter(brush)
    .mark_circle(size=50, opacity=0.5)
    .encode(
        x=scatter_x,
        y=scatter_y,
        color=scatter_color,
        size=scatter_size,
        tooltip=['Date', 'City', 'Shape', 'Duration(s)'],
    )
    .properties(
        width=400,
        height=500,
        title='Location Distribution of UFO Sightings in IL after 2010 (Selected Time Range)',
    )
)
# Stack the line chart above the scatter plot (`&` is vertical concatenation)
# and resolve the color and size legends independently.
chart = (line & scatter).resolve_legend(color='independent', size='independent')
st.altair_chart(chart, use_container_width=True, theme="streamlit")
# Write-up for plot 1, rendered as one markdown paragraph. Adjacent string
# literals are concatenated by Python, so each piece ends with a space.
# Text fixes: "specifc" -> "specific", "letting user to choose" -> "letting users choose".
st.markdown(
    "For the first visualization, plot 1 'Monthly UFO Sightings after 2010', "
    "I created a line chart highlighting the changes in UFO sightings in Illinois State over time. "
    "The temporal data 'Year-Month' was chosen as x-axis to help understand the monthly fluctuations. "
    "Points on the line and tooltips help audiences identify specific monthly counts easily. "
    "For colors and marks, I employed standard line and markers without additional distracting color encodings to keep it clean. "
    "A brush selection is applied to this graph, letting users choose the time interval and then update the corresponding data in plot 2 to display only the sightings reported within the chosen period. "
    "If I had more time, I would consider adding dropdowns to let users select the year and state they are interested in, allowing them to explore more on the UFO dataset. "
    "There is no overlap with the analysis done for HW5 in this plot."
)
# Write-up for plot 2, rendered as one markdown paragraph. Adjacent string
# literals are concatenated by Python, so each piece ends with a space.
# Text fixes: restored the missing space after "coordinates.)", and corrected
# "grographic", "represent" and "help user".
st.markdown(
    "In the second visualization, the geographical distribution, shape, and duration time of these UFO sightings in Illinois after 2010 are shown. "
    "I chose scatter plot since the longitude and latitude can be clearly visualized within Illinois coordinates. (The scales for x axis and y axis are selected according to Illinois coordinates.) "
    "Each point represents a sighting, colored categorically by the shape (see legends). "
    "Category20 is used for scale in color since there are 20 different types of shapes. "
    "And the size of points represents the logarithmic duration of sightings. I took the log on duration because the distribution of duration time is highly skewed right (most are short, fewer exceptionally long, so a log scale is better for viz). "
    "With the interactive filtering in the first plot, users will only see sightings corresponding to the selected months. "
    "With more time, I would first integrate a geographic basemap that can help users navigate specific geographic regions. "
    "Additionally, I might want to apply a click selection to this graph that enables users to choose specific shapes or duration time directly on this scatter plot, "
    "letting them explore potential relationships between UFO shapes and sighting durations, as well as the locations. "
    "There is no overlap with the analysis done for HW5 in this plot."
)