uploading hw6
Browse files- README.md +3 -5
- app.py +58 -59
- requirements.txt +6 -2
README.md
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
---
|
2 |
-
title: My
|
3 |
emoji: 🏢
|
4 |
colorFrom: blue
|
5 |
colorTo: gray
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: My Streamlit App (Homework 6)
|
3 |
emoji: 🏢
|
4 |
colorFrom: blue
|
5 |
colorTo: gray
|
6 |
sdk: streamlit
|
7 |
+
sdk_version: 1.39.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
+
---
|
|
|
|
app.py
CHANGED
@@ -1,73 +1,72 @@
|
|
1 |
-
#
|
2 |
-
# 1. Open a "Terminal" by: View --> Terminal OR just the "Terminal" through the hamburger menu
|
3 |
-
# 2. run in terminal with: streamlit run app.py
|
4 |
-
# 3. click the "Open in Browser" link that pops up OR click on "Ports" and copy the URL
|
5 |
-
# 4. Open a Simple Browswer with View --> Command Palette --> Simple Browser: Show
|
6 |
-
# 5. use the URL from prior steps as intput into this simple browser
|
7 |
-
|
8 |
|
|
|
9 |
import streamlit as st
|
10 |
import altair as alt
|
11 |
-
from vega_datasets import data
|
12 |
|
13 |
-
st.title(
|
14 |
|
15 |
-
st.text("The URL for this app is: https://huggingface.co/spaces/
|
16 |
|
17 |
-
|
|
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
)
|
23 |
-
|
24 |
|
25 |
-
#
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
#
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
.
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
)
|
45 |
-
.properties(width=550, height=300)
|
46 |
-
.add_params(brush)
|
47 |
-
.transform_filter(click)
|
48 |
)
|
49 |
|
50 |
-
|
51 |
-
bars = (
|
52 |
-
alt.Chart()
|
53 |
-
.mark_bar()
|
54 |
-
.encode(
|
55 |
-
x="count()",
|
56 |
-
y="weather:N",
|
57 |
-
color=alt.condition(click, color, alt.value("lightgray")),
|
58 |
-
)
|
59 |
-
.transform_filter(brush)
|
60 |
-
.properties(
|
61 |
-
width=550,
|
62 |
-
)
|
63 |
-
.add_params(click)
|
64 |
-
)
|
65 |
|
66 |
-
|
67 |
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# put streamlit code here as needed
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
import pandas as pd
|
4 |
import streamlit as st
|
5 |
import altair as alt
|
|
|
6 |
|
7 |
+
st.title('My First Streamlit App for IS 445 Data Viz Homework 6')
|
8 |
|
9 |
+
st.text("The URL for this app is: https://huggingface.co/spaces/jnaiman/is445_demo")
|
10 |
|
11 |
+
ufo_url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
|
12 |
+
st.text(f"The URL for the raw UFO data is {ufo_url}")
|
13 |
+
column_names = ['Date', 'City', 'State', 'Country', 'Shape', 'Duration(s)', 'Duration Time', 'Description', 'Report Date', 'Latitude', 'Longitude']
|
14 |
+
df = pd.read_csv(ufo_url, header=None, names=column_names)
|
15 |
|
16 |
+
# Preprocessing
|
17 |
+
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %H:%M', errors='coerce')
|
18 |
+
# Keep only Illinois (US) sightings from 2010 onward. Take an explicit copy so the
# column assignments that follow modify an independent frame instead of a slice of
# `df` (avoids pandas' SettingWithCopyWarning).
df_new = df[(df['Date'].dt.year >= 2010) & (df['Country'] == 'us') & (df['State'] == 'il')].copy()
|
19 |
+
df_new['YearMonth'] = df_new['Date'].dt.to_period('M').dt.to_timestamp()
|
20 |
+
df_new['Shape'] = df_new['Shape'].replace(['null', 'unknown'], 'other').fillna('other')
|
21 |
|
22 |
+
# Plot 1
|
23 |
+
monthly_counts = df_new.groupby('YearMonth').size().reset_index(name='Counts')
|
24 |
+
# Interval brush along the x (time) axis. `empty=True` means an empty selection
# matches every row; Altair 5 expects a boolean here (the string 'all' is the
# deprecated Altair 4 spelling).
brush = alt.selection_interval(encodings=['x'], empty=True)
|
25 |
+
line = alt.Chart(monthly_counts).mark_line(point=True).encode(
|
26 |
+
x=alt.X('YearMonth:T', axis=alt.Axis(format='%Y-%m', title='Date (Year-Month)')),
|
27 |
+
y=alt.Y('Counts:Q', title='Number of UFO Sightings'),
|
28 |
+
tooltip=[alt.Tooltip('YearMonth:T', title='Month-Year', format='%b-%Y'), 'Counts']
|
29 |
+
).properties(
|
30 |
+
width=1000,
|
31 |
+
height=300,
|
32 |
+
title='Monthly UFO Sightings after 2010'
|
33 |
+
).add_params(brush)
|
34 |
|
35 |
+
# Plot 2
|
36 |
+
scatter = alt.Chart(df_new).transform_calculate(
|
37 |
+
log_duration='log(datum["Duration(s)"] + 1)'
|
38 |
+
).mark_circle(size=50, opacity=0.5).encode(
|
39 |
+
x = alt.X('Longitude:Q', title='Longitude', scale=alt.Scale(domain=[-92, -87])),
|
40 |
+
y = alt.Y('Latitude:Q', title='Latitude', scale=alt.Scale(domain=[37, 43])),
|
41 |
+
color=alt.Color('Shape:N', scale=alt.Scale(scheme='category20')),
|
42 |
+
size = alt.Size('log_duration:Q', title='Log Duration(s)', scale=alt.Scale(range=[10,300])),
|
43 |
+
tooltip=['Date', 'City', 'Shape', 'Duration(s)']
|
44 |
+
).transform_filter(brush).properties(
|
45 |
+
width=400,
|
46 |
+
height=500,
|
47 |
+
title='Location Distribution of UFO Sightings in IL after 2010 (Selected Time Range)'
|
|
|
|
|
|
|
|
|
48 |
)
|
49 |
|
50 |
+
chart = alt.vconcat(line, scatter).resolve_legend(color='independent', size='independent')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
st.altair_chart(chart, theme="streamlit", use_container_width=True)
|
53 |
|
54 |
+
st.text("For the first visualization, plot 1 'Monthly UFO Sightings after 2010', \
|
55 |
+
I created a line chart highlighting the changes in UFO sightings in Illinois State over time. \
|
56 |
+
The temporal data 'Year-Month' was chosen as x-axis to help understand the monthly fluctuations. \
|
57 |
+
Points on the line and tooltips help audiences identify specific monthly counts easily. \
|
58 |
+
For colors and marks, I employed standard line and markers without additional distracting color encodings to keep it clean. \
|
59 |
+
A brush selection is applied to this graph, letting users choose a time interval, which then updates plot 2 to display only the sightings reported within the chosen period. \
|
60 |
+
If I had more time, I would consider adding dropdowns to let users select the year and state they are interested in, allowing them to explore more on the UFO dataset. \
|
61 |
+
There is no overlap with the analysis done for HW5 in this plot.")
|
62 |
|
63 |
+
st.text("In the second visualization, the geographical distribution, shape, and duration time of these UFO sightings in Illinois after 2010 are shown. \
|
64 |
+
I chose scatter plot since the longitude and latitude can be clearly visualized within Illinois coordinates. (The scales for x axis and y axis are selected according to Illinois coordinates.)\
|
65 |
+
Each point represents a sighting, colored categorically by the shape (see legends). \
|
66 |
+
Category20 is used for scale in color since there are 20 different types of shapes. \
|
67 |
+
And the size of each point represents the logarithmic duration of the sighting. I took the log of the duration because the distribution of duration time is highly skewed right (most are short, fewer exceptionally long, so a log scale is better for viz). \
|
68 |
+
With the interactive filtering in the first plot, users will only see sightings corresponding to the selected months. \
|
69 |
+
With more time, I would first integrate a geographic basemap that can help users navigate specific geographic regions. \
|
70 |
+
Additionally, I might want to apply a click selection to this graph that enables users to choose specific shapes or duration time directly on this scatter plot, \
|
71 |
+
letting them explore potential relationships between UFO shapes and sighting durations, as well as the locations. \
|
72 |
+
There is no overlap with the analysis done for HW5 in this plot.")
|
requirements.txt
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
-
|
|
|
2 |
altair
|
3 |
-
|
|
|
|
|
|
|
|
1 |
+
# Python dependencies for the Streamlit app
|
2 |
+
streamlit==1.39.0
|
3 |
altair
|
4 |
+
numpy
|
5 |
+
pandas
|
6 |
+
matplotlib
|
7 |
+
# end of dependency list
|