Spaces:

Thiloid
/

VotoMatcher

Sleeping

App Files Files Community

VotoMatcher / app.py

Thiloid

Update app.py

811d734 verified about 2 months ago

raw

history blame contribute delete

12.3 kB

	import html
	import json

	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import streamlit as st

	# Page footer: CSS for link colours plus a fixed, full-width bar pinned to the
	# bottom of the viewport, followed by the HTML crediting VOTO and the developer.
	footer="""<style>
	a:link , a:visited{
	color: blue;
	background-color: transparent;
	text-decoration: underline;
	}

	a:hover, a:active {
	color: red;
	background-color: transparent;
	text-decoration: underline;
	}

	.footer {
	position: fixed;
	left: 0;
	bottom: 0;
	width: 100%;
	background-color: cornflowerblue;
	color: black;
	text-align: center;
	}
	</style>
	<div class="footer">
	<p>This app was developed by <a href="https://www.voto.vote" target="_blank">VOTO</a>.
	Developer: <a href="https://linkedin.com/in/thilo-dieing-bba5b2253" target="_blank">Thilo I. Dieing, M.Sc.</a></p>
	</div>
	"""
	# The footer is static, app-authored markup, so it is passed through as raw
	# HTML (unsafe_allow_html=True) instead of being rendered as Markdown text.
	st.markdown(footer,unsafe_allow_html=True)
	def add_logo():
	    """Inject CSS that puts a text caption in front of the sidebar navigation.

	    The rule targets the element tagged ``data-testid="stSidebarNav"`` and
	    uses its ``::before`` pseudo-element to display the project caption.
	    The stylesheet is static, so it is emitted as raw HTML.
	    """
	    sidebar_caption_css = """
	<style>

	[data-testid="stSidebarNav"]::before {
	content: "Uni Mannheim ASR Team Project";
	margin-left: 20px;
	margin-top: 20px;
	font-size: 20px;
	position: relative;
	top: 100px;
	}
	</style>
	"""
	    st.markdown(sidebar_caption_css, unsafe_allow_html=True)
	# functions



	# Inject the sidebar caption CSS.
	add_logo()

	# Two equally wide columns, one logo in each.
	col1, col2 = st.columns(2)
	col1.image("voto_purple.png", width=300)
	col2.image("tuda_logo.tif", width=300)

	# Title of the app
	st.title("VOTO party insights")

	# Upload widget restricted to .json files; the result is compared against
	# None further down to decide whether there is anything to analyse.
	uploaded_file = st.file_uploader(
	    "Upload the JSON file with your VOTO party answers",
	    type=["json"],
	)

	# Valuation value that encodes a neutral answer.
	NEUTRAL_VALUATION = 50

	# (metric column, rank column, ascending). A rank of 0 marks the statement
	# that separates the parties best on that metric. Only the neutral count is
	# ranked ascending, because fewer neutral answers is better.
	RANKED_METRICS = (
	    ("std_deviation", "AdjustedRank1", False),
	    ("unique_postions", "AdjustedRank2", False),
	    ("n_neutral", "AdjustedRank3", True),
	    ("SingleCount", "AdjustedRank4", False),
	    ("std_deviation2", "AdjustedRank5", False),
	)


	def _missing_columns(df):
	    """Return the names of required columns that ``df`` does not contain.

	    ``party`` is accepted as a substitute for ``party_name``.
	    """
	    missing = [
	        column
	        for column in ("instance", "statement", "valuation")
	        if column not in df.columns
	    ]
	    if "party_name" not in df.columns and "party" not in df.columns:
	        missing.append("party_name")
	    return missing


	def _short_text(statement):
	    """Return the first 11 whitespace-separated words of ``statement``."""
	    return " ".join(str(statement).split()[:11])


	def _valuation_ticks(valuations):
	    """Return ``(ticks, labels)`` for the valuation axis.

	    A five-point scale is used when 25 or 75 occurs in ``valuations``;
	    otherwise the three-point scale is used.
	    """
	    if valuations.isin([25, 75]).any():
	        return (
	            [0, 25, 50, 75, 100],
	            ['Strong Disagreement', 'Disagreement', 'Neutral', 'Agreement', 'Strong Agreement'],
	        )
	    return [0, 50, 100], ['Disagreement', 'Neutral', 'Agreement']


	def _add_dense_rank(df, metric, rank_column, ascending):
	    """Merge a zero-based dense rank of a per-statement ``metric`` into ``df``.

	    Undefined metrics (NaN, e.g. the standard deviation of fewer than two
	    answers) are ranked last instead of breaking the integer conversion.
	    """
	    per_statement = df[["statement", metric]].drop_duplicates()
	    ranks = per_statement[metric].rank(
	        method="dense", ascending=ascending, na_option="bottom"
	    )
	    per_statement = per_statement.assign(**{rank_column: ranks.astype(int) - 1})
	    return df.merge(
	        per_statement[["statement", rank_column]], on="statement", how="left"
	    )


	def _statement_metrics(answers):
	    """Return a copy of ``answers`` extended with metrics, ranks and importance.

	    Added columns: ``std_deviation``, ``unique_postions``, ``n_neutral``,
	    ``SingleCount``, ``std_deviation2``, ``AdjustedRank1``..``AdjustedRank5``
	    and ``statement_importance`` (mean of the five ranks; lower means the
	    statement differentiates the parties better).
	    """
	    grouped = answers.groupby("statement")["valuation"]
	    df = answers.assign(
	        # spread of all answers to the statement
	        std_deviation=grouped.transform("std"),
	        # how many different positions were taken
	        unique_postions=grouped.transform("nunique"),
	        # how many parties answered neutral
	        n_neutral=grouped.transform(lambda v: (v == NEUTRAL_VALUATION).sum()),
	    )

	    # Number of positions per statement that are held by exactly one party.
	    position_counts = (
	        df.groupby(["statement", "valuation"]).size().reset_index(name="Count")
	    )
	    single_counts = (
	        position_counts[position_counts["Count"] == 1]
	        .groupby("statement")
	        .size()
	        .reset_index(name="SingleCount")
	    )
	    df = df.merge(single_counts, on="statement", how="left")
	    df["SingleCount"] = df["SingleCount"].fillna(0).astype(int)

	    # Spread of the non-neutral answers only. Statements without any
	    # non-neutral answer get NaN from the left merge.
	    std_without_neutral = (
	        df[df["valuation"] != NEUTRAL_VALUATION]
	        .groupby("statement")["valuation"]
	        .std()
	        .rename("std_deviation2")
	        .reset_index()
	    )
	    df = df.merge(std_without_neutral, on="statement", how="left")

	    for metric, rank_column, ascending in RANKED_METRICS:
	        df = _add_dense_rank(df, metric, rank_column, ascending)

	    rank_columns = [rank_column for _, rank_column, _ in RANKED_METRICS]
	    df["statement_importance"] = df[rank_columns].sum(axis=1) / len(rank_columns)
	    return df


	def _numbered_statement_html(position, statement, color):
	    """Return one coloured, numbered list line as an HTML ``<span>``.

	    The statement comes from the uploaded file, so it is HTML-escaped before
	    being embedded in markup that is rendered with ``unsafe_allow_html``.
	    """
	    return (
	        "<span style='color: " + color + ";'>"
	        + str(position) + ": " + html.escape(str(statement))
	        + "</span>"
	    )


	def _plot_valuations(container, rows, ticks, tick_labels, height):
	    """Draw a valuation-vs-statement scatter plot of ``rows`` into ``container``."""
	    fig, ax = plt.subplots(figsize=(6, height))
	    ax.scatter(rows["valuation"], rows["short_text"], color="purple", s=100)  # s is the size of points
	    ax.set_xlabel("Valuation", fontsize=16)
	    ax.set_ylabel("Statements", fontsize=16)
	    ax.set_title("Valuation by Statement", fontsize=16)
	    ax.set_xticks(ticks)  # x ticks depend on the detected answer scale
	    ax.set_xticklabels(tick_labels)
	    ax.tick_params(axis="y", labelsize=16)
	    ax.tick_params(axis="x", labelsize=16, labelrotation=45)
	    ax.grid(True)
	    container.pyplot(fig)
	    # pyplot keeps every figure alive until it is closed; without this each
	    # script rerun would leave another figure in memory.
	    plt.close(fig)


	def _render_statement_selection(answers, statement_n):
	    """Render the "Statement Selection" expander."""
	    expander = st.expander("Statement Selection")
	    detailed = _statement_metrics(answers)

	    # Stable sorts keep tied statements in upload order.
	    ranking = (
	        detailed[["statement", "statement_importance"]]
	        .drop_duplicates()
	        .sort_values(by="statement_importance", kind="stable")
	        .reset_index(drop=True)
	    )
	    detailed_sorted = detailed.sort_values(
	        by="statement_importance", kind="stable"
	    ).reset_index(drop=True)

	    keep_n = expander.slider(
	        "How many statements do you want in your final VOTO? You currently have "
	        + str(statement_n) + " statements.",
	        0,
	        int(statement_n),
	        0,
	    )
	    if keep_n == 0:
	        return

	    ordered = ranking["statement"].tolist()
	    expander.write(
	        "Based on that our metric recommends keeping the following "
	        + str(keep_n) + " statements:"
	    )
	    for position, statement in enumerate(ordered[:keep_n], start=1):
	        expander.markdown(
	            _numbered_statement_html(position, statement, "green"),
	            unsafe_allow_html=True,
	        )
	    expander.write(
	        "While the metric recommends dropping " + str(statement_n - keep_n)
	        + " statements due to a lack of difference:"
	    )
	    for position, statement in enumerate(ordered[keep_n:], start=keep_n + 1):
	        expander.markdown(
	            _numbered_statement_html(position, statement, "red"),
	            unsafe_allow_html=True,
	        )
	    # NOTE(review): the button is assumed to belong under the slider check
	    # (only offered once a number of statements is chosen) - confirm.
	    if expander.button("Additional information on individual metric scores"):
	        expander.dataframe(detailed_sorted)


	def _render_party_positions(party_view, party_options, ticks, tick_labels):
	    """Render the "Party Positions" expander: all answers of one party."""
	    expander = st.expander("Party Positions")
	    selected_party = expander.selectbox("Select a Party", party_options)
	    if selected_party:  # the empty first option means "nothing selected"
	        rows = party_view[party_view["party_name"] == selected_party]
	        _plot_valuations(expander, rows, ticks, tick_labels, height=20)


	def _render_unique_positions(party_view, party_options, ticks, tick_labels):
	    """Render the "Unique Party Positions" expander.

	    Shows only the answers where the selected party is the sole party holding
	    that valuation on the statement.
	    """
	    expander = st.expander("Unique Party Positions")
	    selected_party = expander.selectbox("Select a Party", party_options, key="2")
	    if not selected_party:
	        return

	    position_counts = (
	        party_view.groupby(["statement", "valuation"]).size().reset_index(name="Count")
	    )
	    single_positions = position_counts.loc[
	        position_counts["Count"] == 1, ["statement", "valuation"]
	    ]
	    unique_rows = party_view.merge(single_positions, on=["statement", "valuation"])
	    rows = unique_rows[unique_rows["party_name"] == selected_party]
	    if rows.empty:
	        expander.info("The selected party does not hold any unique positions.")
	        return
	    _plot_valuations(expander, rows, ticks, tick_labels, height=12)


	def _render_insights(json_data):
	    """Validate the uploaded records and render all three analysis sections.

	    ``json_data`` is the parsed upload and is expected to be a list of
	    dictionaries with the keys ``instance``, ``statement``, ``valuation`` and
	    ``party_name`` (or ``party``).
	    """
	    answers = pd.DataFrame(json_data)
	    missing = _missing_columns(answers)
	    if missing:
	        st.error(
	            "The uploaded JSON is missing the following required fields: "
	            + ", ".join(missing)
	        )
	        return
	    if "party_name" not in answers.columns:
	        answers["party_name"] = answers["party"]

	    election = str(answers["instance"].iloc[0])
	    statement_n = answers["statement"].nunique()
	    party_n = answers["party_name"].nunique()
	    st.success(
	        "You have uploaded the party answers for the VOTO instance: "
	        f"{election} ({statement_n} statements; {party_n} Parties)"
	    )

	    _render_statement_selection(answers, statement_n)

	    ticks, tick_labels = _valuation_ticks(answers["valuation"])
	    party_view = answers.assign(short_text=answers["statement"].map(_short_text))
	    party_options = [""] + list(party_view["party_name"].unique())  # Add an empty option
	    _render_party_positions(party_view, party_options, ticks, tick_labels)
	    _render_unique_positions(party_view, party_options, ticks, tick_labels)


	# Check if a file is uploaded
	if uploaded_file is not None:
	    try:
	        # Only parsing sits inside the try, so analysis errors are not
	        # misreported as invalid JSON.
	        json_data = json.loads(uploaded_file.read())
	    except (json.JSONDecodeError, UnicodeDecodeError):
	        # If the file is not a valid JSON, show an error message
	        st.error("This is not a valid JSON file. Please upload a valid JSON.")
	    else:
	        if isinstance(json_data, list):  # Expecting a list of dictionaries for a proper DataFrame
	            _render_insights(json_data)
	        else:
	            st.warning("The JSON file structure is not suitable for DataFrame conversion. It should be a list of dictionaries.")
	else:
	    st.info("Please upload a JSON file to get started.")