"""Streamlit app that analyses party answers uploaded as a VOTO JSON export.

The page offers three expanders:

* "Statement Selection" ranks statements by how strongly the parties differ
  on them and recommends which ones to keep.
* "Party Positions" plots every valuation of one selected party.
* "Unique Party Positions" plots only the valuations that no other party
  gave on the same statement.
"""

import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st

# Valuation treated as a neutral answer (labelled 'Neutral' on the charts).
NEUTRAL_VALUATION = 50
# Number of leading words of a statement used as its chart label.
SHORT_LABEL_WORDS = 11
# Columns the analysis reads directly; the party column is checked separately
# because it may be called either "party_name" or "party".
REQUIRED_COLUMNS = ("instance", "statement", "valuation")

footer = """ """
st.markdown(footer, unsafe_allow_html=True)


def add_logo():
    """Emit the logo markup block through ``st.markdown``.

    The markup string is a single blank here, so nothing visible is rendered.
    """
    st.markdown(
        """ """,
        unsafe_allow_html=True,
    )


def _missing_columns(frame: pd.DataFrame) -> list:
    """Return the names of required columns that *frame* does not have."""
    missing = [name for name in REQUIRED_COLUMNS if name not in frame.columns]
    if "party_name" not in frame.columns and "party" not in frame.columns:
        missing.append("party_name")
    return missing


def _with_party_name(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* that is guaranteed to have ``party_name``.

    Files that only provide a ``party`` column get it mirrored into
    ``party_name`` so the rest of the app can rely on a single name.
    """
    frame = frame.copy()
    if "party_name" not in frame.columns:
        frame["party_name"] = frame["party"]
    return frame


def _short_text(statement) -> str:
    """Return the first ``SHORT_LABEL_WORDS`` words of *statement*."""
    return " ".join(str(statement).split()[:SHORT_LABEL_WORDS])


def _valuation_axis(valuations: pd.Series):
    """Return ``(ticks, labels)`` for the valuation axis.

    A five-step scale is used as soon as any answer is 25 or 75; otherwise
    the three-step scale is used.
    """
    if valuations.isin([25, 75]).any():
        return (
            [0, 25, 50, 75, 100],
            ['Strong Disagreement', 'Disagreement', 'Neutral', 'Agreement',
             'Strong Agreement'],
        )
    return [0, 50, 100], ['Disagreement', 'Neutral', 'Agreement']


def _standalone_positions(frame: pd.DataFrame) -> pd.DataFrame:
    """Return (statement, valuation) pairs that occur exactly once in *frame*."""
    counts = (
        frame.groupby(['statement', 'valuation'])
        .size()
        .reset_index(name='Count')
    )
    return counts.loc[counts['Count'] == 1, ['statement', 'valuation']]


def _adjusted_rank(frame: pd.DataFrame, column: str, ascending: bool = True) -> pd.Series:
    """Return the zero-based dense rank of each row's statement by *column*.

    *column* must hold one value per statement. Statements whose metric is
    NaN (for example a standard deviation over fewer than two answers) are
    ranked last instead of breaking the integer conversion.
    """
    per_statement = frame.drop_duplicates('statement').set_index('statement')[column]
    ranks = per_statement.rank(
        method='dense', ascending=ascending, na_option='bottom'
    ).astype(int) - 1
    return frame['statement'].map(ranks)


def score_statements(answers: pd.DataFrame) -> pd.DataFrame:
    """Return *answers* with per-statement metrics and ``statement_importance``.

    Five metrics are computed per statement, each turned into a dense rank
    (0 = most distinguishing), and averaged:

    1. standard deviation of all valuations (higher ranks first),
    2. number of distinct valuations (higher ranks first),
    3. number of neutral valuations (lower ranks first),
    4. number of valuations held by exactly one party (higher ranks first),
    5. standard deviation ignoring neutral valuations (higher ranks first).
    """
    scored = answers.copy()
    valuations = scored.groupby('statement')['valuation']
    std_all = valuations.transform('std')
    distinct = valuations.transform('nunique')
    neutrals = valuations.transform(lambda x: (x == NEUTRAL_VALUATION).sum())
    scored['std_deviation'] = std_all
    scored['unique_postions'] = distinct
    scored['n_neutral'] = neutrals

    # Valuations that only a single party gave on a statement.
    single_counts = (
        _standalone_positions(scored)
        .groupby('statement')
        .size()
        .reset_index(name='SingleCount')
    )
    scored = scored.merge(single_counts, on='statement', how='left')
    scored['SingleCount'] = scored['SingleCount'].fillna(0).astype(int)

    # Spread among the parties that actually took a side. Statements without
    # any non-neutral answer are absent from the aggregate and map to NaN.
    std_without_neutral = (
        scored[scored['valuation'] != NEUTRAL_VALUATION]
        .groupby('statement')['valuation']
        .std()
    )
    scored['std_deviation2'] = scored['statement'].map(std_without_neutral)

    scored['AdjustedRank1'] = _adjusted_rank(scored, 'std_deviation', ascending=False)
    scored['AdjustedRank2'] = _adjusted_rank(scored, 'unique_postions', ascending=False)
    scored['AdjustedRank3'] = _adjusted_rank(scored, 'n_neutral')
    scored['AdjustedRank4'] = _adjusted_rank(scored, 'SingleCount', ascending=False)
    scored['AdjustedRank5'] = _adjusted_rank(scored, 'std_deviation2', ascending=False)

    rank_columns = [f"AdjustedRank{number}" for number in range(1, 6)]
    scored["statement_importance"] = scored[rank_columns].sum(axis=1) / 5
    return scored


def _plot_valuations(container, frame: pd.DataFrame, ticks, tick_labels, height: int) -> None:
    """Draw a valuation-by-statement scatter plot of *frame* into *container*.

    *frame* needs the columns ``valuation`` and ``short_text``.
    """
    fig, ax = plt.subplots(figsize=(6, height))
    ax.scatter(frame['valuation'], frame['short_text'], color='purple', s=100)
    ax.set_xlabel('Valuation', fontsize=16)
    ax.set_ylabel('Statements', fontsize=16)
    ax.set_title('Valuation by Statement', fontsize=16)
    ax.set_xticks(ticks)
    ax.set_xticklabels(tick_labels)
    ax.tick_params(axis='y', labelsize=16)
    # Rotate through the Axes instead of plt.xticks(), which acts on pyplot's
    # global "current axes".
    ax.tick_params(axis='x', labelsize=16, labelrotation=45)
    ax.grid(True)
    container.pyplot(fig)
    # The script reruns on every interaction; close the figure so they do not
    # pile up in pyplot's figure registry.
    plt.close(fig)


def _render_statement_selection(answers: pd.DataFrame, statement_n: int) -> None:
    """Render the "Statement Selection" expander."""
    panel = st.expander("Statement Selection")
    scored = score_statements(answers)
    ranking = (
        scored[['statement', "statement_importance"]]
        .drop_duplicates()
        .sort_values(by='statement_importance')
        .reset_index(drop=True)
    )
    details = scored.sort_values(by='statement_importance').reset_index(drop=True)

    statn = panel.slider(
        "How many statements do you want in your final VOTO? You currently have **"
        + str(statement_n) + "** statements.",
        0, statement_n, 0,
    )
    if statn != 0:
        ordered = ranking['statement'].tolist()
        panel.write(
            "**Based on that our metric recommends keeping the following "
            + str(statn) + " statements:**"
        )
        # NOTE(review): statement text comes from the uploaded file and is
        # rendered with unsafe_allow_html=True, as in the original.
        for position, text in enumerate(ordered[:statn], start=1):
            panel.markdown(f"{position}: {text}", unsafe_allow_html=True)
        panel.write(
            "**While the metric recommends dropping "
            + str(statement_n - statn)
            + " statements due to a lack of difference:**"
        )
        for position, text in enumerate(ordered[statn:], start=statn + 1):
            panel.markdown(f"{position}: {text}", unsafe_allow_html=True)
    # NOTE(review): the original nesting of this button could not be
    # recovered; it is kept available regardless of the slider value.
    if panel.button("Additional information on individual metric scores"):
        panel.dataframe(details)


def _render_party_positions(answers: pd.DataFrame) -> None:
    """Render the "Party Positions" expander."""
    panel = st.expander("Party Positions")
    party_options = [""] + list(answers['party_name'].unique())  # "" = nothing chosen
    selected_party = panel.selectbox("Select a Party", party_options)
    ticks, tick_labels = _valuation_axis(answers['valuation'])
    if not selected_party:
        return
    own = answers[answers['party_name'] == selected_party]
    own = own.assign(short_text=own['statement'].apply(_short_text))
    _plot_valuations(panel, own, ticks, tick_labels, height=20)


def _render_unique_positions(answers: pd.DataFrame) -> None:
    """Render the "Unique Party Positions" expander."""
    panel = st.expander("Unique Party Positions")
    party_options = [""] + list(answers['party_name'].unique())  # "" = nothing chosen
    # Same label as the selectbox above, so an explicit key keeps them apart.
    selected_party = panel.selectbox("Select a Party", party_options, key="2")
    ticks, tick_labels = _valuation_axis(answers['valuation'])
    if not selected_party:
        return
    standalone = answers.merge(
        _standalone_positions(answers), on=['statement', 'valuation']
    )[['statement', 'party_name', 'valuation']]
    own = standalone[standalone['party_name'] == selected_party]
    if own.empty:
        panel.info("The selected party has no position that is unique among all parties.")
        return
    own = own.assign(short_text=own['statement'].apply(_short_text))
    _plot_valuations(panel, own, ticks, tick_labels, height=12)


def main() -> None:
    """Render the page: header, file upload, validation and the three analyses."""
    add_logo()
    col1, col2 = st.columns(2)
    with col1:
        st.image("voto_purple.png", width=300)
    with col2:
        st.image("tuda_logo.tif", width=300)

    st.title("VOTO party insights")

    uploaded_file = st.file_uploader(
        "Upload the JSON file with your VOTO party answers", type=["json"]
    )
    if uploaded_file is None:
        st.info("Please upload a JSON file to get started.")
        return

    # Only the parse can raise a decoding error, so only the parse is guarded.
    try:
        json_data = json.loads(uploaded_file.read())
    except (json.JSONDecodeError, UnicodeDecodeError):
        st.error("This is not a valid JSON file. Please upload a valid JSON.")
        return

    is_record_list = isinstance(json_data, list) and all(
        isinstance(record, dict) for record in json_data
    )
    if not is_record_list:
        st.warning("The JSON file structure is not suitable for DataFrame conversion. It should be a list of dictionaries.")
        return

    raw = pd.DataFrame(json_data)
    missing = _missing_columns(raw)
    if missing:
        st.error("The uploaded file is missing required fields: " + ", ".join(missing))
        return
    answers = _with_party_name(raw)

    election = str(answers["instance"].iloc[0])
    statement_n = answers["statement"].nunique()
    parties = str(answers["party_name"].nunique())
    st.success(
        "You have uploaded the party answers for the VOTO instance: " + election
        + " (" + str(statement_n) + " statements; " + parties + " Parties)"
    )

    _render_statement_selection(answers, statement_n)
    _render_party_positions(answers)
    _render_unique_positions(answers)


# Streamlit executes this file top to bottom on every rerun.
main()