Spaces:
Sleeping
Sleeping
# Streamlit app to visualize homoglyphs alarm experiment results | |
# This app lets users interactively explore experiment results stored in timestamped results folders | |
# It always loads the most recent results folder; choosing a different run is not implemented here
import streamlit as st | |
import os | |
import glob | |
import pandas as pd | |
import yaml | |
import matplotlib.pyplot as plt | |
from matplotlib import font_manager | |
import pycountry | |
import re | |
# Page-level settings: browser tab title, icon and layout.
# Theme and custom font are expected to come from config.toml (no manual CSS).
st.set_page_config(
    page_title="Homoglyphs Alarm Results",
    page_icon="📊",
    layout="centered",
)
# Register the IBM Plex Sans font file with matplotlib (the path is relative,
# so it is resolved against the current working directory), then make that
# family and the five-colour line palette the defaults for every figure.
font_path = "IBMPlexSans-Regular.ttf"
font_manager.fontManager.addfont(font_path)
plt.rcParams.update(
    {
        "font.family": "IBM Plex Sans",
        "axes.prop_cycle": plt.cycler(
            color=["#F600FF", "#FF0000", "#00FBFF", "#00AAEC", "#0034A3"]
        ),
    }
)
# Helper to get all result folders sorted by timestamp (descending) | |
def get_result_folders(base_dir="results"):
    """Return the sub-directories of ``base_dir`` in descending name order.

    Plain files inside ``base_dir`` are ignored. Ordering is a reverse
    lexicographic sort of the joined paths, so the "latest" folder comes first
    only if folder names sort chronologically (e.g. timestamp-style names).

    Args:
        base_dir: Directory that holds one sub-folder per experiment run.

    Returns:
        A list of paths (``base_dir`` joined with each sub-folder name). The
        list is empty when ``base_dir`` does not exist or is not a directory,
        so callers can report "no results" instead of crashing.
    """
    # os.listdir would raise on a missing directory; treat that as "no runs yet".
    if not os.path.isdir(base_dir):
        return []
    candidates = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    return sorted((path for path in candidates if os.path.isdir(path)), reverse=True)
# Helper to load YAML parameters | |
def load_parameters(yaml_path):
    """Load run parameters from a YAML file.

    Args:
        yaml_path: Path to the YAML file to read.

    Returns:
        The parsed YAML content. An empty file (which ``yaml.safe_load``
        parses as ``None``) yields an empty dict so callers can iterate the
        result without a ``None`` check.
    """
    # Explicit UTF-8 keeps parsing independent of the platform's default encoding.
    with open(yaml_path, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)
    return {} if loaded is None else loaded
# Helper to load CSVs | |
def load_csv(csv_path):
    """Read a CSV into a DataFrame, using its first column as the row index.

    The first column is not discarded: it becomes the DataFrame index instead
    of appearing as a regular data column.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    return frame
# Map ISO language codes to human names | |
def iso_to_name(lang_code):
    """Return a human-readable language name for ``lang_code``.

    The code is first looked up through pycountry as an ``alpha_2`` code. If
    that lookup fails for any reason, two hand-maintained mappings are tried
    ("iw" and "language_agnostic"); anything else is returned unchanged.
    """
    try:
        language = pycountry.languages.get(alpha_2=lang_code)
        return language.name
    except Exception:
        # Deliberately best-effort: any lookup failure falls back to the
        # manual table below rather than interrupting the app.
        manual_names = (
            ("iw", "Hebrew"),
            ("language_agnostic", "Language Agnostic"),
        )
        for code, label in manual_names:
            if lang_code == code:
                return label
        return lang_code
# Main app logic | |
def main():
    """Render the results viewer for the most recent run folder.

    Flow:
      1. Pick the first folder returned by ``get_result_folders()``.
      2. Load ``parameters.yaml`` and list its entries in the sidebar.
      3. Load ``results.csv`` and ``aggregates.csv`` (both required) and
         ``profiling.csv`` (optional).
      4. Render three tabs: language-centric, alarm-centric, profiling.

    Any missing required input is reported with ``st.error`` and the function
    returns early without rendering the tabs.
    """
    st.title("Homoglyphs Alarm Experiment Results Viewer")

    result_folders = get_result_folders()
    if not result_folders:
        st.error("No results found. Please run experiments first.")
        return
    # Always use the latest results folder (first entry of the descending sort).
    folder = result_folders[0]

    param_path = os.path.join(folder, "parameters.yaml")
    if not os.path.exists(param_path):
        st.error(f"parameters.yaml not found in {folder}")
        return
    # An empty YAML file parses to None; treat it as "no parameters" so the
    # sidebar and display-name lookup below do not crash.
    params = load_parameters(param_path) or {}

    _render_sidebar(params)
    get_alarm_display_name = _make_alarm_namer(params)

    results_csv = os.path.join(folder, "results.csv")
    agg_csv = os.path.join(folder, "aggregates.csv")
    if not os.path.exists(results_csv) or not os.path.exists(agg_csv):
        st.error("results.csv or aggregates.csv not found in selected folder.")
        return
    df_results = load_csv(results_csv)
    # NOTE: aggregates are loaded (and required to exist) but no tab below
    # displays them yet.
    df_agg = load_csv(agg_csv)

    # Defensive: ensure 'lang' column exists and is not all NaN.
    if "lang" not in df_results.columns or df_results["lang"].isnull().all():
        st.error(
            "No language information found in results.csv. Please check your experiment output."
        )
        return

    # Add human-readable language names unless the CSV already provides them.
    if "lang_name" not in df_results.columns:
        df_results["lang_name"] = df_results["lang"].apply(iso_to_name)
    if "lang" in df_agg.columns and "lang_name" not in df_agg.columns:
        df_agg["lang_name"] = df_agg["lang"].apply(iso_to_name)

    profiling_csv = os.path.join(folder, "profiling.csv")
    df_profiling = load_csv(profiling_csv) if os.path.exists(profiling_csv) else None

    tab1, tab2, tab3 = st.tabs(
        ["Language-centric view", "Alarm-centric view", "Profiling results"]
    )
    with tab1:
        _render_language_tab(df_results, get_alarm_display_name)
    with tab2:
        _render_alarm_tab(df_results, get_alarm_display_name)
    with tab3:
        _render_profiling_tab(df_profiling)


# Sidebar captions shown under the parameters they describe.
_PARAM_DESCRIPTIONS = {
    "LIST_OF_PERCENTAGES": "List of percentages of text replaced with homoglyphs in the experiments.",
    "MAX_NUM_OF_EXAMPLES_PER_LANG": "Maximum number of examples per language included in the analysis.",
    "NUMBER_OF_TIMES_TO_RUN_PROFILING": "Number of times each alarm is run for profiling (timing) purposes.",
    "NUMBER_OF_TEXTS_TO_PROFILE": "Number of texts used for profiling the alarms.",
    "LIMIT_TEXTS_MAX_CHARACTERS": "Maximum number of characters per text sample.",
    "LANGS_TO_USE": "Languages included in the experiments (ISO codes).",
    "ALARM_TYPES_CONFIGURED": "Configured alarm types (methods for detecting homoglyph attacks).",
}


def _prettify_param_name(name):
    """Turn a key such as ``LANGS_TO_USE`` into ``Langs to use``."""
    return re.sub(r"_+", " ", str(name)).strip().capitalize()


def _make_alarm_namer(params):
    """Build a function that maps a raw alarm identifier to a display name.

    Display names come from the first dict-valued ``ALARM_TYPES_CONFIGURED``
    or ``ATTACK_TYPES_CONFIGURED`` entry in ``params`` (key match is
    case-insensitive). Identifiers not listed there are prettified by
    replacing underscores with spaces and capitalising the first letter.
    """
    display_names = None
    for key, value in params.items():
        is_alarm_key = str(key).upper() in (
            "ALARM_TYPES_CONFIGURED",
            "ATTACK_TYPES_CONFIGURED",
        )
        if is_alarm_key and isinstance(value, dict):
            display_names = value
            break

    def get_alarm_display_name(alarm_type):
        if display_names and alarm_type in display_names:
            return display_names[alarm_type]
        return str(alarm_type).replace("_", " ").capitalize()

    return get_alarm_display_name


def _render_sidebar(params):
    """List every run parameter in the sidebar, with a caption when one is known."""
    st.sidebar.header("Run Parameters")
    for key, value in params.items():
        st.sidebar.write(f"**{_prettify_param_name(key)}**: {value}")
        description = _PARAM_DESCRIPTIONS.get(key)
        if description:
            st.sidebar.caption(description)


def _show_figure(fig):
    """Render a matplotlib figure in Streamlit, then release it.

    Figures created through ``plt.subplots`` stay registered with pyplot until
    closed; since Streamlit re-runs the script on every interaction, leaving
    them open would accumulate figures in memory.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _render_language_tab(df_results, get_alarm_display_name):
    """Tab 1: AUC table and line plot per alarm for one selected language."""
    st.header("Language-centric: Compare alarms and percentages for a given language")
    language_names = df_results["lang_name"].unique().tolist()
    # Widget key kept as-is so existing session state remains valid.
    lang_name = st.selectbox("Language:", language_names, key="lang_tab2")
    # Filter on the displayed name directly: this also works when results.csv
    # ships its own lang_name column or when several codes share one name.
    filtered = df_results[df_results["lang_name"] == lang_name]

    st.subheader(f"AUC by Alarm and Percentage for {lang_name}")
    table = filtered.assign(
        alarm_display=filtered["alarm_type"].apply(get_alarm_display_name)
    )
    st.dataframe(
        table[["alarm_display", "percentage", "auc"]]
        .rename(columns={"alarm_display": "Alarm type"})
        .sort_values(["Alarm type", "percentage"])
    )

    fig, ax = plt.subplots()
    for alarm in filtered["alarm_type"].unique():
        # Sort by x so each line runs left to right regardless of CSV row order.
        series = filtered[filtered["alarm_type"] == alarm].sort_values("percentage")
        ax.plot(
            series["percentage"],
            series["auc"],
            marker="o",
            label=get_alarm_display_name(alarm),
        )
    ax.set_xlabel("Percentage of text replaced")
    ax.set_ylabel("AUC (Area Under Curve)")
    ax.set_title(f"AUC by Alarm for {lang_name}")
    ax.legend(title="Alarm type")
    _show_figure(fig)


def _render_alarm_tab(df_results, get_alarm_display_name):
    """Tab 2: AUC per language for one selected alarm type and percentage."""
    st.header("Alarm-centric: Compare languages for a given alarm and percentage")
    alarm_types = df_results["alarm_type"].unique().tolist()
    alarm = st.selectbox(
        "Alarm type:",
        alarm_types,
        key="alarm_tab1",
        help="Select the alarm (detection method) to analyze.",
        format_func=get_alarm_display_name,
    )
    percentages = sorted(df_results["percentage"].unique())
    perc = st.selectbox(
        "Percentage:",
        percentages,
        key="perc_tab1",
        help="Select the percentage of text replaced with homoglyphs.",
    )
    matches = (df_results["alarm_type"] == alarm) & (df_results["percentage"] == perc)
    filtered = df_results[matches]

    st.subheader(f"AUC by Language for {get_alarm_display_name(alarm)} at {perc}")
    st.dataframe(
        filtered[["lang_name", "auc"]]
        .sort_values("auc", ascending=False)
        .reset_index(drop=True)
    )
    # No chart here: the original author removed it because there are too many
    # languages to plot legibly.
    language_count = len(df_results["lang"].unique())
    st.info(
        f"As there are {language_count} languages, we can't show all of them in a chart. "
        "Please use the Language-centric tab to explore individual languages."
    )


def _render_profiling_tab(df_profiling):
    """Tab 3: profiling table, column legend and a time-per-run bar chart."""
    st.header("Profiling Results: Alarm Execution Time and Efficiency")
    if df_profiling is None:
        st.info("No profiling results found for this run.")
        return

    st.dataframe(df_profiling)
    st.markdown(
        "\n"
        "- **alarm**: The alarm type (method) being profiled.\n"
        "- **total_time**: Total time taken for all runs (seconds).\n"
        "- **number_of_runs**: Number of times the profiling was repeated.\n"
        "- **number_of_texts**: Number of texts used in each profiling run.\n"
        "- **time_per_run**: Average time per profiling run (seconds).\n"
    )

    fig, ax = plt.subplots()
    ax.bar(df_profiling["alarm"], df_profiling["time_per_run"], color="#F600FF")
    ax.set_xlabel("Alarm type")
    ax.set_ylabel("Time per run (s)")
    ax.set_title("Average Time per Profiling Run by Alarm Type")
    _show_figure(fig)
# Start the viewer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()