# Snapshot metadata from the file viewer: revision 06bb199, file size 9,852 bytes.
# Streamlit app to visualize homoglyphs alarm experiment results
# This app lets users interactively explore experiment results stored in timestamped results folders
# It always loads the latest results folder; selecting other runs is not implemented yet
import streamlit as st
import os
import glob
import pandas as pd
import yaml
import matplotlib.pyplot as plt
from matplotlib import font_manager
import pycountry
import re
# Set Streamlit theme and custom font via config.toml (no manual CSS needed)
st.set_page_config(
    page_title="Homoglyphs Alarm Results", page_icon="📊", layout="centered"
)
# Set matplotlib font and color palette.
# The custom font is registered only when the file is actually present, so a
# missing asset falls back to matplotlib's default font instead of raising at
# import time and taking the whole app down.
font_path = "IBMPlexSans-Regular.ttf"
if os.path.isfile(font_path):
    font_manager.fontManager.addfont(font_path)
    plt.rcParams["font.family"] = "IBM Plex Sans"
plt.rcParams["axes.prop_cycle"] = plt.cycler(
    color=["#F600FF", "#FF0000", "#00FBFF", "#00AAEC", "#0034A3"]
)
# Helper to get all result folders sorted by timestamp (descending)
def get_result_folders(base_dir="results"):
    """Return the sub-directories of *base_dir*, sorted in descending order.

    Paths are sorted as strings in reverse order, which places the most
    recent run first when folder names are sortable timestamps.

    Args:
        base_dir: Directory that contains one folder per experiment run.

    Returns:
        A list of paths (``base_dir`` joined with each folder name). The list
        is empty when *base_dir* does not exist, is not a directory, or
        contains no sub-directories.
    """
    # A missing results directory simply means "no runs yet"; returning an
    # empty list lets the caller show its own "no results" message.
    if not os.path.isdir(base_dir):
        return []
    with os.scandir(base_dir) as entries:
        folders = [entry.path for entry in entries if entry.is_dir()]
    return sorted(folders, reverse=True)
# Helper to load YAML parameters
def load_parameters(yaml_path):
    """Load run parameters from a YAML file.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        The parsed YAML document. An empty file (which ``yaml.safe_load``
        parses to ``None``) is returned as an empty dict so callers can
        iterate over the result without a ``None`` check.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform default.
    with open(yaml_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    return {} if data is None else data
# Helper to load CSVs
def load_csv(csv_path):
    """Read *csv_path* into a DataFrame.

    The first CSV column is used as the DataFrame index (it is not kept as a
    regular data column).
    """
    frame = pd.read_csv(csv_path, index_col=0)
    return frame
# Map ISO language codes to human names
# Codes that are given a fixed display name instead of a pycountry lookup.
_LANG_NAME_OVERRIDES = {
    "iw": "Hebrew",
    "language_agnostic": "Language Agnostic",
}


def iso_to_name(lang_code):
    """Return a human-readable language name for *lang_code*.

    Args:
        lang_code: A two-letter language code, or one of the special codes in
            ``_LANG_NAME_OVERRIDES``.

    Returns:
        The override name if one is defined, otherwise the name pycountry
        reports for the ``alpha_2`` code. Unknown codes and non-string values
        (for example a missing value read from a CSV) are returned unchanged.
    """
    # Non-string values cannot be language codes; pass them through untouched.
    if not isinstance(lang_code, str):
        return lang_code
    if lang_code in _LANG_NAME_OVERRIDES:
        return _LANG_NAME_OVERRIDES[lang_code]
    try:
        language = pycountry.languages.get(alpha_2=lang_code)
    except LookupError:
        # Some pycountry versions raise KeyError/LookupError for unknown codes
        # instead of returning None.
        language = None
    return lang_code if language is None else language.name
# Main app logic
def main():
    """Render the Streamlit viewer for the latest experiment results folder.

    Reads ``parameters.yaml``, ``results.csv`` and ``aggregates.csv`` (and
    ``profiling.csv`` when present) from the first folder returned by
    ``get_result_folders()``, then renders the parameter sidebar and the
    three tabs. When a required input is missing, the problem is reported
    with ``st.error`` and the function returns early.
    """
    st.title("Homoglyphs Alarm Experiment Results Viewer")

    # Find all result folders
    result_folders = get_result_folders()
    if not result_folders:
        st.error("No results found. Please run experiments first.")
        return
    # Always use the latest results folder (first entry of the returned list)
    folder = result_folders[0]

    # Load parameters
    param_path = os.path.join(folder, "parameters.yaml")
    if not os.path.exists(param_path):
        st.error(f"parameters.yaml not found in {folder}")
        return
    # An empty parameters file parses to None; treat that as "no parameters"
    # so the loops below do not fail.
    params = load_parameters(param_path) or {}

    get_alarm_display_name = _make_alarm_display_namer(params)
    _render_sidebar(params)

    # Load results
    results_csv = os.path.join(folder, "results.csv")
    agg_csv = os.path.join(folder, "aggregates.csv")
    if not os.path.exists(results_csv) or not os.path.exists(agg_csv):
        st.error("results.csv or aggregates.csv not found in selected folder.")
        return
    df_results = load_csv(results_csv)
    # NOTE: aggregates.csv is required and loaded, but none of the tabs below
    # currently display it.
    df_agg = load_csv(agg_csv)

    # Defensive: ensure 'lang' column exists and is not all NaN
    if "lang" not in df_results.columns or df_results["lang"].isnull().all():
        st.error(
            "No language information found in results.csv. Please check your experiment output."
        )
        return

    # Map ISO language codes to human names unless the CSV already has them
    if "lang_name" not in df_results.columns:
        df_results["lang_name"] = df_results["lang"].apply(iso_to_name)
    if "lang" in df_agg.columns and "lang_name" not in df_agg.columns:
        df_agg["lang_name"] = df_agg["lang"].apply(iso_to_name)

    # Load profiling results if available
    profiling_csv = os.path.join(folder, "profiling.csv")
    df_profiling = None
    if os.path.exists(profiling_csv):
        df_profiling = load_csv(profiling_csv)

    tab1, tab2, tab3 = st.tabs(
        ["Language-centric view", "Alarm-centric view", "Profiling results"]
    )
    with tab1:
        _render_language_tab(df_results, get_alarm_display_name)
    with tab2:
        _render_alarm_tab(df_results, get_alarm_display_name)
    with tab3:
        _render_profiling_tab(df_profiling)


# Parameter descriptions for user-friendly sidebar
_PARAM_DESCRIPTIONS = {
    "LIST_OF_PERCENTAGES": "List of percentages of text replaced with homoglyphs in the experiments.",
    "MAX_NUM_OF_EXAMPLES_PER_LANG": "Maximum number of examples per language included in the analysis.",
    "NUMBER_OF_TIMES_TO_RUN_PROFILING": "Number of times each alarm is run for profiling (timing) purposes.",
    "NUMBER_OF_TEXTS_TO_PROFILE": "Number of texts used for profiling the alarms.",
    "LIMIT_TEXTS_MAX_CHARACTERS": "Maximum number of characters per text sample.",
    "LANGS_TO_USE": "Languages included in the experiments (ISO codes).",
    "ALARM_TYPES_CONFIGURED": "Configured alarm types (methods for detecting homoglyph attacks).",
}

# Parameter keys (compared case-insensitively) whose dict value is used as the
# alarm-type -> display-name mapping.
_ALARM_DISPLAY_KEYS = frozenset({"ALARM_TYPES_CONFIGURED", "ATTACK_TYPES_CONFIGURED"})


def _make_alarm_display_namer(params):
    """Return a function mapping an alarm type to its display name.

    The first parameter whose key matches ``_ALARM_DISPLAY_KEYS`` and whose
    value is a dict supplies the names; alarm types not found there are
    prettified from their identifier instead.
    """
    display_names = None
    for key, value in params.items():
        if (
            isinstance(key, str)
            and key.upper() in _ALARM_DISPLAY_KEYS
            and isinstance(value, dict)
        ):
            display_names = value
            break

    def get_alarm_display_name(alarm_type):
        if display_names and alarm_type in display_names:
            return display_names[alarm_type]
        return alarm_type.replace("_", " ").capitalize()

    return get_alarm_display_name


def _prettify_param_name(name):
    """Turn ``SOME_PARAM_NAME`` into ``Some param name`` for display."""
    return re.sub(r"_+", " ", str(name)).strip().capitalize()


def _render_sidebar(params):
    """Write every run parameter (with a caption when one is known) to the sidebar."""
    st.sidebar.header("Run Parameters")
    for key, value in params.items():
        st.sidebar.write(f"**{_prettify_param_name(key)}**: {value}")
        description = _PARAM_DESCRIPTIONS.get(key)
        if description:
            st.sidebar.caption(description)


def _render_language_tab(df_results, get_alarm_display_name):
    """Render the table and line chart of AUC per alarm for one selected language.

    Reads the ``lang_name``, ``alarm_type``, ``percentage`` and ``auc``
    columns of *df_results*.
    """
    st.header(
        "Language-centric: Compare alarms and percentages for a given language"
    )
    language_names = df_results["lang_name"].unique().tolist()
    lang_name = st.selectbox("Language:", language_names, key="lang_tab2")
    # Filter on the displayed name itself. Re-deriving the code through
    # iso_to_name() would select nothing when results.csv ships its own
    # "lang_name" column with different spellings.
    filtered = df_results[df_results["lang_name"] == lang_name]

    st.subheader(f"AUC by Alarm and Percentage for {lang_name}")
    # Show human-friendly alarm names in the table
    filtered_disp = filtered.copy()
    filtered_disp["alarm_display"] = filtered_disp["alarm_type"].apply(
        get_alarm_display_name
    )
    st.dataframe(
        filtered_disp[["alarm_display", "percentage", "auc"]]
        .rename(columns={"alarm_display": "Alarm type"})
        .sort_values(["Alarm type", "percentage"])
    )

    # Plot
    fig, ax = plt.subplots()
    for alarm in filtered["alarm_type"].unique():
        # Sort by x so the line is drawn left to right even when the CSV rows
        # are not ordered by percentage.
        sub = filtered[filtered["alarm_type"] == alarm].sort_values("percentage")
        ax.plot(
            sub["percentage"],
            sub["auc"],
            marker="o",
            label=get_alarm_display_name(alarm),
        )
    ax.set_xlabel("Percentage of text replaced")
    ax.set_ylabel("AUC (Area Under Curve)")
    ax.set_title(f"AUC by Alarm for {lang_name}")
    ax.legend(title="Alarm type")
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # pyplot does not keep one more figure alive per rerun.
    plt.close(fig)


def _render_alarm_tab(df_results, get_alarm_display_name):
    """Render the per-language AUC table for one selected alarm and percentage.

    Reads the ``alarm_type``, ``percentage``, ``lang``, ``lang_name`` and
    ``auc`` columns of *df_results*.
    """
    st.header("Alarm-centric: Compare languages for a given alarm and percentage")
    alarm_types = df_results["alarm_type"].unique().tolist()
    alarm = st.selectbox(
        "Alarm type:",
        alarm_types,
        key="alarm_tab1",
        help="Select the alarm (detection method) to analyze.",
        format_func=get_alarm_display_name,
    )
    percentages = sorted(df_results["percentage"].unique())
    perc = st.selectbox(
        "Percentage:",
        percentages,
        key="perc_tab1",
        help="Select the percentage of text replaced with homoglyphs.",
    )
    filtered = df_results[
        (df_results["alarm_type"] == alarm) & (df_results["percentage"] == perc)
    ]
    st.subheader(f"AUC by Language for {get_alarm_display_name(alarm)} at {perc}")
    st.dataframe(
        filtered[["lang_name", "auc"]]
        .sort_values("auc", ascending=False)
        .reset_index(drop=True)
    )
    language_count = len(df_results["lang"].unique())
    st.info(
        f"As there are {language_count} languages, we can't show all of them in a chart. "
        "Please use the Language-centric tab to explore individual languages."
    )
    # Chart removed for clarity due to too many languages


def _render_profiling_tab(df_profiling):
    """Render the profiling table and bar chart, or a notice when there is no data.

    *df_profiling* is ``None`` when the run has no ``profiling.csv``;
    otherwise its ``alarm`` and ``time_per_run`` columns feed the bar chart.
    """
    st.header("Profiling Results: Alarm Execution Time and Efficiency")
    if df_profiling is None:
        st.info("No profiling results found for this run.")
        return

    st.dataframe(df_profiling)
    st.markdown(
        """
- **alarm**: The alarm type (method) being profiled.
- **total_time**: Total time taken for all runs (seconds).
- **number_of_runs**: Number of times the profiling was repeated.
- **number_of_texts**: Number of texts used in each profiling run.
- **time_per_run**: Average time per profiling run (seconds).
"""
    )
    # Optional: bar chart of time per run
    fig, ax = plt.subplots()
    ax.bar(df_profiling["alarm"], df_profiling["time_per_run"], color="#F600FF")
    ax.set_xlabel("Alarm type")
    ax.set_ylabel("Time per run (s)")
    ax.set_title("Average Time per Profiling Run by Alarm Type")
    st.pyplot(fig)
    # Release the figure once Streamlit has rendered it (see reruns above in
    # Streamlit's execution model: the whole script runs on each interaction).
    plt.close(fig)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|