acmc committed on
Commit
06bb199
·
verified ·
1 Parent(s): 6e74762

Create visualization_app.py

Browse files
Files changed (1) hide show
  1. visualization_app.py +259 -0
visualization_app.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Streamlit app to visualize homoglyphs alarm experiment results
2
+ # This app lets users interactively explore experiment results stored in timestamped results folders
3
+ # It always loads the most recent results folder (selecting other runs is not implemented)
4
+
5
+ import streamlit as st
6
+ import os
7
+ import glob
8
+ import pandas as pd
9
+ import yaml
10
+ import matplotlib.pyplot as plt
11
+ from matplotlib import font_manager
12
+ import pycountry
13
+ import re
14
+
15
+ # Set Streamlit theme and custom font via config.toml (no manual CSS needed)
16
# Page-level Streamlit settings; must run before any other Streamlit call.
st.set_page_config(
    page_title="Homoglyphs Alarm Results", page_icon="📊", layout="centered"
)

# Set matplotlib font and color palette.
# The font file is looked up relative to the current working directory. If it
# is missing, keep matplotlib's default font instead of aborting the whole app
# at import time: the custom typeface is purely cosmetic.
font_path = "IBMPlexSans-Regular.ttf"
if os.path.isfile(font_path):
    font_manager.fontManager.addfont(font_path)
    plt.rcParams["font.family"] = "IBM Plex Sans"

# Color cycle used for successive lines in every chart.
plt.rcParams["axes.prop_cycle"] = plt.cycler(
    color=["#F600FF", "#FF0000", "#00FBFF", "#00AAEC", "#0034A3"]
)
27
+
28
+
29
+ # Helper to get all result folders sorted by timestamp (descending)
30
def get_result_folders(base_dir="results"):
    """Return the sub-directories of ``base_dir``, sorted in descending order.

    Run folders are named so that a reverse lexicographic sort of their paths
    puts the most recent run first (the caller takes index 0 as "latest").

    Args:
        base_dir: Directory containing one sub-folder per experiment run.

    Returns:
        A list of ``os.path.join(base_dir, name)`` paths for every
        sub-directory, sorted in reverse order. The list is empty when
        ``base_dir`` does not exist or contains no sub-directories.
    """
    # A missing results directory simply means no experiment has been run yet.
    # Returning an empty list lets the caller show its "no results" message
    # instead of crashing inside os.listdir.
    if not os.path.isdir(base_dir):
        return []
    candidates = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    return sorted((path for path in candidates if os.path.isdir(path)), reverse=True)
38
+
39
+
40
+ # Helper to load YAML parameters
41
def load_parameters(yaml_path):
    """Load the run parameters stored in a YAML file.

    Args:
        yaml_path: Path to the YAML file to read.

    Returns:
        The parsed YAML document. An empty file, which ``yaml.safe_load``
        parses to ``None``, is returned as an empty dict so callers can
        iterate over the result without a ``None`` check.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default
    # encoding, which differs between operating systems.
    with open(yaml_path, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)
    return {} if loaded is None else loaded
44
+
45
+
46
+ # Helper to load CSVs
47
def load_csv(csv_path):
    """Read a CSV file into a DataFrame.

    The first column of the file is used as the DataFrame index, so it does
    not appear among the data columns.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    return frame
50
+
51
+
52
+ # Map ISO language codes to human names
53
def iso_to_name(lang_code):
    """Translate a two-letter language code into a human-readable name.

    Args:
        lang_code: The language code as found in the results. Two labels are
            mapped by hand: ``"iw"`` becomes ``"Hebrew"`` and
            ``"language_agnostic"`` becomes ``"Language Agnostic"``.

    Returns:
        The language name reported by ``pycountry`` for the ``alpha_2`` code,
        or ``lang_code`` unchanged when it is not a string or no name can be
        found for it.
    """
    # Non-string values (e.g. missing cells) cannot be looked up; pass them
    # through untouched.
    if not isinstance(lang_code, str):
        return lang_code

    # Labels resolved by hand rather than through pycountry.
    overrides = {"iw": "Hebrew", "language_agnostic": "Language Agnostic"}
    if lang_code in overrides:
        return overrides[lang_code]

    # Depending on the pycountry version an unknown code either returns None
    # or raises a lookup error; handle both explicitly instead of swallowing
    # every exception.
    try:
        language = pycountry.languages.get(alpha_2=lang_code)
    except LookupError:
        language = None
    name = getattr(language, "name", None)
    return name if name else lang_code
62
+
63
+
64
+ # Main app logic
65
# Markdown legend shown under the profiling table.
_PROFILING_LEGEND = (
    "\n"
    "- **alarm**: The alarm type (method) being profiled.\n"
    "- **total_time**: Total time taken for all runs (seconds).\n"
    "- **number_of_runs**: Number of times the profiling was repeated.\n"
    "- **number_of_texts**: Number of texts used in each profiling run.\n"
    "- **time_per_run**: Average time per profiling run (seconds).\n"
)


def _prettify_param_name(name):
    """Turn a parameter key such as ``LANGS_TO_USE`` into ``Langs to use``."""
    return re.sub(r"_+", " ", str(name)).strip().capitalize()


def _find_alarm_display_names(params):
    """Return the alarm-type -> display-name mapping from ``params``, if any.

    The first parameter whose upper-cased key is ``ALARM_TYPES_CONFIGURED`` or
    ``ATTACK_TYPES_CONFIGURED`` and whose value is a dict is used; ``None`` is
    returned when there is no such parameter.
    """
    wanted = ("ALARM_TYPES_CONFIGURED", "ATTACK_TYPES_CONFIGURED")
    for key, value in params.items():
        if str(key).upper() in wanted and isinstance(value, dict):
            return value
    return None


def _render_language_tab(df_results, get_alarm_display_name):
    """Render the tab comparing alarms and percentages for one language."""
    st.header("Language-centric: Compare alarms and percentages for a given language")
    language_names = df_results["lang_name"].unique().tolist()
    lang_name = st.selectbox("Language:", language_names, key="lang_tab2")

    # Recover the language code from the data itself (first row carrying the
    # selected name). This also works when results.csv already shipped its own
    # 'lang_name' column whose values differ from what iso_to_name() returns.
    matching_codes = df_results.loc[df_results["lang_name"] == lang_name, "lang"]
    lang = None if matching_codes.empty else matching_codes.iloc[0]
    filtered = df_results[df_results["lang"] == lang]

    st.subheader(f"AUC by Alarm and Percentage for {lang_name}")
    # Show human-friendly alarm names in the table.
    filtered_disp = filtered.copy()
    filtered_disp["alarm_display"] = filtered_disp["alarm_type"].apply(
        get_alarm_display_name
    )
    st.dataframe(
        filtered_disp[["alarm_display", "percentage", "auc"]]
        .rename(columns={"alarm_display": "Alarm type"})
        .sort_values(["Alarm type", "percentage"])
    )

    # One line per alarm type.
    fig, ax = plt.subplots()
    for alarm in filtered["alarm_type"].unique():
        # Sort by x value so the line is drawn left to right even when the
        # rows are not ordered by percentage in the CSV.
        sub = filtered[filtered["alarm_type"] == alarm].sort_values("percentage")
        ax.plot(
            sub["percentage"],
            sub["auc"],
            marker="o",
            label=get_alarm_display_name(alarm),
        )
    ax.set_xlabel("Percentage of text replaced")
    ax.set_ylabel("AUC (Area Under Curve)")
    ax.set_title(f"AUC by Alarm for {lang_name}")
    ax.legend(title="Alarm type")
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # pyplot does not keep accumulating open figures.
    plt.close(fig)


def _render_alarm_tab(df_results, get_alarm_display_name):
    """Render the tab comparing languages for one alarm and percentage."""
    st.header("Alarm-centric: Compare languages for a given alarm and percentage")
    alarm_types = df_results["alarm_type"].unique().tolist()
    alarm = st.selectbox(
        "Alarm type:",
        alarm_types,
        key="alarm_tab1",
        help="Select the alarm (detection method) to analyze.",
        format_func=get_alarm_display_name,
    )
    percentages = sorted(df_results["percentage"].unique())
    perc = st.selectbox(
        "Percentage:",
        percentages,
        key="perc_tab1",
        help="Select the percentage of text replaced with homoglyphs.",
    )
    filtered = df_results[
        (df_results["alarm_type"] == alarm) & (df_results["percentage"] == perc)
    ]
    st.subheader(f"AUC by Language for {get_alarm_display_name(alarm)} at {perc}")
    st.dataframe(
        filtered[["lang_name", "auc"]]
        .sort_values("auc", ascending=False)
        .reset_index(drop=True)
    )
    # No chart here: there are too many languages to plot legibly.
    st.info(
        f"As there are {len(df_results['lang'].unique())} languages, we can't show all of them in a chart. "
        "Please use the Language-centric tab to explore individual languages."
    )


def _render_profiling_tab(df_profiling):
    """Render the profiling table and the time-per-run bar chart."""
    st.header("Profiling Results: Alarm Execution Time and Efficiency")
    if df_profiling is None:
        st.info("No profiling results found for this run.")
        return

    st.dataframe(df_profiling)
    st.markdown(_PROFILING_LEGEND)

    # The CSV is loaded with its first column as index; if that column happens
    # to be 'alarm', bring it back as a regular column for plotting.
    chart_data = df_profiling
    if "alarm" not in chart_data.columns and chart_data.index.name == "alarm":
        chart_data = chart_data.reset_index()
    if not {"alarm", "time_per_run"}.issubset(chart_data.columns):
        st.info("Profiling chart unavailable: 'alarm' or 'time_per_run' column is missing.")
        return

    # Bar chart of time per run.
    fig, ax = plt.subplots()
    ax.bar(chart_data["alarm"], chart_data["time_per_run"], color="#F600FF")
    ax.set_xlabel("Alarm type")
    ax.set_ylabel("Time per run (s)")
    ax.set_title("Average Time per Profiling Run by Alarm Type")
    st.pyplot(fig)
    plt.close(fig)  # avoid leaking figures across Streamlit reruns


def main():
    """Render the results viewer for the most recent results folder.

    Reads ``parameters.yaml``, ``results.csv`` and ``aggregates.csv`` (and
    ``profiling.csv`` when present) from the first folder returned by
    ``get_result_folders()``, shows the run parameters in the sidebar and
    renders three tabs: language-centric, alarm-centric and profiling.
    Any missing input is reported with ``st.error`` and rendering stops.
    """
    st.title("Homoglyphs Alarm Experiment Results Viewer")

    # Find all result folders
    result_folders = get_result_folders()
    if not result_folders:
        st.error("No results found. Please run experiments first.")
        return

    # Always use the latest results folder
    folder = result_folders[0]

    # Load parameters
    param_path = os.path.join(folder, "parameters.yaml")
    if not os.path.exists(param_path):
        st.error(f"parameters.yaml not found in {folder}")
        return
    params = load_parameters(param_path)
    if not isinstance(params, dict):
        # An empty file parses to None and a top-level list is not a mapping;
        # in both cases there are no named parameters to display.
        params = {}
    st.sidebar.header("Run Parameters")

    # Parameter descriptions for user-friendly sidebar
    param_descriptions = {
        "LIST_OF_PERCENTAGES": "List of percentages of text replaced with homoglyphs in the experiments.",
        "MAX_NUM_OF_EXAMPLES_PER_LANG": "Maximum number of examples per language included in the analysis.",
        "NUMBER_OF_TIMES_TO_RUN_PROFILING": "Number of times each alarm is run for profiling (timing) purposes.",
        "NUMBER_OF_TEXTS_TO_PROFILE": "Number of texts used for profiling the alarms.",
        "LIMIT_TEXTS_MAX_CHARACTERS": "Maximum number of characters per text sample.",
        "LANGS_TO_USE": "Languages included in the experiments (ISO codes).",
        "ALARM_TYPES_CONFIGURED": "Configured alarm types (methods for detecting homoglyph attacks).",
    }

    # Try to get alarm type display names from parameters if available
    alarm_type_display = _find_alarm_display_names(params)

    def get_alarm_display_name(alarm_type):
        """Return the configured display name, else a prettified identifier."""
        if alarm_type_display and alarm_type in alarm_type_display:
            return alarm_type_display[alarm_type]
        return str(alarm_type).replace("_", " ").capitalize()

    for key, value in params.items():
        st.sidebar.write(f"**{_prettify_param_name(key)}**: {value}")
        description = param_descriptions.get(key)
        if description:
            st.sidebar.caption(description)

    # Load results
    results_csv = os.path.join(folder, "results.csv")
    agg_csv = os.path.join(folder, "aggregates.csv")
    if not os.path.exists(results_csv) or not os.path.exists(agg_csv):
        st.error("results.csv or aggregates.csv not found in selected folder.")
        return
    df_results = load_csv(results_csv)
    df_agg = load_csv(agg_csv)

    # Defensive: ensure 'lang' column exists and is not all NaN
    if "lang" not in df_results.columns or df_results["lang"].isnull().all():
        st.error(
            "No language information found in results.csv. Please check your experiment output."
        )
        return

    # Every view below indexes these columns; fail with a readable message
    # rather than a KeyError traceback.
    missing = [
        col for col in ("alarm_type", "percentage", "auc") if col not in df_results.columns
    ]
    if missing:
        st.error(f"results.csv is missing required column(s): {', '.join(missing)}")
        return

    # Map ISO language codes to human names unless the CSV already has them
    if "lang_name" not in df_results.columns:
        df_results["lang_name"] = df_results["lang"].apply(iso_to_name)
    if "lang" in df_agg.columns and "lang_name" not in df_agg.columns:
        df_agg["lang_name"] = df_agg["lang"].apply(iso_to_name)

    # Load profiling results if available
    profiling_csv = os.path.join(folder, "profiling.csv")
    df_profiling = load_csv(profiling_csv) if os.path.exists(profiling_csv) else None

    tab1, tab2, tab3 = st.tabs(
        ["Language-centric view", "Alarm-centric view", "Profiling results"]
    )

    # --- Tab 1: Language-centric view ---
    with tab1:
        _render_language_tab(df_results, get_alarm_display_name)

    # --- Tab 2: Alarm-centric view ---
    with tab2:
        _render_alarm_tab(df_results, get_alarm_display_name)

    # --- Tab 3: Profiling results ---
    with tab3:
        _render_profiling_tab(df_profiling)


if __name__ == "__main__":
    main()