File size: 9,852 Bytes
06bb199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# Streamlit app to visualize homoglyphs alarm experiment results
# This app lets users interactively explore experiment results stored in timestamped results folders
# It always loads the most recent results folder; selecting an earlier run is not implemented

import streamlit as st
import os
import glob
import pandas as pd
import yaml
import matplotlib.pyplot as plt
from matplotlib import font_manager
import pycountry
import re

# Page-level Streamlit settings. The theme and custom font are expected to be
# supplied through config.toml (not shown here), so no manual CSS is injected.
st.set_page_config(
    layout="centered",
    page_icon="📊",
    page_title="Homoglyphs Alarm Results",
)

# Register the IBM Plex Sans font file (a relative path, so it is looked up in
# the current working directory), then make it matplotlib's default family and
# install the five-colour line palette in a single rcParams update.
font_path = "IBMPlexSans-Regular.ttf"
font_manager.fontManager.addfont(font_path)
plt.rcParams.update(
    {
        "font.family": "IBM Plex Sans",
        "axes.prop_cycle": plt.cycler(
            color=["#F600FF", "#FF0000", "#00FBFF", "#00AAEC", "#0034A3"]
        ),
    }
)


# Helper to get all result folders sorted by timestamp (descending)
def get_result_folders(base_dir="results"):
    """Return the sub-directories of ``base_dir``, newest first.

    Ordering is a reverse lexicographic sort of the folder paths, so it only
    matches chronological order when folder names are sortable timestamps
    (assumed from how the viewer uses the first entry as "latest").

    Args:
        base_dir: Directory that contains one folder per experiment run.

    Returns:
        A list of paths (``base_dir`` joined with each sub-directory name),
        sorted in descending order. The list is empty when ``base_dir`` does
        not exist, is not a directory, or contains no sub-directories.
    """
    # A missing results directory simply means "no runs yet"; returning an
    # empty list lets the caller show its own message instead of crashing.
    if not os.path.isdir(base_dir):
        return []
    candidates = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    return sorted((path for path in candidates if os.path.isdir(path)), reverse=True)


# Helper to load YAML parameters
def load_parameters(yaml_path):
    """Load the run parameters stored in a YAML file.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        The parsed YAML document. An empty file yields an empty dict rather
        than ``None`` so callers can iterate over the result safely.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(yaml_path, "r", encoding="utf-8") as f:
        params = yaml.safe_load(f)
    # safe_load returns None for an empty document.
    return {} if params is None else params


# Helper to load CSVs
def load_csv(csv_path):
    """Read a CSV file into a DataFrame.

    The first column of the file is used as the DataFrame index, so it does
    not appear among the regular columns.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    return frame


# Map ISO language codes to human names
def iso_to_name(lang_code):
    """Translate a language code into a human-readable name.

    The code is first looked up in pycountry as a two-letter (alpha-2) code.
    If that lookup fails for any reason, a small table of special cases is
    consulted, and finally the code itself is returned unchanged.

    Args:
        lang_code: Language code as stored in the results.

    Returns:
        The language name, or ``lang_code`` when no name is known.
    """
    try:
        language = pycountry.languages.get(alpha_2=lang_code)
        return language.name
    except Exception:
        # Codes that pycountry cannot resolve but the experiments still use.
        special_cases = (
            ("iw", "Hebrew"),
            ("language_agnostic", "Language Agnostic"),
        )
        for code, display_name in special_cases:
            if lang_code == code:
                return display_name
        return lang_code


# Main app logic

# Sidebar captions for the parameters this viewer knows how to describe.
_PARAM_DESCRIPTIONS = {
    "LIST_OF_PERCENTAGES": "List of percentages of text replaced with homoglyphs in the experiments.",
    "MAX_NUM_OF_EXAMPLES_PER_LANG": "Maximum number of examples per language included in the analysis.",
    "NUMBER_OF_TIMES_TO_RUN_PROFILING": "Number of times each alarm is run for profiling (timing) purposes.",
    "NUMBER_OF_TEXTS_TO_PROFILE": "Number of texts used for profiling the alarms.",
    "LIMIT_TEXTS_MAX_CHARACTERS": "Maximum number of characters per text sample.",
    "LANGS_TO_USE": "Languages included in the experiments (ISO codes).",
    "ALARM_TYPES_CONFIGURED": "Configured alarm types (methods for detecting homoglyph attacks).",
}


def _find_alarm_display_names(params):
    """Return the dict stored under the alarm/attack types key, or None."""
    for key, value in params.items():
        if not isinstance(value, dict):
            continue
        # str() so that non-string YAML keys (e.g. integers) cannot raise here.
        if str(key).upper() in ("ALARM_TYPES_CONFIGURED", "ATTACK_TYPES_CONFIGURED"):
            return value
    return None


def _alarm_display_name(alarm_type, display_names):
    """Return the configured display name for an alarm, or a prettified key."""
    if display_names and alarm_type in display_names:
        return display_names[alarm_type]
    return str(alarm_type).replace("_", " ").capitalize()


def _prettify_param_name(name):
    """Turn a key such as ``LANGS_TO_USE`` into ``Langs to use``."""
    return re.sub(r"_+", " ", str(name)).strip().capitalize()


def _render_sidebar(params):
    """List every run parameter in the sidebar, with a caption when known."""
    st.sidebar.header("Run Parameters")
    for key, value in params.items():
        st.sidebar.write(f"**{_prettify_param_name(key)}**: {value}")
        description = _PARAM_DESCRIPTIONS.get(key)
        if description:
            st.sidebar.caption(description)


def _render_language_tab(df_results, get_alarm_display_name):
    """Draw the table and line chart of AUC per alarm for one chosen language."""
    st.header("Language-centric: Compare alarms and percentages for a given language")
    # NaN names cannot be matched by equality, so they are not offered.
    language_names = df_results["lang_name"].dropna().unique().tolist()
    lang_name = st.selectbox("Language:", language_names, key="lang_tab2")
    # Filter on the displayed name itself rather than mapping it back to a
    # code: this also works when results.csv ships its own lang_name column.
    filtered = df_results[df_results["lang_name"] == lang_name]
    st.subheader(f"AUC by Alarm and Percentage for {lang_name}")

    # Show human-friendly alarm names in the table
    filtered_disp = filtered.copy()
    filtered_disp["alarm_display"] = filtered_disp["alarm_type"].apply(
        get_alarm_display_name
    )
    st.dataframe(
        filtered_disp[["alarm_display", "percentage", "auc"]]
        .rename(columns={"alarm_display": "Alarm type"})
        .sort_values(["Alarm type", "percentage"])
    )

    fig, ax = plt.subplots()
    for alarm in filtered["alarm_type"].unique():
        # Sort by x so the line is drawn left to right whatever the CSV order.
        sub = filtered[filtered["alarm_type"] == alarm].sort_values("percentage")
        ax.plot(
            sub["percentage"],
            sub["auc"],
            marker="o",
            label=get_alarm_display_name(alarm),
        )
    ax.set_xlabel("Percentage of text replaced")
    ax.set_ylabel("AUC (Area Under Curve)")
    ax.set_title(f"AUC by Alarm for {lang_name}")
    ax.legend(title="Alarm type")
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; the script reruns on
    # each interaction, so close it to avoid accumulating figures.
    plt.close(fig)


def _render_alarm_tab(df_results, get_alarm_display_name):
    """Draw the per-language AUC table for one chosen alarm and percentage."""
    st.header("Alarm-centric: Compare languages for a given alarm and percentage")
    alarm_types = df_results["alarm_type"].unique().tolist()
    alarm = st.selectbox(
        "Alarm type:",
        alarm_types,
        key="alarm_tab1",
        help="Select the alarm (detection method) to analyze.",
        format_func=get_alarm_display_name,
    )
    percentages = sorted(df_results["percentage"].unique())
    perc = st.selectbox(
        "Percentage:",
        percentages,
        key="perc_tab1",
        help="Select the percentage of text replaced with homoglyphs.",
    )
    filtered = df_results[
        (df_results["alarm_type"] == alarm) & (df_results["percentage"] == perc)
    ]
    st.subheader(f"AUC by Language for {get_alarm_display_name(alarm)} at {perc}")
    st.dataframe(
        filtered[["lang_name", "auc"]]
        .sort_values("auc", ascending=False)
        .reset_index(drop=True)
    )
    # No chart here: there are too many languages to plot legibly.
    st.info(
        f"As there are {len(df_results['lang'].unique())} languages, we can't show all of them in a chart. "
        "Please use the Language-centric tab to explore individual languages."
    )


def _render_profiling_tab(df_profiling):
    """Draw the profiling table, column legend and time-per-run bar chart."""
    st.header("Profiling Results: Alarm Execution Time and Efficiency")
    if df_profiling is None:
        st.info("No profiling results found for this run.")
        return

    st.dataframe(df_profiling)
    st.markdown(
        """
            - **alarm**: The alarm type (method) being profiled.
            - **total_time**: Total time taken for all runs (seconds).
            - **number_of_runs**: Number of times the profiling was repeated.
            - **number_of_texts**: Number of texts used in each profiling run.
            - **time_per_run**: Average time per profiling run (seconds).
            """
    )
    fig, ax = plt.subplots()
    ax.bar(df_profiling["alarm"], df_profiling["time_per_run"], color="#F600FF")
    ax.set_xlabel("Alarm type")
    ax.set_ylabel("Time per run (s)")
    ax.set_title("Average Time per Profiling Run by Alarm Type")
    st.pyplot(fig)
    # Release the figure once rendered (see the language tab for the reason).
    plt.close(fig)


def main():
    """Render the results viewer for the most recent experiment run.

    Picks the first folder returned by ``get_result_folders()``, shows its
    ``parameters.yaml`` in the sidebar, loads ``results.csv`` and
    ``aggregates.csv`` (both required) plus ``profiling.csv`` (optional), and
    draws three tabs: language-centric, alarm-centric and profiling.

    Any missing input is reported with ``st.error`` and the function returns
    early instead of raising.
    """
    st.title("Homoglyphs Alarm Experiment Results Viewer")

    # Find all result folders; a missing results directory counts as "none".
    try:
        result_folders = get_result_folders()
    except (FileNotFoundError, NotADirectoryError):
        result_folders = []
    if not result_folders:
        st.error("No results found. Please run experiments first.")
        return

    # Always use the latest results folder
    folder = result_folders[0]

    # Load parameters
    param_path = os.path.join(folder, "parameters.yaml")
    if not os.path.exists(param_path):
        st.error(f"parameters.yaml not found in {folder}")
        return
    params = load_parameters(param_path)
    if not isinstance(params, dict):
        # An empty or non-mapping YAML document has no parameters to list.
        params = {}

    alarm_type_display = _find_alarm_display_names(params)

    def get_alarm_display_name(alarm_type):
        return _alarm_display_name(alarm_type, alarm_type_display)

    _render_sidebar(params)

    # Load results
    results_csv = os.path.join(folder, "results.csv")
    agg_csv = os.path.join(folder, "aggregates.csv")
    if not os.path.exists(results_csv) or not os.path.exists(agg_csv):
        st.error("results.csv or aggregates.csv not found in selected folder.")
        return
    df_results = load_csv(results_csv)
    df_agg = load_csv(agg_csv)

    # Defensive: ensure 'lang' column exists and is not all NaN
    if "lang" not in df_results.columns or df_results["lang"].isnull().all():
        st.error(
            "No language information found in results.csv. Please check your experiment output."
        )
        return

    # Add human-readable language names unless the CSV already provides them.
    if "lang_name" not in df_results.columns:
        df_results["lang_name"] = df_results["lang"].apply(iso_to_name)
    if "lang" in df_agg.columns and "lang_name" not in df_agg.columns:
        df_agg["lang_name"] = df_agg["lang"].apply(iso_to_name)

    # Load profiling results if available
    profiling_csv = os.path.join(folder, "profiling.csv")
    df_profiling = load_csv(profiling_csv) if os.path.exists(profiling_csv) else None

    tab1, tab2, tab3 = st.tabs(
        ["Language-centric view", "Alarm-centric view", "Profiling results"]
    )
    with tab1:
        _render_language_tab(df_results, get_alarm_display_name)
    with tab2:
        _render_alarm_tab(df_results, get_alarm_display_name)
    with tab3:
        _render_profiling_tab(df_profiling)


# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()