Add award icons for 5-shot and 0-shot models; shorten some table column names for clarity
Browse files
- app.py +46 -12
- src/display/utils.py +1 -1
app.py
CHANGED
@@ -24,14 +24,14 @@ def line_chart(dataframe):
|
|
24 |
|
25 |
# Estrai valori x, y e labels per True e False
|
26 |
x_true = df_true['#Params (B)'].tolist()
|
27 |
-
y_true = df_true['Avg.
|
28 |
labels_true = [
|
29 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
30 |
for m in df_true['Model'].tolist()
|
31 |
]
|
32 |
|
33 |
x_false = df_false['#Params (B)'].tolist()
|
34 |
-
y_false = df_false['Avg.
|
35 |
labels_false = [
|
36 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
37 |
for m in df_false['Model'].tolist()
|
@@ -77,6 +77,7 @@ def line_chart(dataframe):
|
|
77 |
return fig
|
78 |
|
79 |
|
|
|
80 |
# Define task metadata (icons, names, descriptions)
|
81 |
TASK_METADATA_MULTIPLECHOICE = {
|
82 |
"TE": {"icon": "📊", "name": "Textual Entailment", "tooltip": ""},
|
@@ -108,18 +109,51 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
|
|
108 |
if dataframe is None or dataframe.empty:
|
109 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
110 |
|
111 |
-
sorted_dataframe = dataframe.sort_values(by="Avg.
|
112 |
|
113 |
sorted_dataframe = sorted_dataframe.reset_index(drop=True)
|
114 |
sorted_dataframe["rank"] = sorted_dataframe.index + 1
|
115 |
|
116 |
-
#
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
field_list = fields(AutoEvalColumn)
|
125 |
|
@@ -268,8 +302,8 @@ with demo:
|
|
268 |
|
269 |
leaderboard = init_leaderboard(
|
270 |
LEADERBOARD_DF,
|
271 |
-
default_selection=['rank', 'FS', 'Model', "Avg.
|
272 |
-
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg.
|
273 |
)
|
274 |
|
275 |
with gr.TabItem("📈 Charts"):
|
|
|
24 |
|
25 |
# Estrai valori x, y e labels per True e False
|
26 |
x_true = df_true['#Params (B)'].tolist()
|
27 |
+
y_true = df_true['Avg. Comb. Perf. ⬆️'].tolist()
|
28 |
labels_true = [
|
29 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
30 |
for m in df_true['Model'].tolist()
|
31 |
]
|
32 |
|
33 |
x_false = df_false['#Params (B)'].tolist()
|
34 |
+
y_false = df_false['Avg. Comb. Perf. ⬆️'].tolist()
|
35 |
labels_false = [
|
36 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
37 |
for m in df_false['Model'].tolist()
|
|
|
77 |
return fig
|
78 |
|
79 |
|
80 |
+
|
81 |
# Define task metadata (icons, names, descriptions)
|
82 |
TASK_METADATA_MULTIPLECHOICE = {
|
83 |
"TE": {"icon": "📊", "name": "Textual Entailment", "tooltip": ""},
|
|
|
109 |
if dataframe is None or dataframe.empty:
|
110 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
111 |
|
112 |
+
sorted_dataframe = dataframe.sort_values(by="Avg. Comb. Perf. ⬆️", ascending=False)
|
113 |
|
114 |
sorted_dataframe = sorted_dataframe.reset_index(drop=True)
|
115 |
sorted_dataframe["rank"] = sorted_dataframe.index + 1
|
116 |
|
117 |
+
# Flag per sapere se la medaglia è già stata assegnata per categoria e tipo
|
118 |
+
large_medal_fs_assigned = False
|
119 |
+
medium_medal_fs_assigned = False
|
120 |
+
small_medal_fs_assigned = False
|
121 |
+
|
122 |
+
large_medal_0shot_assigned = False
|
123 |
+
medium_medal_0shot_assigned = False
|
124 |
+
small_medal_0shot_assigned = False
|
125 |
+
|
126 |
+
# Lista temporanea per salvare i nuovi valori della colonna Model
|
127 |
+
new_model_column = []
|
128 |
+
|
129 |
+
for _, row in sorted_dataframe.iterrows():
|
130 |
+
if row['IS_FS']: # 5-Few-Shot
|
131 |
+
if row["#Params (B)"] > 30 and not large_medal_fs_assigned:
|
132 |
+
new_model_column.append(f"{row['Model']} 7️⃣0️⃣🅱️🏆")
|
133 |
+
large_medal_fs_assigned = True
|
134 |
+
elif 10 < row["#Params (B)"] <= 30 and not medium_medal_fs_assigned:
|
135 |
+
new_model_column.append(f"{row['Model']} 3️⃣0️⃣🅱️🏆")
|
136 |
+
medium_medal_fs_assigned = True
|
137 |
+
elif row["#Params (B)"] <= 10 and not small_medal_fs_assigned:
|
138 |
+
new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🏆")
|
139 |
+
small_medal_fs_assigned = True
|
140 |
+
else:
|
141 |
+
new_model_column.append(row["Model"])
|
142 |
+
else: # 0-Shot
|
143 |
+
if row["#Params (B)"] > 30 and not large_medal_0shot_assigned:
|
144 |
+
new_model_column.append(f"{row['Model']} 7️⃣0️⃣🅱️🎖️")
|
145 |
+
large_medal_0shot_assigned = True
|
146 |
+
elif 10 < row["#Params (B)"] <= 30 and not medium_medal_0shot_assigned:
|
147 |
+
new_model_column.append(f"{row['Model']} 3️⃣0️⃣🅱️🎖️")
|
148 |
+
medium_medal_0shot_assigned = True
|
149 |
+
elif row["#Params (B)"] <= 10 and not small_medal_0shot_assigned:
|
150 |
+
new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🎖️")
|
151 |
+
small_medal_0shot_assigned = True
|
152 |
+
else:
|
153 |
+
new_model_column.append(row["Model"])
|
154 |
+
|
155 |
+
# Aggiorna la colonna Model
|
156 |
+
sorted_dataframe["Model"] = new_model_column
|
157 |
|
158 |
field_list = fields(AutoEvalColumn)
|
159 |
|
|
|
302 |
|
303 |
leaderboard = init_leaderboard(
|
304 |
LEADERBOARD_DF,
|
305 |
+
default_selection=['rank', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
|
306 |
+
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
|
307 |
)
|
308 |
|
309 |
with gr.TabItem("📈 Charts"):
|
src/display/utils.py
CHANGED
@@ -34,7 +34,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
34 |
#auto_eval_column_dict.append(["fewshot", ColumnContent, ColumnContent("Few-Shot", "str", True)])
|
35 |
|
36 |
#Scores
|
37 |
-
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg.
|
38 |
for task in Tasks:
|
39 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
40 |
|
|
|
34 |
#auto_eval_column_dict.append(["fewshot", ColumnContent, ColumnContent("Few-Shot", "str", True)])
|
35 |
|
36 |
#Scores
|
37 |
+
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Comb. Perf. ⬆️", "number", True)])
|
38 |
for task in Tasks:
|
39 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
40 |
|