Add model positions in the ranking
Files changed:
- app.py (+35 -9)
- src/display/utils.py (+2 -0)
- src/leaderboard/read_evals.py (+5 -2)
app.py CHANGED
@@ -108,10 +108,23 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")

+    sorted_dataframe = dataframe.sort_values(by="Avg. Combined Performance ⬆️", ascending=False)
+
+    sorted_dataframe = sorted_dataframe.reset_index(drop=True)
+    sorted_dataframe["rank"] = sorted_dataframe.index + 1
+
+    # add a medal next to the model name for the top three ranks
+    sorted_dataframe["Model"] = sorted_dataframe.apply(
+        lambda row: f"{row['Model']} 🥇" if row["rank"] == 1 else
+                    (f"{row['Model']} 🥈" if row["rank"] == 2 else
+                     (f"{row['Model']} 🥉" if row["rank"] == 3 else row["Model"])),
+        axis=1
+    )
+
     field_list = fields(AutoEvalColumn)

     return Leaderboard(
-        value=dataframe,
+        value=sorted_dataframe,
         datatype=[c.type for c in field_list],
         #select_columns=SelectColumns(
         #    default_selection=default_selection or [c.name for c in field_list if c.displayed_by_default],
@@ -144,6 +157,18 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=None):

     sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)

+    # add the rank column based on row position
+    sorted_dataframe = sorted_dataframe.reset_index(drop=True)
+    sorted_dataframe["rank"] = sorted_dataframe.index + 1
+
+    # add a medal next to the model name for the top three ranks
+    sorted_dataframe["Model"] = sorted_dataframe.apply(
+        lambda row: f"{row['Model']} 🥇" if row["rank"] == 1 else
+                    (f"{row['Model']} 🥈" if row["rank"] == 2 else
+                     (f"{row['Model']} 🥉" if row["rank"] == 3 else row["Model"])),
+        axis=1
+    )
+
     pd.set_option('display.max_colwidth', None)
     #print("========================", dataframe['Model'])

@@ -153,7 +178,8 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=None):

     return Leaderboard(
         value=sorted_dataframe,
-        datatype=[c.type for c in field_list],
+        #datatype=[c.type for c in field_list],
+        datatype=[c.type for c in field_list] + [int],
         #select_columns=SelectColumns(
         #    default_selection=default_selection or [c.name for c in field_list if c.displayed_by_default],
         #    cant_deselect=[c.name for c in field_list if c.never_hidden],
@@ -211,7 +237,7 @@ download_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
 # Load leaderboard data
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-print(LEADERBOARD_DF.columns.tolist())
+#print(LEADERBOARD_DF.columns.tolist())

 # Prepare the main interface
 demo = gr.Blocks(css=custom_css)
@@ -242,8 +268,8 @@ with demo:

         leaderboard = init_leaderboard(
             LEADERBOARD_DF,
-            default_selection=['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
-            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
+            default_selection=['rank', 'FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
+            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
         )

     with gr.TabItem("📊 Charts"):
@@ -269,8 +295,8 @@ with demo:

                 leaderboard = update_task_leaderboard(
                     LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average", f"{task} Prompt Std": "Prompt Std", f"{task} Best Prompt": "Best Prompt", f"{task} Best Prompt Id": "Best Prompt Id", task: "Combined Performance"}),
-                    default_selection=['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
-                    hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
+                    default_selection=['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
+                    hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
                 )

     # About tab
@@ -289,10 +315,10 @@ with demo:
                         f"{task} Best Prompt": "Best Prompt",
                         f"{task} Best Prompt Id": "Best Prompt Id",
                         task: "Combined Performance"}),
-                    default_selection=['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
+                    default_selection=['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
                                        'Best Prompt Id'],
                     hidden_columns=[col for col in LEADERBOARD_DF.columns if
-                                    col not in ['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
+                                    col not in ['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
                                                 'Best Prompt', 'Best Prompt Id']]
                 )

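For reference, the snippet below is a standalone sketch (not part of the diff) of the ranking logic this commit adds to both leaderboard builders: sort by the score column, derive a 1-based rank from the resulting row order, and decorate the top three model names with medals. The DataFrame contents and column names here are illustrative only.

import pandas as pd

# Toy data standing in for LEADERBOARD_DF; the values are made up.
df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c", "model-d"],
    "Combined Performance": [71.2, 84.5, 78.9, 66.0],
})

# Sort by score, then derive the 1-based rank from the new row order.
sorted_df = df.sort_values(by="Combined Performance", ascending=False).reset_index(drop=True)
sorted_df["rank"] = sorted_df.index + 1

# Append a medal to the top three model names; other rows keep their plain name.
medals = {1: "🥇", 2: "🥈", 3: "🥉"}
sorted_df["Model"] = sorted_df.apply(
    lambda row: f"{row['Model']} {medals[row['rank']]}" if row["rank"] in medals else row["Model"],
    axis=1,
)

print(sorted_df[["rank", "Model", "Combined Performance"]])
# rank 1: model-b 🥇, rank 2: model-c 🥈, rank 3: model-a 🥉, rank 4: model-d

One design note: because the medal is appended to the "Model" string itself, any later code that matches rows by exact model name would need to strip that suffix first.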
src/display/utils.py CHANGED
@@ -25,6 +25,8 @@ auto_eval_column_dict = []
 # Init
 #auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])

+auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("rank", "str", True, never_hidden=True)])
+
 auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])

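The new entry registers "rank" as a column that is shown by default and can never be hidden. As a rough sketch of how such an entry is interpreted (the ColumnContent definition below is the usual shape in Hugging Face leaderboard templates, reproduced here as an assumption rather than taken from this repo):

from dataclasses import dataclass

@dataclass
class ColumnContent:
    name: str                  # header shown in the table, e.g. "rank"
    type: str                  # Gradio datatype, e.g. "str", "bool", "markdown"
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

# Mirrors the added line: a visible "rank" column that cannot be deselected.
rank_col = ColumnContent("rank", "str", True, never_hidden=True)
print(rank_col)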
src/leaderboard/read_evals.py CHANGED
@@ -2,7 +2,7 @@ import glob
 import json
 import math
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field

 import dateutil
 import numpy as np
@@ -34,6 +34,7 @@ class EvalResult:
     num_params: int = 0
     date: str = ""  # submission date of request file
     still_on_hub: bool = False
+    rank: int = field(default=0)  # new field with default = 0

     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -117,7 +118,8 @@ class EvalResult:
             revision= config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture,
-            num_params=num_params
+            num_params=num_params,
+            rank = 0
         )

     '''
@@ -164,6 +166,7 @@ class EvalResult:
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            "rank": self.rank
         }

         for task in Tasks:
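Taken together, these changes seed every evaluation row with a default rank of 0 so the column already exists before app.py recomputes it after sorting. A minimal stub illustrating that flow (hypothetical names, not the real EvalResult class):

from dataclasses import dataclass, field

@dataclass
class EvalResultStub:
    full_model: str
    num_params: int = 0
    still_on_hub: bool = False
    rank: int = field(default=0)  # placeholder; overwritten once the leaderboard is sorted

    def to_dict(self):
        # Illustrative column names; the real mapping goes through AutoEvalColumn.
        return {
            "Model": self.full_model,
            "Params": self.num_params,
            "On hub": self.still_on_hub,
            "rank": self.rank,
        }

print(EvalResultStub("org/model-x", num_params=7).to_dict()["rank"])  # -> 0

Note that rank: int = field(default=0) is equivalent to rank: int = 0 here; field() is only strictly needed for mutable defaults.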