rzanoli committed
Commit 13fe545 · 1 Parent(s): af6e747

Add model positions in the ranking

Files changed (3)
  1. app.py +35 -9
  2. src/display/utils.py +2 -0
  3. src/leaderboard/read_evals.py +5 -2
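
For orientation before the per-file diffs, here is a minimal standalone sketch of the ranking logic this commit introduces. The DataFrame, model names, and scores are purely illustrative; the real code applies the same steps to the leaderboard DataFrame inside `init_leaderboard` and `update_task_leaderboard` in app.py.

```python
import pandas as pd

# Illustrative data only; the real leaderboard DataFrame has many more columns.
df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c", "model-d"],
    "Combined Performance": [71.2, 80.5, 65.0, 77.9],
})

# Sort by score, then derive a 1-based rank from the row position.
sorted_df = df.sort_values(by="Combined Performance", ascending=False).reset_index(drop=True)
sorted_df["rank"] = sorted_df.index + 1

# Append a medal to the model name for the top three positions.
medals = {1: "🥇", 2: "🥈", 3: "🥉"}
sorted_df["Model"] = sorted_df.apply(
    lambda row: f"{row['Model']} {medals[row['rank']]}" if row["rank"] in medals else row["Model"],
    axis=1,
)

print(sorted_df[["rank", "Model", "Combined Performance"]])
```

The diffs below apply this pattern twice: the main leaderboard sorts by "Avg. Combined Performance ⬆️" and the per-task leaderboards sort by "Combined Performance".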
app.py CHANGED

```diff
@@ -108,10 +108,23 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
 
+    sorted_dataframe = dataframe.sort_values(by="Avg. Combined Performance ⬆️", ascending=False)
+
+    sorted_dataframe = sorted_dataframe.reset_index(drop=True)
+    sorted_dataframe["rank"] = sorted_dataframe.index + 1
+
+    # add a medal next to the model name for the top three ranks
+    sorted_dataframe["Model"] = sorted_dataframe.apply(
+        lambda row: f"{row['Model']} 🥇" if row["rank"] == 1 else
+                    (f"{row['Model']} 🥈" if row["rank"] == 2 else
+                     (f"{row['Model']} 🥉" if row["rank"] == 3 else row["Model"])),
+        axis=1
+    )
+
     field_list = fields(AutoEvalColumn)
 
     return Leaderboard(
-        value=dataframe,
+        value=sorted_dataframe,
         datatype=[c.type for c in field_list],
         #select_columns=SelectColumns(
         #    default_selection=default_selection or [c.name for c in field_list if c.displayed_by_default],
@@ -144,6 +157,18 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
 
     sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
 
+    # add the rank column based on the position
+    sorted_dataframe = sorted_dataframe.reset_index(drop=True)
+    sorted_dataframe["rank"] = sorted_dataframe.index + 1
+
+    # add a medal next to the model name for the top three ranks
+    sorted_dataframe["Model"] = sorted_dataframe.apply(
+        lambda row: f"{row['Model']} 🥇" if row["rank"] == 1 else
+                    (f"{row['Model']} 🥈" if row["rank"] == 2 else
+                     (f"{row['Model']} 🥉" if row["rank"] == 3 else row["Model"])),
+        axis=1
+    )
+
     pd.set_option('display.max_colwidth', None)
     #print("========================", dataframe['Model'])
 
@@ -153,7 +178,8 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
 
     return Leaderboard(
         value=sorted_dataframe,
-        datatype=[c.type for c in field_list],
+        #datatype=[c.type for c in field_list],
+        datatype=[c.type for c in field_list] + [int],
         #select_columns=SelectColumns(
         #    default_selection=default_selection or [c.name for c in field_list if c.displayed_by_default],
         #    cant_deselect=[c.name for c in field_list if c.never_hidden],
@@ -211,7 +237,7 @@ download_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
 # Load leaderboard data
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-print(LEADERBOARD_DF.columns.tolist())
+#print(LEADERBOARD_DF.columns.tolist())
 
 # Prepare the main interface
 demo = gr.Blocks(css=custom_css)
@@ -242,8 +268,8 @@ with demo:
 
         leaderboard = init_leaderboard(
             LEADERBOARD_DF,
-            default_selection=['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
-            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
+            default_selection=['rank', 'FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
+            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
         )
 
         with gr.TabItem("📈 Charts"):
@@ -269,8 +295,8 @@ with demo:
 
            leaderboard = update_task_leaderboard(
                LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average", f"{task} Prompt Std": "Prompt Std", f"{task} Best Prompt": "Best Prompt", f"{task} Best Prompt Id": "Best Prompt Id", task: "Combined Performance"}),
-               default_selection=['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
-               hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
+               default_selection=['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
+               hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
            )
 
    # About tab
@@ -289,10 +315,10 @@ with demo:
                                            f"{task} Best Prompt": "Best Prompt",
                                            f"{task} Best Prompt Id": "Best Prompt Id",
                                            task: "Combined Performance"}),
-               default_selection=['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
+               default_selection=['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
                                   'Best Prompt Id'],
                hidden_columns=[col for col in LEADERBOARD_DF.columns if
-                               col not in ['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
+                               col not in ['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
                                            'Best Prompt', 'Best Prompt Id']]
            )
 
```
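One detail in the `update_task_leaderboard` hunk above: the `datatype` argument becomes `[c.type for c in field_list] + [int]`, presumably to keep one type entry per DataFrame column once the extra `rank` column exists, while `init_leaderboard` keeps `datatype=[c.type for c in field_list]` and relies on the `rank` column registered in src/display/utils.py below. A small sanity check for that alignment; the helper name and warning text are hypothetical, not part of the repo:

```python
def check_datatype_alignment(dataframe, datatypes):
    """Hypothetical helper: warn when the datatype list and the DataFrame columns diverge."""
    if len(datatypes) != len(dataframe.columns):
        print(
            f"warning: {len(datatypes)} datatypes declared for "
            f"{len(dataframe.columns)} columns: {list(dataframe.columns)}"
        )
    return datatypes

# Mirrors the call in update_task_leaderboard (names taken from the diff above):
# datatype = check_datatype_alignment(sorted_dataframe, [c.type for c in field_list] + [int])
```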
src/display/utils.py CHANGED

```diff
@@ -25,6 +25,8 @@ auto_eval_column_dict = []
 # Init
 #auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 
+auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("rank", "str", True, never_hidden=True)])
+
 auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])
 
```
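The new "rank" entry mirrors the existing column definitions. In the Hugging Face leaderboard template this code appears to follow, these `[name, ColumnContent, ColumnContent(...)]` triples are passed to `dataclasses.make_dataclass` to build `AutoEvalColumn`, which is why the column then becomes visible to app.py. A sketch of that assumed pattern; the `ColumnContent` definition below is the template's and may differ slightly from this repo's:

```python
from dataclasses import dataclass, make_dataclass

# ColumnContent as defined in the leaderboard template (assumed, not copied from this repo).
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("rank", "str", True, never_hidden=True)])
auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])

# In the template, the list becomes a dataclass whose defaults are the ColumnContent
# instances, so the new entry is reachable as AutoEvalColumn.rank.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.rank.name, AutoEvalColumn.rank.type)  # -> rank str
```

Under that assumption, the `field_list = fields(AutoEvalColumn)` call in app.py picks up the new `rank` column alongside the existing ones.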
src/leaderboard/read_evals.py CHANGED

```diff
@@ -2,7 +2,7 @@ import glob
 import json
 import math
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 import dateutil
 import numpy as np
@@ -34,6 +34,7 @@ class EvalResult:
     num_params: int = 0
     date: str = ""  # submission date of request file
     still_on_hub: bool = False
+    rank: int = field(default=0)  # 👈 new field with default = 0
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -117,7 +118,8 @@ class EvalResult:
             revision= config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture,
-            num_params=num_params
+            num_params=num_params,
+            rank = 0
         )
 
     '''
@@ -164,6 +166,7 @@ class EvalResult:
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            "rank": self.rank
         }
 
         for task in Tasks:
```
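
Two small notes on the read_evals.py changes: `rank: int = field(default=0)` behaves the same as a plain `rank: int = 0` for an immutable default like this, and the last hunk is what actually surfaces the value, adding a "rank" key to the dict each `EvalResult` contributes to the leaderboard DataFrame. A stripped-down stand-in (not the real `EvalResult` class) illustrating both points:

```python
from dataclasses import dataclass, field, asdict

@dataclass
class EvalResultSketch:
    # Stand-in for EvalResult with only the fields needed for the illustration.
    still_on_hub: bool = False
    rank: int = field(default=0)  # same effect as `rank: int = 0`

res = EvalResultSketch(still_on_hub=True)
print(res.rank)     # 0 until the leaderboard code assigns a real position
print(asdict(res))  # {'still_on_hub': True, 'rank': 0}
```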