huckiyang committed
Commit 88d5920
1 Parent(s): 21e142e

[rank] adding rank

Files changed (2)
  1. app.py +28 -8
  2. src/about.py +1 -1
app.py CHANGED
@@ -38,17 +38,17 @@ for col_tuple in n_avg_cols_to_average:
 # Calculate average, handling cases where some N-avg columns might be missing
 existing_n_avg_cols = [col for col in n_avg_cols_to_average if col in LEADERBOARD_DF_ORIGINAL.columns]
 if existing_n_avg_cols:
-    LEADERBOARD_DF_ORIGINAL[('Overall', 'Average N-avg')] = LEADERBOARD_DF_ORIGINAL[existing_n_avg_cols].mean(axis=1)
-    LEADERBOARD_DF_ORIGINAL[('Overall', 'Rank')] = LEADERBOARD_DF_ORIGINAL[('Overall', 'Average N-avg')].rank(method='min', ascending=False).astype(int)
+    LEADERBOARD_DF_ORIGINAL[('Avg-', ' N-avg')] = LEADERBOARD_DF_ORIGINAL[existing_n_avg_cols].mean(axis=1)
+    LEADERBOARD_DF_ORIGINAL[('Avg-', 'Rank')] = LEADERBOARD_DF_ORIGINAL[('Avg-', ' N-avg')].rank(method='min', ascending=False).astype(int)
 else:
-    LEADERBOARD_DF_ORIGINAL[('Overall', 'Average N-avg')] = np.nan
-    LEADERBOARD_DF_ORIGINAL[('Overall', 'Rank')] = np.nan
+    LEADERBOARD_DF_ORIGINAL[('Avg-', ' N-avg')] = np.nan
+    LEADERBOARD_DF_ORIGINAL[('Avg-', 'Rank')] = np.nan
 
 
 # Reorder columns to put Rank and Average N-avg first, then Model, then the rest
 model_col_tuple = ('Model', 'Model') # Original name of the model column
-rank_col_tuple = ('Overall', 'Rank')
-avg_navg_col_tuple = ('Overall', 'Average N-avg')
+rank_col_tuple = ('Avg-', 'Rank')
+avg_navg_col_tuple = ('Avg-', ' N-avg')
 
 new_col_order = []
 if rank_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
@@ -63,18 +63,38 @@ for col in LEADERBOARD_DF_ORIGINAL.columns:
         new_col_order.append(col)
 LEADERBOARD_DF_ORIGINAL = LEADERBOARD_DF_ORIGINAL[new_col_order]
 
+# Sort by Rank ascending
+if rank_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
+    LEADERBOARD_DF_ORIGINAL = LEADERBOARD_DF_ORIGINAL.sort_values(by=rank_col_tuple, ascending=True)
 
 # Function to prepare DataFrame for display (format headers, ensure Model column)
 def format_leaderboard_df_for_display(df_orig):
     df_display = df_orig.copy()
     new_columns = []
     for col_tuple in df_display.columns:
-        if col_tuple == ('Model', 'Model'):
+        if col_tuple == ('Avg-', 'Rank'):
+            new_columns.append('Overall Rank')
+        elif col_tuple == ('Avg-', ' N-avg'):
+            new_columns.append('Average N-avg')
+        elif col_tuple == ('Model', 'Model'):
             new_columns.append('Model')
         else:
            new_columns.append(f"{col_tuple[0]}\n{col_tuple[1]}")
     df_display.columns = new_columns
-    return df_display
+
+    # Create a new DataFrame with the formatted column names for display
+    # and apply formatting to the 'Average N-avg' data if it exists
+    temp_formatted_df = pd.DataFrame(df_display.values, columns=new_columns, index=df_display.index)
+    if 'Average N-avg' in temp_formatted_df.columns:
+        # Ensure the column is numeric before formatting, in case it became object type
+        temp_formatted_df['Average N-avg'] = pd.to_numeric(temp_formatted_df['Average N-avg'], errors='coerce')
+        temp_formatted_df['Average N-avg'] = temp_formatted_df['Average N-avg'].map(lambda x: f"{x:.4f}" if pd.notnull(x) else '-')
+
+    # Convert the 'Overall Rank' to integer string to avoid '.0'
+    if 'Overall Rank' in temp_formatted_df.columns:
+        temp_formatted_df['Overall Rank'] = temp_formatted_df['Overall Rank'].map(lambda x: f"{int(x)}" if pd.notnull(x) else '-')
+
+    return temp_formatted_df
 
 LEADERBOARD_DF_DISPLAY_INIT = format_leaderboard_df_for_display(LEADERBOARD_DF_ORIGINAL)
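Note (not part of the commit): a minimal, self-contained sketch of what the new ranking columns compute. The ('Avg-', ...) column names and the rank/sort calls mirror the diff above; the model names, benchmark groups, and scores are invented for illustration.

import pandas as pd

# Toy leaderboard with the same two-level column layout as app.py;
# model names, benchmark groups, and scores are made up.
toy = pd.DataFrame({
    ('Model', 'Model'): ['model-a', 'model-b', 'model-c'],
    ('BenchX', 'N-avg'): [0.71, 0.83, 0.65],
    ('BenchY', 'N-avg'): [0.69, 0.80, 0.72],
})

n_avg_cols = [('BenchX', 'N-avg'), ('BenchY', 'N-avg')]
toy[('Avg-', ' N-avg')] = toy[n_avg_cols].mean(axis=1)  # row-wise mean of the N-avg scores
toy[('Avg-', 'Rank')] = toy[('Avg-', ' N-avg')].rank(method='min', ascending=False).astype(int)  # 1 = best average
toy = toy.sort_values(by=('Avg-', 'Rank'), ascending=True)  # best model first, as in the new sort step
print(toy[[('Model', 'Model'), ('Avg-', ' N-avg'), ('Avg-', 'Rank')]])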
 
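Similarly, a hedged sketch of the display-side change in format_leaderboard_df_for_display: two-level headers flatten to 'Overall Rank', 'Average N-avg', 'Model', or "Group\nMetric", and the rank and average are rendered as strings so the rank shows without a trailing '.0'. The ('CapQual', 'N-avg') column and all values below are hypothetical.

import pandas as pd

# Toy frame that already carries the new ('Avg-', ...) columns; values are invented.
df = pd.DataFrame({
    ('Avg-', 'Rank'): [1, 2],
    ('Model', 'Model'): ['model-b', 'model-a'],
    ('Avg-', ' N-avg'): [0.815, 0.700],
    ('CapQual', 'N-avg'): [0.83, 0.71],  # hypothetical benchmark group
})

disp = df.copy()
disp.columns = [
    'Overall Rank' if c == ('Avg-', 'Rank')
    else 'Average N-avg' if c == ('Avg-', ' N-avg')
    else 'Model' if c == ('Model', 'Model')
    else f"{c[0]}\n{c[1]}"
    for c in disp.columns
]
disp['Average N-avg'] = disp['Average N-avg'].map(lambda x: f"{x:.4f}" if pd.notnull(x) else '-')
disp['Overall Rank'] = disp['Overall Rank'].map(lambda x: f"{int(x)}" if pd.notnull(x) else '-')
print(disp)  # headers are now flat strings; rank and average are display strings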
src/about.py CHANGED
@@ -31,7 +31,7 @@ TITLE = """
 INTRODUCTION_TEXT = """
 
 We introduce **LOTUS**, a leaderboard for evaluating detailed captions, addressing three main gaps in existing evaluations: lack of **standardized** criteria, **bias-aware** assessments, and **user preference** considerations.
-LOTUS comprehensively evaluates various aspects, including caption quality (\eg, alignment, descriptiveness), risks (\eg, hallucination), and societal biases (\eg, gender bias) while enabling preference-oriented evaluations by tailoring criteria to diverse user preferences.
+LOTUS comprehensively evaluates various aspects, including caption quality (e.g., alignment, descriptiveness), risks (e.g., hallucination), and societal biases (e.g., gender bias) while enabling preference-oriented evaluations by tailoring criteria to diverse user preferences.
 
 """