[rank] adding rank
Browse files- app.py +28 -8
- src/about.py +1 -1
app.py
CHANGED
@@ -38,17 +38,17 @@ for col_tuple in n_avg_cols_to_average:
|
|
38 |
# Calculate average, handling cases where some N-avg columns might be missing
|
39 |
existing_n_avg_cols = [col for col in n_avg_cols_to_average if col in LEADERBOARD_DF_ORIGINAL.columns]
|
40 |
if existing_n_avg_cols:
|
41 |
-
LEADERBOARD_DF_ORIGINAL[('
|
42 |
-
LEADERBOARD_DF_ORIGINAL[('
|
43 |
else:
|
44 |
-
LEADERBOARD_DF_ORIGINAL[('
|
45 |
-
LEADERBOARD_DF_ORIGINAL[('
|
46 |
|
47 |
|
48 |
# Reorder columns to put Rank and Average N-avg first, then Model, then the rest
|
49 |
model_col_tuple = ('Model', 'Model') # Original name of the model column
|
50 |
-
rank_col_tuple = ('
|
51 |
-
avg_navg_col_tuple = ('
|
52 |
|
53 |
new_col_order = []
|
54 |
if rank_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
|
@@ -63,18 +63,38 @@ for col in LEADERBOARD_DF_ORIGINAL.columns:
|
|
63 |
new_col_order.append(col)
|
64 |
LEADERBOARD_DF_ORIGINAL = LEADERBOARD_DF_ORIGINAL[new_col_order]
|
65 |
|
|
|
|
|
|
|
66 |
|
67 |
# Function to prepare DataFrame for display (format headers, ensure Model column)
|
68 |
def format_leaderboard_df_for_display(df_orig):
|
69 |
df_display = df_orig.copy()
|
70 |
new_columns = []
|
71 |
for col_tuple in df_display.columns:
|
72 |
-
if col_tuple == ('Model', 'Model'):
|
|
|
|
|
|
|
|
|
73 |
new_columns.append('Model')
|
74 |
else:
|
75 |
new_columns.append(f"{col_tuple[0]}\n{col_tuple[1]}")
|
76 |
df_display.columns = new_columns
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
LEADERBOARD_DF_DISPLAY_INIT = format_leaderboard_df_for_display(LEADERBOARD_DF_ORIGINAL)
|
80 |
|
|
|
38 |
# Calculate average, handling cases where some N-avg columns might be missing
|
39 |
existing_n_avg_cols = [col for col in n_avg_cols_to_average if col in LEADERBOARD_DF_ORIGINAL.columns]
|
40 |
if existing_n_avg_cols:
|
41 |
+
LEADERBOARD_DF_ORIGINAL[('Avg-', ' N-avg')] = LEADERBOARD_DF_ORIGINAL[existing_n_avg_cols].mean(axis=1)
|
42 |
+
LEADERBOARD_DF_ORIGINAL[('Avg-', 'Rank')] = LEADERBOARD_DF_ORIGINAL[('Avg-', ' N-avg')].rank(method='min', ascending=False).astype(int)
|
43 |
else:
|
44 |
+
LEADERBOARD_DF_ORIGINAL[('Avg-', ' N-avg')] = np.nan
|
45 |
+
LEADERBOARD_DF_ORIGINAL[('Avg-', 'Rank')] = np.nan
|
46 |
|
47 |
|
48 |
# Reorder columns to put Rank and Average N-avg first, then Model, then the rest
|
49 |
model_col_tuple = ('Model', 'Model') # Original name of the model column
|
50 |
+
rank_col_tuple = ('Avg-', 'Rank')
|
51 |
+
avg_navg_col_tuple = ('Avg-', ' N-avg')
|
52 |
|
53 |
new_col_order = []
|
54 |
if rank_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
|
|
|
63 |
new_col_order.append(col)
|
64 |
LEADERBOARD_DF_ORIGINAL = LEADERBOARD_DF_ORIGINAL[new_col_order]
|
65 |
|
66 |
+
# Sort by Rank ascending
|
67 |
+
if rank_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
|
68 |
+
LEADERBOARD_DF_ORIGINAL = LEADERBOARD_DF_ORIGINAL.sort_values(by=rank_col_tuple, ascending=True)
|
69 |
|
70 |
# Function to prepare DataFrame for display (format headers, ensure Model column)
|
71 |
def format_leaderboard_df_for_display(df_orig):
|
72 |
df_display = df_orig.copy()
|
73 |
new_columns = []
|
74 |
for col_tuple in df_display.columns:
|
75 |
+
if col_tuple == ('Avg-', 'Rank'):
|
76 |
+
new_columns.append('Overall Rank')
|
77 |
+
elif col_tuple == ('Avg-', ' N-avg'):
|
78 |
+
new_columns.append('Average N-avg')
|
79 |
+
elif col_tuple == ('Model', 'Model'):
|
80 |
new_columns.append('Model')
|
81 |
else:
|
82 |
new_columns.append(f"{col_tuple[0]}\n{col_tuple[1]}")
|
83 |
df_display.columns = new_columns
|
84 |
+
|
85 |
+
# Create a new DataFrame with the formatted column names for display
|
86 |
+
# and apply formatting to the 'Average N-avg' data if it exists
|
87 |
+
temp_formatted_df = pd.DataFrame(df_display.values, columns=new_columns, index=df_display.index)
|
88 |
+
if 'Average N-avg' in temp_formatted_df.columns:
|
89 |
+
# Ensure the column is numeric before formatting, in case it became object type
|
90 |
+
temp_formatted_df['Average N-avg'] = pd.to_numeric(temp_formatted_df['Average N-avg'], errors='coerce')
|
91 |
+
temp_formatted_df['Average N-avg'] = temp_formatted_df['Average N-avg'].map(lambda x: f"{x:.4f}" if pd.notnull(x) else '-')
|
92 |
+
|
93 |
+
# Convert the 'Overall Rank' to integer string to avoid '.0'
|
94 |
+
if 'Overall Rank' in temp_formatted_df.columns:
|
95 |
+
temp_formatted_df['Overall Rank'] = temp_formatted_df['Overall Rank'].map(lambda x: f"{int(x)}" if pd.notnull(x) else '-')
|
96 |
+
|
97 |
+
return temp_formatted_df
|
98 |
|
99 |
LEADERBOARD_DF_DISPLAY_INIT = format_leaderboard_df_for_display(LEADERBOARD_DF_ORIGINAL)
|
100 |
|
src/about.py
CHANGED
@@ -31,7 +31,7 @@ TITLE = """
|
|
31 |
INTRODUCTION_TEXT = """
|
32 |
|
33 |
We introduce **LOTUS**, a leaderboard for evaluating detailed captions, addressing three main gaps in existing evaluations: lack of **standardized** criteria, **bias-aware** assessments, and **user preference** considerations.
|
34 |
-
LOTUS comprehensively evaluates various aspects, including caption quality (
|
35 |
|
36 |
"""
|
37 |
|
|
|
31 |
INTRODUCTION_TEXT = """
|
32 |
|
33 |
We introduce **LOTUS**, a leaderboard for evaluating detailed captions, addressing three main gaps in existing evaluations: lack of **standardized** criteria, **bias-aware** assessments, and **user preference** considerations.
|
34 |
+
LOTUS comprehensively evaluates various aspects, including caption quality (e.g., alignment, descriptiveness), risks (e.g., hallucination), and societal biases (e.g., gender bias) while enabling preference-oriented evaluations by tailoring criteria to diverse user preferences.
|
35 |
|
36 |
"""
|
37 |
|