babilong / draw_utils.py
yurakuratov's picture
group results by max eval length
b54190c
raw
history blame
2.58 kB
import pandas as pd
import numpy as np
# CSS snippet wrapped in a <style> tag. It shifts `.reportview-container` up by
# 2em and hides the elements matched by `#MainMenu`, `.stDeployButton`,
# `footer` and `#stDecoration`.
# NOTE(review): these selectors look like Streamlit UI elements; presumably the
# string is injected into the page as raw HTML by the caller -- confirm there.
PAGE_MARKDOWN = """
<style>
.reportview-container {
margin-top: -2em;
}
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
</style>
"""
# Single-line markdown header: a "Dataset on HF" badge followed by
# pipe-separated links to the GitHub repository, the paper, the HF dataset and
# the 1k-samples-per-task HF dataset.
PAGE_INFO = """[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-lg.svg)](https://huggingface.co/datasets/RMT-team/babilong) | [GitHub](https://github.com/booydar/babilong) | [Paper](https://arxiv.org/abs/2406.10149) | [HF Dataset](https://huggingface.co/datasets/RMT-team/babilong) | [HF Dataset 1k samples per task](https://huggingface.co/datasets/RMT-team/babilong-1k-samples) |"""
# Column labels of the evaluated context lengths, ordered from shortest to
# longest. The position of a label in this list is used as its rank.
LENGTHS = [
    '0k', '1k', '2k', '4k', '8k', '16k', '32k', '64k', '128k',
    '512k', '1M', '2M',
]
# Prefixes of LENGTHS up to and including '32k' / '128k'; used for the
# short-context and medium-context averages.
LENGTHS_32k = LENGTHS[:LENGTHS.index('32k') + 1]
LENGTHS_128k = LENGTHS[:LENGTHS.index('128k') + 1]
def load_results():
    """
    Load and merge the leaderboard result tables.

    Reads the two CSV files at the hard-coded paths below, concatenates them,
    turns every -1 into a missing value (NaN) and adds four derived columns:

    * '<=32k'  -- row mean over the LENGTHS_32k columns
    * '<=128k' -- row mean over the LENGTHS_128k columns
    * 'max_eval_length_idx' -- position in LENGTHS of the longest length
      column that holds a non-NaN value, or -1 when the row has none
    * 'max_eval_length' -- the LENGTHS label at that position, or NaN when
      the row has no evaluated length at all

    Returns:
        pandas.DataFrame sorted by 'max_eval_length_idx' and then by
        '<=128k', both descending.

    Raises:
        KeyError: if one of the LENGTHS columns is absent from the merged data.
    """
    old_results_path = "data/leaderboard-v0_results.csv"
    new_results_path = "babilong/babilong_results/all_results.csv"
    old_results = pd.read_csv(old_results_path)
    new_results = pd.read_csv(new_results_path)
    res = pd.concat([old_results, new_results])
    res.replace(-1, np.nan, inplace=True)
    res['<=32k'] = res[LENGTHS_32k].mean(axis=1)
    res['<=128k'] = res[LENGTHS_128k].mean(axis=1)

    # Rank of the longest evaluated length per row, computed on the whole
    # table at once: evaluated cells keep their column position, the rest
    # count as -1, and the row-wise maximum is the answer. Assigning plain
    # arrays/lists (not Series) keeps this independent of the duplicate index
    # labels that pd.concat leaves behind.
    evaluated = res[LENGTHS].notna().to_numpy()
    max_idx = np.where(evaluated, np.arange(len(LENGTHS)), -1).max(axis=1)
    res['max_eval_length_idx'] = max_idx
    # A rank of -1 means "nothing evaluated". It must not be used as a list
    # index: LENGTHS[-1] would mislabel such a row with the longest length.
    res['max_eval_length'] = [LENGTHS[i] if i >= 0 else np.nan for i in max_idx]

    # Sort first by max length (descending) and then by average score (descending)
    res.sort_values(['max_eval_length_idx', '<=128k'], ascending=[False, False], inplace=True)
    return res
def style_dataframe(df):
"""
Style a pandas DataFrame with a color gradient.
"""
styled_df = df.copy()
numeric_columns = styled_df.columns[1:]
def color_scale(val):
if pd.isna(val):
return 'background-color: white; color: white'
min_val = 0
max_val = 100
normalized = (val - min_val) / (max_val - min_val) if max_val > min_val else 0.5
r = int(255 * (1 - normalized) + 144 * normalized)
g = int(204 * (1 - normalized) + 238 * normalized)
b = int(204 * (1 - normalized) + 180 * normalized)
return f'background-color: rgb({r}, {g}, {b})'
styled = styled_df.style.map(color_scale, subset=numeric_columns)
return styled