ArmBench-LLM / data_handler.py
Bagratuni's picture
mmlu api models update
95ec4a1
raw
history blame
4.03 kB
import gradio as gr
import pandas as pd
import plotly.express as px
from model_handler import ModelHandler
def unified_exam_result_table(unified_exam_df):
    """Return a ranked, display-ready copy of the unified exam results.

    Rows are ordered by 'Average' (highest first) and numbered with a
    1-based 'Rank' column placed first. 'Average' is moved to the third
    column, the 'Armenian language and literature' header gets a line
    break, and values are rounded to 4 decimals. The input frame is not
    modified.
    """
    ranked = (
        unified_exam_df
        .copy()
        .sort_values(by='Average', ascending=False)
        .reset_index(drop=True)
    )
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))

    # Re-order headers: pull 'Average' out and put it back at index 2.
    ordered = list(ranked.columns)
    ordered.remove('Average')
    ordered.insert(2, 'Average')

    wrapped_header = {
        'Armenian language and literature': 'Armenian language\nand literature',
    }
    return ranked[ordered].rename(columns=wrapped_header).round(4)
def mmlu_result_table(mmlu_df):
    """Return a ranked, display-ready copy of the MMLU results.

    Rows are ordered by 'Average' (highest first) and numbered with a
    1-based 'Rank' column placed first. 'Average' is moved to the third
    column, 'Other' is moved to the last column, and values are rounded
    to 4 decimals. The input frame is not modified.
    """
    ranked = (
        mmlu_df
        .copy()
        .sort_values(by='Average', ascending=False)
        .reset_index(drop=True)
    )
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))

    ordered = list(ranked.columns)
    # 'Average' goes right after the first two columns.
    ordered.remove('Average')
    ordered.insert(2, 'Average')
    # 'Other' is pushed to the very end.
    ordered.remove('Other')
    ordered.append('Other')

    return ranked[ordered].round(4)
def unified_exam_chart(unified_exam_df, plot_column):
    """Build a horizontal bar chart of one unified-exam score column per model.

    Bars are sorted by score (descending, ties broken by model name) and
    coloured by a result label derived from the score: below 8 is "Fail",
    8 through 18 is "Pass", anything else is "Distinction". The score axis
    is fixed to the range 0-20.

    Args:
        unified_exam_df: DataFrame with a 'Model' column and the score
            column named by ``plot_column``. It is copied, not modified.
        plot_column: Name of the score column to plot. The name
            'Armenian language and literature' is also accepted when the
            frame carries the line-wrapped header that
            ``unified_exam_result_table`` produces.

    Returns:
        The Plotly Express bar figure.

    Raises:
        KeyError: If the requested column is not present in the frame.
    """
    literature = 'Armenian language and literature'
    literature_wrapped = 'Armenian language\nand literature'
    x_range_max = 20
    bar_height_px = 28

    df = unified_exam_df.copy()

    # unified_exam_result_table renames the literature column to a wrapped
    # header. Only switch to that spelling when the frame actually has it, so
    # a frame that still uses the plain header works instead of raising.
    if plot_column == literature and literature_wrapped in df.columns:
        plot_column = literature_wrapped

    df = df.sort_values(
        by=[plot_column, 'Model'],
        ascending=[False, True],
    ).reset_index(drop=True)
    title = f'{plot_column}'

    def get_label(score):
        # NOTE(review): a missing (NaN) score fails both comparisons and so
        # is labelled "Distinction" - confirm whether that is intended.
        if score < 8:
            return "Fail"
        if score <= 18:
            return "Pass"
        return "Distinction"

    df['Test Result'] = df[plot_column].apply(get_label)

    color_discrete_map = {
        "Fail": "#ff5f56",
        "Pass": "#ffbd2e",
        "Distinction": "#27c93f",
    }

    fig = px.bar(
        df,
        x=plot_column,
        y='Model',
        color='Test Result',
        color_discrete_map=color_discrete_map,
        labels={plot_column: 'Score', 'Model': 'Model'},
        title=title,
        orientation='h',
    )

    fig.update_layout(
        # One fixed-height row per model plus room for title and axis.
        height=bar_height_px * len(df) + 120,
        margin=dict(l=220, r=40, t=60, b=40),
        xaxis=dict(range=[0, x_range_max]),
        title=dict(text=title, font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        yaxis=dict(
            automargin=True,
            # Explicit ticks so every model name is shown.
            tickmode="array",
            tickvals=df["Model"],
            ticktext=df["Model"],
            dtick=1,
            # Reversed so the first (highest-scoring) row is drawn at the top.
            autorange="reversed",
        ),
        width=1000,
    )
    fig.update_yaxes(tickfont=dict(size=10))
    return fig
def mmlu_chart(mmlu_df, plot_column):
    """Build a horizontal bar chart of one MMLU accuracy column per model.

    'Average' is (re)computed on a copy of the input as the row-wise mean of
    the fixed subject columns listed below. Bars are sorted by the plotted
    column (descending, ties broken by model name) and coloured on a
    continuous Viridis scale over [0, 1]. The accuracy axis is fixed to
    the range 0-1.

    Args:
        mmlu_df: DataFrame with a 'Model' column and every subject column
            listed below. It is copied, not modified.
        plot_column: Name of the column to plot.

    Returns:
        The Plotly Express bar figure.
    """
    subjects = [
        'Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics',
        'Engineering', 'Health', 'History', 'Law', 'Math', 'Other',
        'Philosophy', 'Physics', 'Psychology'
    ]
    row_height_px = 28
    accuracy_max = 1.0

    frame = mmlu_df.copy()
    frame['Average'] = frame[subjects].mean(axis=1)
    frame = frame.sort_values(
        by=[plot_column, 'Model'],
        ascending=[False, True],
    ).reset_index(drop=True)

    chart_title = f'{plot_column}'

    figure = px.bar(
        frame,
        x=plot_column,
        y='Model',
        color=plot_column,
        color_continuous_scale='Viridis',
        labels={plot_column: 'Accuracy', 'Model': 'Model'},
        title=chart_title,
        orientation='h',
        range_color=[0, 1]
    )

    small_font = dict(font=dict(size=12))
    accuracy_axis = dict(range=[0, accuracy_max])
    model_axis = dict(
        automargin=True,
        # Explicit ticks so every model name is shown.
        tickmode='array',
        tickvals=frame['Model'],
        ticktext=frame['Model'],
        dtick=1,
        # Reversed so the first (highest-accuracy) row is drawn at the top.
        autorange='reversed'
    )

    figure.update_layout(
        # One fixed-height row per model plus room for title and axis.
        height=row_height_px * len(frame) + 120,
        margin=dict(l=220, r=40, t=60, b=40),
        width=1000,
        xaxis=accuracy_axis,
        title=dict(text=chart_title, font=dict(size=16)),
        xaxis_title=small_font,
        yaxis_title=small_font,
        yaxis=model_axis
    )
    figure.update_yaxes(tickfont=dict(size=10))
    return figure