# ArmBench-LLM — app.py (Hugging Face Space by daniel7an, commit 4781b83)
import gradio as gr
import pandas as pd
import plotly.express as px
def display_table(exam_type):
    """Load and return the leaderboard table for the requested benchmark.

    Parameters
    ----------
    exam_type : str
        Either ``"Armenian Exams"`` (unified exam results, sorted by
        'Average score' descending, with that column moved next to the
        model name) or ``"MMLU-Pro-Hy"`` (sorted by 'Accuracy' descending).

    Returns
    -------
    pandas.DataFrame
        The sorted results table read from the corresponding CSV file.

    Raises
    ------
    ValueError
        If ``exam_type`` is not one of the two known benchmarks.  (The
        original code fell through and crashed with an unbound ``df``.)
    """
    if exam_type == "Armenian Exams":
        df = pd.read_csv('unified_exam_results.csv')
        df = df.sort_values(by='Average score', ascending=False)
        # Move 'Average score' right after the model column so the headline
        # metric is visible without horizontal scrolling.
        cols = df.columns.tolist()
        cols.insert(1, cols.pop(cols.index('Average score')))
        df = df[cols]
    elif exam_type == "MMLU-Pro-Hy":
        df = pd.read_csv('mmlu_pro_hy_results.csv')
        df = df.sort_values(by='Accuracy', ascending=False)
    else:
        raise ValueError(f"Unknown exam type: {exam_type!r}")
    return df
def _exam_grade(score):
    """Map an Armenian unified-exam score (0-20 scale) to a verdict label."""
    if score < 8:
        return "Fail"
    if score <= 18:
        return "Pass"
    return "Distinction"


def _apply_layout(fig, title, x_range_max):
    """Apply the shared axis/title styling used by every leaderboard chart.

    The y axis is reversed so the best-scoring model (first row of the
    sorted frame) appears at the top of the horizontal bar chart.
    """
    fig.update_layout(
        xaxis=dict(range=[0, x_range_max]),
        title=dict(text=title, font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        yaxis=dict(autorange="reversed"),
    )
    return fig


def create_bar_chart(exam_type, plot_column):
    """Build a horizontal bar chart of model scores for one benchmark.

    Parameters
    ----------
    exam_type : str
        ``"Armenian Exams"`` or ``"MMLU-Pro-Hy"``.
    plot_column : str
        Column to plot.  For Armenian exams, 'Average score' gets a
        continuous 0-20 color scale while individual exam columns are
        colored by Fail/Pass/Distinction grade; for MMLU-Pro-Hy this is
        always 'Accuracy' (continuous 0-1 scale).

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If ``exam_type`` is not a known benchmark (the original code
        silently returned ``None``).
    """
    if exam_type == "Armenian Exams":
        df = pd.read_csv('unified_exam_results.csv')
        # Sort best-first, breaking ties alphabetically by model name.
        # (The original sorted twice; the first sort was redundant.)
        df = df.sort_values(by=[plot_column, 'Model'],
                            ascending=[False, True]).reset_index(drop=True)
        title = f'{plot_column} per Model'
        # Armenian unified exams are scored out of 20.  (The original had
        # an if/else here whose branches both assigned 20 — dead code.)
        range_max = 20
        if plot_column in ('Average score', 'Accuracy'):
            fig = px.bar(df,
                         x=plot_column,
                         y='Model',
                         color=plot_column,
                         color_continuous_scale='tealrose_r',
                         labels={plot_column: plot_column, 'Model': 'Model'},
                         title=title,
                         orientation='h',
                         range_color=[0, range_max])
        else:
            df['Test Result'] = df[plot_column].apply(_exam_grade)
            fig = px.bar(df,
                         x=plot_column,
                         y='Model',
                         color=df['Test Result'],
                         color_discrete_map={
                             "Fail": "#d15d80",
                             "Pass": "#edd8be",
                             "Distinction": "#059492",
                         },
                         labels={plot_column: plot_column, 'Model': 'Model'},
                         title=title,
                         orientation='h')
        return _apply_layout(fig, title, range_max)

    elif exam_type == "MMLU-Pro-Hy":
        df = pd.read_csv('mmlu_pro_hy_results.csv')
        df = df.sort_values(by='Accuracy', ascending=False)
        title = 'Accuracy per Model (MMLU-Pro-Hy)'
        # Accuracy is a fraction in [0, 1].  (The original also computed a
        # Low/Medium/High 'Test Result' column here that the figure never
        # used — removed as dead code.)
        range_max = 1.0
        fig = px.bar(df,
                     x='Accuracy',
                     y='Model',
                     color='Accuracy',
                     color_continuous_scale='tealrose_r',
                     labels={'Accuracy': plot_column, 'Model': 'Model'},
                     title=title,
                     orientation='h',
                     range_color=[0, range_max])
        return _apply_layout(fig, title, range_max)

    raise ValueError(f"Unknown exam type: {exam_type!r}")
# --- Gradio UI ----------------------------------------------------------
# Two tabs, one per benchmark.  Each tab shows the leaderboard table and a
# horizontal bar chart; the Armenian tab adds a dropdown selecting which
# score column to plot.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("Armenian Unified Exams"):
            # Callable `value=` means the CSV is re-read each time the page
            # loads, so the table tracks the file on disk.
            table_output_armenian = gr.DataFrame(value=lambda: display_table("Armenian Exams"))
            plot_column_dropdown = gr.Dropdown(choices=['Average score', 'Armenian language exam score', 'Armenian history exam score', 'Mathematics exam score'], value='Average score', label='Select Column to Plot')
            # NOTE(review): this relies on gr.Plot accepting `inputs=` so the
            # chart re-renders when the dropdown changes — confirm this works
            # with the pinned Gradio version.
            plot_output_armenian = gr.Plot(lambda column: create_bar_chart("Armenian Exams", column), inputs=plot_column_dropdown)
        with gr.TabItem("MMLU-Pro-Hy"):
            table_output_mmlu = gr.DataFrame(value=lambda: display_table("MMLU-Pro-Hy"))
            plot_output_mmlu = gr.Plot(lambda: create_bar_chart("MMLU-Pro-Hy", 'Accuracy'))

# share=True additionally exposes a temporary public Gradio tunnel URL.
app.launch(share=True)