Spaces:

Metric-AI
/

ArmBench-LLM

Running

File size: 4,027 Bytes

import gradio as gr
import pandas as pd
import plotly.express as px
from model_handler import ModelHandler

def unified_exam_result_table(unified_exam_df):
    """Return the unified-exam leaderboard table, ranked by 'Average'.

    The input frame is not modified. The result is sorted by 'Average'
    (highest first), gains a 1-based 'Rank' column in first position, has
    'Average' moved to the third position, has the
    'Armenian language and literature' header wrapped onto two lines, and
    has every numeric value rounded to 4 decimals.
    """
    # sort_values returns a new frame, so the caller's data stays untouched.
    ranked = unified_exam_df.sort_values(by='Average', ascending=False)
    ranked = ranked.reset_index(drop=True)
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))

    # Pull 'Average' out of wherever it sits and place it at index 2.
    ordered = list(ranked.columns)
    ordered.remove('Average')
    ordered.insert(2, 'Average')

    # The embedded newline wraps the long header in the rendered table.
    wrapped_headers = {
        'Armenian language and literature': 'Armenian language\nand literature',
    }
    table = ranked[ordered].rename(columns=wrapped_headers)
    return table.round(4)

def mmlu_result_table(mmlu_df):
    """Return the MMLU leaderboard table, ranked by 'Average'.

    The input frame is not modified. The result is sorted by 'Average'
    (highest first), gains a 1-based 'Rank' column in first position, has
    'Average' moved to the third position and 'Other' moved to the last
    position, and has every numeric value rounded to 4 decimals.
    """
    # sort_values returns a new frame, so the caller's data stays untouched.
    ranked = mmlu_df.sort_values(by='Average', ascending=False)
    ranked = ranked.reset_index(drop=True)
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))

    ordered = list(ranked.columns)
    # 'Average' goes right after the first two columns ...
    ordered.remove('Average')
    ordered.insert(2, 'Average')
    # ... and the catch-all 'Other' column is pushed to the far right.
    ordered.remove('Other')
    ordered.append('Other')

    return ranked[ordered].round(4)

def unified_exam_chart(unified_exam_df, plot_column):
    """Build a horizontal bar chart of one unified-exam column, one bar per model.

    Rows are sorted by the plotted score (highest first, ties broken by
    'Model' ascending) and each bar is coloured by a label derived from its
    score: below 8 is "Fail", 8 through 18 is "Pass", anything else is
    "Distinction". The x axis is fixed to the range 0-20.

    Args:
        unified_exam_df: DataFrame with a 'Model' column and the score column
            to plot. It is copied, not modified.
        plot_column: Name of the score column. 'Armenian language and
            literature' is accepted whether the frame carries that plain
            header or the line-wrapped one written by
            ``unified_exam_result_table``.

    Returns:
        The Plotly figure produced by ``px.bar``.

    Raises:
        KeyError: If the requested column or 'Model' is not in the frame.
    """
    plain_header = 'Armenian language and literature'
    wrapped_header = 'Armenian language\nand literature'
    if plot_column == plain_header:
        available = unified_exam_df.columns
        # Prefer the wrapped header (what the table builder emits). Keep the
        # plain one only when it is the sole variant present, so a frame that
        # was not passed through the table builder no longer raises KeyError.
        if wrapped_header in available or plain_header not in available:
            plot_column = wrapped_header

    df = unified_exam_df.copy()
    df = df.sort_values(
        by=[plot_column, 'Model'], ascending=[False, True]
    ).reset_index(drop=True)

    title = f'{plot_column}'
    x_range_max = 20

    def get_label(score):
        # NOTE(review): a NaN score fails both comparisons and therefore
        # lands in "Distinction" - confirm whether missing scores can occur.
        if score < 8:
            return "Fail"
        elif 8 <= score <= 18:
            return "Pass"
        else:
            return "Distinction"

    df['Test Result'] = df[plot_column].apply(get_label)
    color_discrete_map = {
        "Fail": "#ff5f56",
        "Pass": "#ffbd2e",
        "Distinction": "#27c93f"
    }
    fig = px.bar(
        df,
        x=plot_column,
        y='Model',
        color=df['Test Result'],
        color_discrete_map=color_discrete_map,
        labels={plot_column: 'Score', 'Model': 'Model'},
        title=title,
        orientation='h'
    )

    # Fixed height per bar so the figure grows with the number of models.
    bar_height_px = 28

    fig.update_layout(
        height=bar_height_px * len(df) + 120,
        margin=dict(l=220, r=40, t=60, b=40),
        xaxis=dict(range=[0, x_range_max]),
        title=dict(text=title, font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        yaxis=dict(
            automargin=True,
            # Explicit ticks so every model name gets a label.
            tickmode="array",
            tickvals=df["Model"],
            ticktext=df["Model"],
            dtick=1,
            # Reversed so the first (highest-scoring) row is drawn at the top.
            autorange="reversed"
        ),
        width=1000
    )

    fig.update_yaxes(tickfont=dict(size=10))

    return fig

def mmlu_chart(mmlu_df, plot_column):
    """Build a horizontal bar chart of one MMLU column, one bar per model.

    'Average' is recomputed as the row-wise mean of the fixed list of subject
    columns below, replacing any 'Average' already in the frame. Rows are then
    sorted by the plotted column (highest first, ties broken by 'Model'
    ascending). Bars are coloured by their own value on the Viridis scale
    over [0, 1], and the x axis is fixed to the range 0-1.

    Args:
        mmlu_df: DataFrame with a 'Model' column and every subject column
            listed below. It is not modified.
        plot_column: Name of the column to plot.

    Returns:
        The Plotly figure produced by ``px.bar``.
    """
    subjects = [
        'Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics',
        'Engineering', 'Health', 'History', 'Law', 'Math', 'Other',
        'Philosophy', 'Physics', 'Psychology'
    ]
    # assign() works on a fresh copy, leaving the caller's frame untouched.
    scored = mmlu_df.assign(Average=mmlu_df[subjects].mean(axis=1))
    scored = scored.sort_values(by=[plot_column, 'Model'], ascending=[False, True])
    scored = scored.reset_index(drop=True)

    chart_title = f'{plot_column}'
    accuracy_axis_max = 1.0
    px_per_bar = 28

    fig = px.bar(
        scored,
        x=plot_column,
        y='Model',
        color=plot_column,
        color_continuous_scale='Viridis',
        labels={plot_column: 'Accuracy', 'Model': 'Model'},
        title=chart_title,
        orientation='h',
        range_color=[0, 1]
    )

    model_axis = dict(
        automargin=True,
        # Explicit ticks so every model name gets a label.
        tickmode='array',
        tickvals=scored['Model'],
        ticktext=scored['Model'],
        dtick=1,
        # Reversed so the first (highest-scoring) row is drawn at the top.
        autorange='reversed'
    )
    axis_title_font = dict(size=12)

    fig.update_layout(
        # Fixed height per bar so the figure grows with the number of models.
        height=px_per_bar * len(scored) + 120,
        margin=dict(l=220, r=40, t=60, b=40),
        width=1000,
        xaxis=dict(range=[0, accuracy_axis_max]),
        title=dict(text=chart_title, font=dict(size=16)),
        xaxis_title=dict(font=axis_title_font),
        yaxis_title=dict(font=axis_title_font),
        yaxis=model_axis
    )

    fig.update_yaxes(tickfont=dict(size=10))

    return fig