File size: 4,027 Bytes
779cbde
 
 
a4d362f
779cbde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119f7c7
 
779cbde
001dc3f
 
779cbde
 
 
 
119f7c7
001dc3f
 
119f7c7
 
001dc3f
 
119f7c7
29617b1
779cbde
119f7c7
 
 
779cbde
 
 
 
33a913f
 
 
 
 
 
779cbde
33a913f
 
 
 
 
 
 
 
 
 
 
 
779cbde
 
 
 
 
 
 
33a913f
779cbde
 
 
33a913f
 
 
 
779cbde
 
 
 
33a913f
 
 
 
 
 
 
 
 
779cbde
33a913f
 
 
779cbde
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import gradio as gr
import pandas as pd
import plotly.express as px
from model_handler import ModelHandler

def unified_exam_result_table(unified_exam_df):
    """Return the ranked, display-ready table of unified exam results.

    Rows are ordered by the 'Average' column (highest first) and numbered in
    a new leading 'Rank' column.  'Average' is moved to the third column
    position, the 'Armenian language and literature' header gets a line
    break inserted, and the values are rounded to four decimals.  The frame
    passed in is not modified.
    """
    ranked = unified_exam_df.sort_values(by='Average', ascending=False)
    ranked = ranked.reset_index(drop=True)
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))

    # Pull 'Average' out of wherever it sits and re-insert it at index 2.
    column_order = list(ranked.columns)
    column_order.remove('Average')
    column_order.insert(2, 'Average')

    header_map = {'Armenian language and literature': 'Armenian language\nand literature'}
    return ranked[column_order].rename(columns=header_map).round(4)

def mmlu_result_table(mmlu_df):
    """Return the ranked, display-ready table of MMLU results.

    Rows are ordered by the 'Average' column (highest first) and numbered in
    a new leading 'Rank' column.  'Average' is moved to the third column
    position, 'Other' is moved to the last position, and the values are
    rounded to four decimals.  The frame passed in is not modified.
    """
    table = mmlu_df.sort_values(by='Average', ascending=False)
    table = table.reset_index(drop=True)
    table.insert(0, 'Rank', range(1, len(table) + 1))

    layout = list(table.columns)
    # 'Average' goes right after the first two columns ...
    layout.remove('Average')
    layout.insert(2, 'Average')
    # ... and the catch-all 'Other' column is pushed to the far right.
    layout.remove('Other')
    layout.append('Other')

    return table[layout].round(4)

def unified_exam_chart(unified_exam_df, plot_column):
    """Draw a horizontal bar chart of one unified-exam column, one bar per model.

    Bars are sorted by score (highest at the top, ties broken by model name)
    and coloured by outcome: below 8 is "Fail", 8 through 18 is "Pass", and
    anything above 18 is "Distinction".  Missing scores are labelled
    "No score" instead of falling through to "Distinction".

    Args:
        unified_exam_df: DataFrame with a 'Model' column and the column named
            by ``plot_column``.  It is copied, not modified.
        plot_column: Name of the score column to plot.  The plain
            'Armenian language and literature' name is mapped to the variant
            containing a line break.

    Returns:
        The Plotly figure produced by ``px.bar``.
    """
    # NOTE(review): presumably the frame comes from unified_exam_result_table,
    # which renames this column to include a line break - confirm with caller.
    if plot_column == 'Armenian language and literature':
        plot_column = 'Armenian language\nand literature'

    df = unified_exam_df.copy()
    df = df.sort_values(by=[plot_column, 'Model'], ascending=[False, True]).reset_index(drop=True)

    x_range_max = 20
    bar_height_px = 28

    def get_label(score):
        # NaN compares False against every threshold, so without this guard a
        # missing score would land in the final branch and read "Distinction".
        if pd.isna(score):
            return "No score"
        if score < 8:
            return "Fail"
        if score <= 18:
            return "Pass"
        return "Distinction"

    df['Test Result'] = df[plot_column].apply(get_label)
    color_discrete_map = {
        "Fail": "#ff5f56",
        "Pass": "#ffbd2e",
        "Distinction": "#27c93f",
        "No score": "#9e9e9e",
    }

    fig = px.bar(
        df,
        x=plot_column,
        y='Model',
        color='Test Result',
        color_discrete_map=color_discrete_map,
        labels={plot_column: 'Score', 'Model': 'Model'},
        title=f'{plot_column}',
        orientation='h',
    )

    fig.update_layout(
        # Fixed height per bar plus room for the title and axis.
        height=bar_height_px * len(df) + 120,
        margin=dict(l=220, r=40, t=60, b=40),
        xaxis=dict(range=[0, x_range_max]),
        title=dict(text=f'{plot_column}', font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        yaxis=dict(
            automargin=True,
            # Explicit ticks so every model name is shown.
            tickmode="array",
            tickvals=df["Model"],
            ticktext=df["Model"],
            dtick=1,
            # Reversed so the first (highest-scoring) row is drawn at the top.
            autorange="reversed",
        ),
        width=1000,
    )

    fig.update_yaxes(tickfont=dict(size=10))

    return fig

def mmlu_chart(mmlu_df, plot_column):
    """Draw a horizontal bar chart of one MMLU column, one bar per model.

    The 'Average' column is recomputed as the row-wise mean of the fourteen
    subject columns listed below, replacing any value already present.  Bars
    are sorted by ``plot_column`` (highest at the top, ties broken by model
    name) and coloured on a Viridis scale fixed to the range 0-1.  The input
    frame is copied, not modified.

    Returns the Plotly figure produced by ``px.bar``.
    """
    subjects = [
        'Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics',
        'Engineering', 'Health', 'History', 'Law', 'Math', 'Other',
        'Philosophy', 'Physics', 'Psychology'
    ]
    row_px = 28
    axis_cap = 1.0
    heading = f'{plot_column}'

    data = mmlu_df.copy()
    data['Average'] = data[subjects].mean(axis=1)
    data = data.sort_values(by=[plot_column, 'Model'], ascending=[False, True])
    data = data.reset_index(drop=True)

    figure = px.bar(
        data,
        x=plot_column,
        y='Model',
        color=plot_column,
        color_continuous_scale='Viridis',
        labels={plot_column: 'Accuracy', 'Model': 'Model'},
        title=heading,
        orientation='h',
        range_color=[0, 1],
    )

    small_font = {'font': {'size': 12}}
    model_axis = {
        'automargin': True,
        # Explicit ticks so every model name is shown.
        'tickmode': 'array',
        'tickvals': data['Model'],
        'ticktext': data['Model'],
        'dtick': 1,
        # Reversed so the first (highest-scoring) row is drawn at the top.
        'autorange': 'reversed',
    }

    figure.update_layout(
        # Fixed height per bar plus room for the title and axis.
        height=row_px * len(data) + 120,
        margin={'l': 220, 'r': 40, 't': 60, 'b': 40},
        width=1000,
        xaxis={'range': [0, axis_cap]},
        title={'text': heading, 'font': {'size': 16}},
        xaxis_title=dict(small_font),
        yaxis_title=dict(small_font),
        yaxis=model_axis,
    )

    figure.update_yaxes(tickfont={'size': 10})

    return figure