# ArmBench-LLM — app.py (Hugging Face Space by daniel7an, commit 4781b83)
import gradio as gr
import pandas as pd
import plotly.express as px
def display_table(exam_type):
    """Load and return the leaderboard table for the requested benchmark.

    Parameters
    ----------
    exam_type : str
        Either ``"Armenian Exams"`` (unified exam results, sorted by
        'Average score' descending, with that column moved next to the
        model name) or ``"MMLU-Pro-Hy"`` (sorted by 'Accuracy' descending).

    Returns
    -------
    pandas.DataFrame
        The sorted results table read from the corresponding CSV file.

    Raises
    ------
    ValueError
        If ``exam_type`` is not one of the two known benchmarks.  (The
        original code fell through and crashed with an unbound ``df``.)
    """
    if exam_type == "Armenian Exams":
        df = pd.read_csv('unified_exam_results.csv')
        df = df.sort_values(by='Average score', ascending=False)
        # Move 'Average score' right after the model column so the headline
        # metric is visible without horizontal scrolling.
        cols = df.columns.tolist()
        cols.insert(1, cols.pop(cols.index('Average score')))
        df = df[cols]
    elif exam_type == "MMLU-Pro-Hy":
        df = pd.read_csv('mmlu_pro_hy_results.csv')
        df = df.sort_values(by='Accuracy', ascending=False)
    else:
        raise ValueError(f"Unknown exam type: {exam_type!r}")
    return df
def _exam_grade(score):
    """Map an Armenian unified-exam score (0-20 scale) to a verdict label."""
    if score < 8:
        return "Fail"
    if score <= 18:
        return "Pass"
    return "Distinction"


def _apply_layout(fig, title, x_range_max):
    """Apply the shared axis/title styling used by every leaderboard chart.

    The y axis is reversed so the best-scoring model (first row of the
    sorted frame) appears at the top of the horizontal bar chart.
    """
    fig.update_layout(
        xaxis=dict(range=[0, x_range_max]),
        title=dict(text=title, font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        yaxis=dict(autorange="reversed"),
    )
    return fig


def create_bar_chart(exam_type, plot_column):
    """Build a horizontal bar chart of model scores for one benchmark.

    Parameters
    ----------
    exam_type : str
        ``"Armenian Exams"`` or ``"MMLU-Pro-Hy"``.
    plot_column : str
        Column to plot.  For Armenian exams, 'Average score' gets a
        continuous 0-20 color scale while individual exam columns are
        colored by Fail/Pass/Distinction grade; for MMLU-Pro-Hy this is
        always 'Accuracy' (continuous 0-1 scale).

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If ``exam_type`` is not a known benchmark (the original code
        silently returned ``None``).
    """
    if exam_type == "Armenian Exams":
        df = pd.read_csv('unified_exam_results.csv')
        # Sort best-first, breaking ties alphabetically by model name.
        # (The original sorted twice; the first sort was redundant.)
        df = df.sort_values(by=[plot_column, 'Model'],
                            ascending=[False, True]).reset_index(drop=True)
        title = f'{plot_column} per Model'
        # Armenian unified exams are scored out of 20.  (The original had
        # an if/else here whose branches both assigned 20 — dead code.)
        range_max = 20
        if plot_column in ('Average score', 'Accuracy'):
            fig = px.bar(df,
                         x=plot_column,
                         y='Model',
                         color=plot_column,
                         color_continuous_scale='tealrose_r',
                         labels={plot_column: plot_column, 'Model': 'Model'},
                         title=title,
                         orientation='h',
                         range_color=[0, range_max])
        else:
            df['Test Result'] = df[plot_column].apply(_exam_grade)
            fig = px.bar(df,
                         x=plot_column,
                         y='Model',
                         color=df['Test Result'],
                         color_discrete_map={
                             "Fail": "#d15d80",
                             "Pass": "#edd8be",
                             "Distinction": "#059492",
                         },
                         labels={plot_column: plot_column, 'Model': 'Model'},
                         title=title,
                         orientation='h')
        return _apply_layout(fig, title, range_max)

    elif exam_type == "MMLU-Pro-Hy":
        df = pd.read_csv('mmlu_pro_hy_results.csv')
        df = df.sort_values(by='Accuracy', ascending=False)
        title = 'Accuracy per Model (MMLU-Pro-Hy)'
        # Accuracy is a fraction in [0, 1].  (The original also computed a
        # Low/Medium/High 'Test Result' column here that the figure never
        # used — removed as dead code.)
        range_max = 1.0
        fig = px.bar(df,
                     x='Accuracy',
                     y='Model',
                     color='Accuracy',
                     color_continuous_scale='tealrose_r',
                     labels={'Accuracy': plot_column, 'Model': 'Model'},
                     title=title,
                     orientation='h',
                     range_color=[0, range_max])
        return _apply_layout(fig, title, range_max)

    raise ValueError(f"Unknown exam type: {exam_type!r}")
# --- Gradio UI ----------------------------------------------------------
# Two tabs, one per benchmark.  Each tab shows the leaderboard table and a
# horizontal bar chart; the Armenian tab adds a dropdown selecting which
# score column to plot.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("Armenian Unified Exams"):
            # Callable `value=` means the CSV is re-read each time the page
            # loads, so the table tracks the file on disk.
            table_output_armenian = gr.DataFrame(value=lambda: display_table("Armenian Exams"))
            plot_column_dropdown = gr.Dropdown(choices=['Average score', 'Armenian language exam score', 'Armenian history exam score', 'Mathematics exam score'], value='Average score', label='Select Column to Plot')
            # NOTE(review): this relies on gr.Plot accepting `inputs=` so the
            # chart re-renders when the dropdown changes — confirm this works
            # with the pinned Gradio version.
            plot_output_armenian = gr.Plot(lambda column: create_bar_chart("Armenian Exams", column), inputs=plot_column_dropdown)
        with gr.TabItem("MMLU-Pro-Hy"):
            table_output_mmlu = gr.DataFrame(value=lambda: display_table("MMLU-Pro-Hy"))
            plot_output_mmlu = gr.Plot(lambda: create_bar_chart("MMLU-Pro-Hy", 'Accuracy'))

# share=True additionally exposes a temporary public Gradio tunnel URL.
app.launch(share=True)