Spaces:

zsyJosh
/

stark

Sleeping

stark / app.py

Shiyu Zhao

Update space

031d82b 11 months ago

3.51 kB

	import gradio as gr
	import pandas as pd
	import numpy as np

	# Leaderboard scores transcribed from a results table. Keys follow the
	# pattern "<DATASET>_<metric>", and every score list is aligned
	# position-by-position with the 'Method' list.
	# NOTE(review): for BM25 the STARK-AMAZON MRR (18.79) is lower than its
	# Hit@1 (27.16); the other rows have MRR >= Hit@1 — confirm this value
	# against the table the numbers were copied from.
	data = {
	    'Method': [
	        'BM25', 'DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)',
	        'ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b',
	        'multi-ada-002', 'ColBERTv2', 'Claude3 Reranker', 'GPT4 Reranker',
	    ],

	    # --- STARK-AMAZON ---
	    'STARK-AMAZON_Hit@1': [
	        27.16, 16.05, 25.93, 22.22, 39.50, 35.80,
	        29.63, 40.74, 46.91, 33.33, 53.09, 50.62,
	    ],
	    'STARK-AMAZON_Hit@5': [
	        51.85, 39.51, 54.32, 49.38, 64.19, 62.96,
	        46.91, 71.60, 72.84, 55.56, 74.07, 75.31,
	    ],
	    'STARK-AMAZON_R@20': [
	        29.23, 15.23, 23.69, 21.54, 35.46, 33.01,
	        21.21, 36.30, 40.22, 29.03, 35.46, 35.46,
	    ],
	    'STARK-AMAZON_MRR': [
	        18.79, 27.21, 37.12, 31.33, 52.65, 47.84,
	        38.61, 53.21, 58.74, 43.77, 62.11, 61.06,
	    ],

	    # --- STARK-MAG ---
	    'STARK-MAG_Hit@1': [
	        32.14, 4.72, 25.00, 20.24, 28.57, 22.62,
	        16.67, 34.52, 23.81, 33.33, 38.10, 36.90,
	    ],
	    'STARK-MAG_Hit@5': [
	        41.67, 9.52, 30.95, 26.19, 41.67, 36.90,
	        28.57, 44.04, 41.67, 36.90, 45.24, 46.43,
	    ],
	    'STARK-MAG_R@20': [
	        32.46, 25.00, 27.24, 28.76, 35.95, 32.44,
	        21.74, 34.57, 39.85, 30.50, 35.95, 35.95,
	    ],
	    'STARK-MAG_MRR': [
	        37.42, 7.90, 27.98, 25.53, 35.81, 29.68,
	        21.59, 38.72, 31.43, 35.97, 42.00, 40.65,
	    ],

	    # --- STARK-PRIME ---
	    'STARK-PRIME_Hit@1': [
	        22.45, 2.04, 7.14, 6.12, 17.35, 16.33,
	        9.18, 25.51, 24.49, 15.31, 28.57, 28.57,
	    ],
	    'STARK-PRIME_Hit@5': [
	        41.84, 9.18, 13.27, 13.27, 34.69, 32.65,
	        21.43, 41.84, 39.80, 26.53, 46.94, 44.90,
	    ],
	    'STARK-PRIME_R@20': [
	        42.32, 10.69, 11.72, 17.62, 41.09, 39.01,
	        26.77, 48.10, 47.21, 25.56, 41.61, 41.61,
	    ],
	    'STARK-PRIME_MRR': [
	        30.37, 7.05, 10.07, 9.39, 26.35, 24.33,
	        15.24, 34.28, 32.98, 19.67, 36.32, 34.82,
	    ],
	}

	# One row per method, one column per "<DATASET>_<metric>" key.
	df = pd.DataFrame(data)

	def format_dataframe(df: pd.DataFrame, dataset: str) -> pd.DataFrame:
	    """Build the leaderboard table for a single dataset.

	    Keeps the ``Method`` column plus every column whose name contains
	    ``dataset``, renames each kept column to the text after its last
	    underscore (``'STARK-MAG_MRR'`` becomes ``'MRR'``; names without an
	    underscore are left alone), and orders the rows by ``MRR`` from
	    highest to lowest.

	    Args:
	        df: Table with a ``Method`` column and score columns named
	            ``<dataset>_<metric>``.
	        dataset: Dataset label used to pick the score columns, e.g.
	            ``'STARK-AMAZON'``.

	    Returns:
	        A new DataFrame (original row index retained); ``df`` itself is
	        not modified.

	    Raises:
	        KeyError: If ``Method`` is absent, if no column name contains
	            ``dataset``, or if none of the selected columns is an
	            ``MRR`` column to sort by.
	    """
	    # Select the score columns that belong to the requested dataset.
	    score_columns = [col for col in df.columns if dataset in col]
	    if not score_columns:
	        # Fail with a message that names the real problem instead of the
	        # bare KeyError('MRR') that sorting would raise further down.
	        raise KeyError(f"no columns found for dataset {dataset!r}")

	    # Copy so that renaming below never touches the caller's frame.
	    filtered_df = df[['Method'] + score_columns].copy()

	    # Drop the "<dataset>_" prefix: keep only what follows the last underscore.
	    filtered_df.columns = [col.rsplit('_', 1)[-1] for col in filtered_df.columns]

	    if 'MRR' not in filtered_df.columns:
	        raise KeyError(f"dataset {dataset!r} has no MRR column to sort by")

	    # Best MRR first.
	    return filtered_df.sort_values('MRR', ascending=False)

	# Custom stylesheet for the Gradio page (supplied to gr.Blocks through its
	# css= argument). Three rules:
	#   * table header cells are allowed to wrap their text
	#     (white-space: normal);
	#   * tables set the custom property --cell-width-1 to 250px
	#     (presumably the width Gradio gives the first column — TODO confirm);
	#   * the <div> inside each second-column body cell scrolls horizontally
	#     when its content overflows (overflow-x: auto).
	css = """
	table > thead {
	white-space: normal
	}

	table {
	--cell-width-1: 250px
	}

	table > tbody > tr > td:nth-child(2) > div {
	overflow-x: auto
	}
	"""

	def _add_dataset_tabs(table):
	    """Add the Amazon / MAG / Prime tabs, each showing its sorted slice of ``table``.

	    Must be called inside an open ``gr.Tabs()`` context so the tab items
	    attach to it.
	    """
	    for tab_label, dataset in (
	        ("Amazon", "STARK-AMAZON"),
	        ("MAG", "STARK-MAG"),
	        ("Prime", "STARK-PRIME"),
	    ):
	        with gr.TabItem(tab_label):
	            gr.DataFrame(format_dataframe(table, dataset))


	# Page layout: a title, a pointer to the paper, then two top-level tabs
	# ("Synthesized" and "Human-Generated"), each holding one tab per dataset.
	with gr.Blocks(css=css) as demo:
	    gr.Markdown("# Semi-structured Retrieval Benchmark (STaRK) Leaderboard")
	    gr.Markdown("Refer to the [STaRK paper](https://arxiv.org/pdf/2404.13207) for details on metrics, tasks and models.")

	    with gr.Tabs() as outer_tabs:
	        with gr.TabItem("Synthesized"):
	            gr.Markdown("## Synthesized Data Results")
	            with gr.Tabs() as inner_tabs_synthesized:
	                _add_dataset_tabs(df)

	        # NOTE(review): this section renders the very same `df` as the
	        # "Synthesized" section above; no separate human-generated table
	        # is defined in this file — confirm which split the numbers
	        # belong to and supply the other one.
	        with gr.TabItem("Human-Generated"):
	            gr.Markdown("## Human-Generated Data Results")
	            with gr.Tabs() as inner_tabs_human:
	                _add_dataset_tabs(df)

	# Start the Gradio server for the page built above.
	demo.launch()