|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
|
|
|
|
# Benchmark scores grouped by dataset and then by metric. Within every list,
# position i belongs to the i-th entry of data["Method"] below, because the
# lists become aligned DataFrame columns.
# NOTE(review): BM25's STARK-AMAZON MRR (18.79) is lower than its Hit@1
# (27.16), which looks odd if both are on the same 0-100 scale -- please
# verify against the published results.
_scores_by_dataset = {
    "STARK-AMAZON": {
        "Hit@1": [27.16, 16.05, 25.93, 22.22, 39.50, 35.80, 29.63, 40.74, 46.91, 33.33, 53.09, 50.62],
        "Hit@5": [51.85, 39.51, 54.32, 49.38, 64.19, 62.96, 46.91, 71.60, 72.84, 55.56, 74.07, 75.31],
        "R@20": [29.23, 15.23, 23.69, 21.54, 35.46, 33.01, 21.21, 36.30, 40.22, 29.03, 35.46, 35.46],
        "MRR": [18.79, 27.21, 37.12, 31.33, 52.65, 47.84, 38.61, 53.21, 58.74, 43.77, 62.11, 61.06],
    },
    "STARK-MAG": {
        "Hit@1": [32.14, 4.72, 25.00, 20.24, 28.57, 22.62, 16.67, 34.52, 23.81, 33.33, 38.10, 36.90],
        "Hit@5": [41.67, 9.52, 30.95, 26.19, 41.67, 36.90, 28.57, 44.04, 41.67, 36.90, 45.24, 46.43],
        "R@20": [32.46, 25.00, 27.24, 28.76, 35.95, 32.44, 21.74, 34.57, 39.85, 30.50, 35.95, 35.95],
        "MRR": [37.42, 7.90, 27.98, 25.53, 35.81, 29.68, 21.59, 38.72, 31.43, 35.97, 42.00, 40.65],
    },
    "STARK-PRIME": {
        "Hit@1": [22.45, 2.04, 7.14, 6.12, 17.35, 16.33, 9.18, 25.51, 24.49, 15.31, 28.57, 28.57],
        "Hit@5": [41.84, 9.18, 13.27, 13.27, 34.69, 32.65, 21.43, 41.84, 39.80, 26.53, 46.94, 44.90],
        "R@20": [42.32, 10.69, 11.72, 17.62, 41.09, 39.01, 26.77, 48.10, 47.21, 25.56, 41.61, 41.61],
        "MRR": [30.37, 7.05, 10.07, 9.39, 26.35, 24.33, 15.24, 34.28, 32.98, 19.67, 36.32, 34.82],
    },
}

# Wide table definition: one "Method" column followed by one
# "<dataset>_<metric>" column per score list, in dataset-then-metric order.
data = {
    "Method": [
        "BM25",
        "DPR (roberta)",
        "ANCE (roberta)",
        "QAGNN (roberta)",
        "ada-002",
        "voyage-l2-instruct",
        "LLM2Vec",
        "GritLM-7b",
        "multi-ada-002",
        "ColBERTv2",
        "Claude3 Reranker",
        "GPT4 Reranker",
    ],
    **{
        f"{dataset}_{metric}": scores
        for dataset, metrics in _scores_by_dataset.items()
        for metric, scores in metrics.items()
    },
}

# One row per retrieval method; this frame is what the leaderboard tabs slice.
df = pd.DataFrame(data)
|
|
|
def format_dataframe(df, dataset):
    """Return the leaderboard table for a single dataset, best MRR first.

    Metric columns are expected to be named ``<dataset>_<metric>`` (for
    example ``STARK-MAG_Hit@1``). Only columns carrying exactly that
    ``<dataset>_`` prefix are selected, so a dataset whose name merely
    contains ``dataset`` (``STARK-MAG2_MRR`` when asking for ``STARK-MAG``)
    can no longer leak in and produce duplicate metric columns.

    Args:
        df: Wide results frame with a ``Method`` column plus the prefixed
            metric columns. It is not modified.
        dataset: Dataset prefix to extract, e.g. ``"STARK-AMAZON"``.

    Returns:
        A new DataFrame holding ``Method`` followed by the dataset's metric
        columns with the prefix stripped (``Hit@1``, ``MRR``, ...), sorted
        by ``MRR`` in descending order. Row index labels are kept as they
        were in ``df``.

    Raises:
        KeyError: If ``df`` has no ``Method`` column or no
            ``<dataset>_MRR`` column to rank by.
    """
    prefix = f"{dataset}_"

    # Exact-prefix match instead of a substring test keeps unrelated
    # datasets out of the table.
    metric_columns = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith(prefix)
    ]

    # Fail with a message that names the dataset rather than a bare
    # KeyError('MRR') from deep inside pandas.
    if f"{prefix}MRR" not in metric_columns:
        raise KeyError(
            f"no '{prefix}MRR' column found; cannot rank methods for dataset {dataset!r}"
        )

    # Strip only the leading prefix so metric names that themselves contain
    # an underscore are preserved intact.
    short_names = {col: col[len(prefix):] for col in metric_columns}

    # Column selection with a list returns a new frame, so the caller's
    # DataFrame is left untouched.
    table = df[["Method", *metric_columns]].rename(columns=short_names)

    return table.sort_values("MRR", ascending=False)
|
|
|
# Extra CSS handed to gr.Blocks below:
#   * table header cells may wrap their text (white-space: normal);
#   * the --cell-width-1 custom property is set to 250px on every table --
#     presumably read by Gradio's dataframe styling to size a column
#     (TODO confirm against the Gradio version in use);
#   * the content of each row's second cell scrolls horizontally instead of
#     overflowing.
css = """
table > thead {
    white-space: normal
}

table {
    --cell-width-1: 250px
}

table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}
"""

# (tab label, metric-column prefix) for every dataset shown on the board.
_DATASET_TABS = (
    ("Amazon", "STARK-AMAZON"),
    ("MAG", "STARK-MAG"),
    ("Prime", "STARK-PRIME"),
)


def _render_dataset_tabs(results):
    """Render one tab per dataset, each showing that dataset's ranked table.

    Must be called inside an active ``gr.Blocks`` context so the created
    components attach to the surrounding layout.

    Args:
        results: Wide results frame accepted by ``format_dataframe``.

    Returns:
        The ``gr.Tabs`` container that holds the per-dataset tabs.
    """
    with gr.Tabs() as tabs:
        for label, dataset in _DATASET_TABS:
            with gr.TabItem(label):
                gr.DataFrame(format_dataframe(results, dataset))
    return tabs


with gr.Blocks(css=css) as demo:
    gr.Markdown("# Semi-structured Retrieval Benchmark (STaRK) Leaderboard")
    gr.Markdown("Refer to the [STaRK paper](https://arxiv.org/pdf/2404.13207) for details on metrics, tasks and models.")

    with gr.Tabs() as outer_tabs:
        with gr.TabItem("Synthesized"):
            gr.Markdown("## Synthesized Data Results")
            inner_tabs_synthesized = _render_dataset_tabs(df)

        with gr.TabItem("Human-Generated"):
            gr.Markdown("## Human-Generated Data Results")
            # NOTE(review): this tab is fed the same `df` as the Synthesized
            # tab, so both currently display identical numbers. Pass the
            # human-generated results frame here once it is available.
            inner_tabs_human = _render_dataset_tabs(df)

# Only start the server when the file is executed as a script, so importing
# the module (tests, tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()