File size: 3,508 Bytes
ad9f5e1
 
1aa50db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
031d82b
 
 
 
1aa50db
031d82b
 
1aa50db
031d82b
 
1aa50db
031d82b
1aa50db
3afe0be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
031d82b
 
3afe0be
031d82b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3afe0be
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import pandas as pd
import numpy as np

# Leaderboard results in column-oriented form. 'Method' holds the row labels;
# every other key is named '<DATASET>_<METRIC>' and carries one value per
# method, in the same order as the 'Method' list (12 entries each).
# format_dataframe() depends on this naming: it picks columns by dataset name
# and keeps only the text after the last '_' as the displayed metric name.
# NOTE(review): the values look like percentages transcribed from a results
# table -- confirm the source and which split they belong to.
# NOTE(review): BM25's STARK-AMAZON MRR (18.79) is lower than its Hit@1
# (27.16). If both are percentages over the same queries, MRR can never be
# below Hit@1 -- verify this entry against the source table.
data = {
    'Method': ['BM25', 'DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)', 'ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b', 'multi-ada-002', 'ColBERTv2', 'Claude3 Reranker', 'GPT4 Reranker'],
    'STARK-AMAZON_Hit@1': [27.16, 16.05, 25.93, 22.22, 39.50, 35.80, 29.63, 40.74, 46.91, 33.33, 53.09, 50.62],
    'STARK-AMAZON_Hit@5': [51.85, 39.51, 54.32, 49.38, 64.19, 62.96, 46.91, 71.60, 72.84, 55.56, 74.07, 75.31],
    'STARK-AMAZON_R@20': [29.23, 15.23, 23.69, 21.54, 35.46, 33.01, 21.21, 36.30, 40.22, 29.03, 35.46, 35.46],
    'STARK-AMAZON_MRR': [18.79, 27.21, 37.12, 31.33, 52.65, 47.84, 38.61, 53.21, 58.74, 43.77, 62.11, 61.06],
    'STARK-MAG_Hit@1': [32.14, 4.72, 25.00, 20.24, 28.57, 22.62, 16.67, 34.52, 23.81, 33.33, 38.10, 36.90],
    'STARK-MAG_Hit@5': [41.67, 9.52, 30.95, 26.19, 41.67, 36.90, 28.57, 44.04, 41.67, 36.90, 45.24, 46.43],
    'STARK-MAG_R@20': [32.46, 25.00, 27.24, 28.76, 35.95, 32.44, 21.74, 34.57, 39.85, 30.50, 35.95, 35.95],
    'STARK-MAG_MRR': [37.42, 7.90, 27.98, 25.53, 35.81, 29.68, 21.59, 38.72, 31.43, 35.97, 42.00, 40.65],
    'STARK-PRIME_Hit@1': [22.45, 2.04, 7.14, 6.12, 17.35, 16.33, 9.18, 25.51, 24.49, 15.31, 28.57, 28.57],
    'STARK-PRIME_Hit@5': [41.84, 9.18, 13.27, 13.27, 34.69, 32.65, 21.43, 41.84, 39.80, 26.53, 46.94, 44.90],
    'STARK-PRIME_R@20': [42.32, 10.69, 11.72, 17.62, 41.09, 39.01, 26.77, 48.10, 47.21, 25.56, 41.61, 41.61],
    'STARK-PRIME_MRR': [30.37, 7.05, 10.07, 9.39, 26.35, 24.33, 15.24, 34.28, 32.98, 19.67, 36.32, 34.82]
}

# Materialise the table once; the UI code slices it per dataset.
df = pd.DataFrame(data)

def format_dataframe(df: pd.DataFrame, dataset: str) -> pd.DataFrame:
    """Return the leaderboard table for one dataset, best MRR first.

    Metric columns are expected to be named ``'<dataset>_<metric>'``. The
    result holds the ``'Method'`` column followed by that dataset's metric
    columns, renamed to the text after their last ``'_'``, and sorted by
    ``'MRR'`` in descending order. The input frame is not modified.

    Args:
        df: Wide results table with a ``'Method'`` column plus metric columns.
        dataset: Dataset name, e.g. ``'STARK-AMAZON'``.

    Returns:
        A new DataFrame. Rows keep the index labels they had in ``df``.

    Raises:
        KeyError: If ``'Method'`` is missing or no ``'MRR'`` column was
            selected for the dataset.
    """
    # Select on the exact text before the last '_'. A plain substring test
    # would also pick up columns of any dataset whose name merely contains
    # `dataset`; after the prefix is stripped that yields duplicate metric
    # names and the sort on 'MRR' fails.
    metric_columns = [col for col in df.columns if col.rpartition('_')[0] == dataset]
    if not metric_columns:
        # Backward compatibility: callers that pass only a fragment of the
        # dataset name (e.g. 'AMAZON') keep the previous substring behaviour.
        metric_columns = [col for col in df.columns if dataset in col]

    filtered_df = df[['Method'] + metric_columns].copy()

    # Keep only the metric part of each name; 'Method' has no '_' and is
    # therefore left unchanged.
    filtered_df.columns = [col.rpartition('_')[2] for col in filtered_df.columns]

    return filtered_df.sort_values('MRR', ascending=False)

# Custom stylesheet passed to gr.Blocks(css=css) when the UI is built.
#   * `table > thead`: white-space is set to normal for the table header.
#   * `table`: sets the custom property --cell-width-1 to 250px
#     (presumably read by Gradio's dataframe to size the first column -- confirm).
#   * second cell of every body row: its inner <div> gets a horizontal
#     scrollbar when its content overflows.
css = """
table > thead {
    white-space: normal
}

table {
    --cell-width-1: 250px
}

table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}
"""

def _add_dataset_tabs(results):
    """Add one tab per dataset, each showing that dataset's sorted table."""
    for tab_label, dataset_key in (
        ("Amazon", "STARK-AMAZON"),
        ("MAG", "STARK-MAG"),
        ("Prime", "STARK-PRIME"),
    ):
        with gr.TabItem(tab_label):
            gr.DataFrame(format_dataframe(results, dataset_key))


# Page layout: a title, a pointer to the paper, then two top-level tabs that
# each contain the three per-dataset tables.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Semi-structured Retrieval Benchmark (STaRK) Leaderboard")
    gr.Markdown("Refer to the [STaRK paper](https://arxiv.org/pdf/2404.13207) for details on metrics, tasks and models.")

    with gr.Tabs() as outer_tabs:
        with gr.TabItem("Synthesized"):
            gr.Markdown("## Synthesized Data Results")
            with gr.Tabs() as inner_tabs_synthesized:
                _add_dataset_tabs(df)

        # NOTE(review): this tab is fed the same `df` as "Synthesized", so both
        # top-level tabs currently display identical numbers -- confirm whether
        # a separate human-generated table is meant to be shown here.
        with gr.TabItem("Human-Generated"):
            gr.Markdown("## Human-Generated Data Results")
            with gr.Tabs() as inner_tabs_human:
                _add_dataset_tabs(df)

# Start the Gradio server as soon as the module is executed.
demo.launch()