Shiyu Zhao committed on
Commit
031d82b
·
1 Parent(s): 5c7b480

Update space

Browse files
Files changed (1) hide show
  1. app.py +31 -31
app.py CHANGED
@@ -21,30 +21,18 @@ data = {
21
 
22
  df = pd.DataFrame(data)
23
 
24
- def format_dataframe(df):
25
- # Melt the dataframe to create 'Dataset', 'Metric', and 'Value' columns
26
- melted_df = df.melt(id_vars=['Method'], var_name='Dataset_Metric', value_name='Value')
27
- melted_df[['Dataset', 'Metric']] = melted_df['Dataset_Metric'].str.split('_', expand=True)
28
- melted_df = melted_df.drop('Dataset_Metric', axis=1)
29
 
30
- # Pivot the table to get the desired format
31
- pivoted_df = melted_df.pivot_table(values='Value', index=['Method', 'Dataset'], columns='Metric', aggfunc='first').reset_index()
32
- pivoted_df = pivoted_df.sort_values(['Dataset', 'MRR'], ascending=[True, False])
33
 
34
- # Reorder columns
35
- column_order = ['Method', 'Dataset', 'Hit@1', 'Hit@5', 'R@20', 'MRR']
36
- pivoted_df = pivoted_df.reindex(columns=column_order)
37
 
38
- return pivoted_df
39
-
40
- def create_leaderboard(dataset):
41
- formatted_df = format_dataframe(df)
42
- if dataset != 'All':
43
- formatted_df = formatted_df[formatted_df['Dataset'] == dataset]
44
- formatted_df = formatted_df.drop('Dataset', axis=1)
45
- return formatted_df
46
-
47
- datasets = ['All'] + sorted(df.columns[1:].str.split('_').str[0].unique().tolist())
48
 
49
  css = """
50
  table > thead {
@@ -61,16 +49,28 @@ table > tbody > tr > td:nth-child(2) > div {
61
  """
62
 
63
  with gr.Blocks(css=css) as demo:
64
- gr.Markdown("# STARK-Human(-Generated) Leaderboard")
65
- gr.Markdown("Testing results on STARK-Human(-Generated).")
66
-
67
- with gr.Row():
68
- dataset_dropdown = gr.Dropdown(choices=datasets, value='All', label="Select Dataset")
69
-
70
- leaderboard = gr.DataFrame(interactive=False)
71
-
72
- dataset_dropdown.change(create_leaderboard, inputs=dataset_dropdown, outputs=leaderboard)
73
 
74
- demo.load(create_leaderboard, inputs=dataset_dropdown, outputs=leaderboard)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  demo.launch()
 
21
 
22
  df = pd.DataFrame(data)
23
 
24
def format_dataframe(df, dataset):
    """Return the leaderboard table for a single dataset, best MRR first.

    ``df`` is the wide results table: one ``'Method'`` column plus metric
    columns named ``'<dataset>_<metric>'`` (for example
    ``'STARK-MAG_Hit@1'``).  The columns belonging to ``dataset`` are
    selected, the dataset prefix is stripped from their names, and the rows
    are sorted by ``'MRR'`` in descending order.

    Args:
        df: Wide results table containing a ``'Method'`` column.
        dataset: Dataset name, i.e. the part of the column name before the
            last underscore.

    Returns:
        A new DataFrame with ``'Method'`` followed by the dataset's metric
        columns (prefix removed), sorted by ``'MRR'`` descending.  ``df``
        itself is not modified.

    Raises:
        KeyError: If no ``'MRR'`` column can be found for ``dataset``.
    """
    # Match the dataset prefix exactly.  A plain substring test would let
    # "STARK-MAG" also pick up e.g. "STARK-MAG-HUMAN_MRR", which produces
    # duplicate metric names once the prefix is stripped.
    metric_cols = [
        col for col in df.columns
        if col != 'Method' and col.rpartition('_')[0] == dataset
    ]
    if not metric_cols:
        # Backward-compatible fallback: callers that pass only a fragment of
        # the dataset name still get the previous substring behaviour.
        metric_cols = [
            col for col in df.columns
            if col != 'Method' and dataset in col
        ]

    # Keep only the text after the last underscore as the display name;
    # names without an underscore are left unchanged.
    display_names = {col: col.rpartition('_')[2] for col in metric_cols}
    filtered_df = df[['Method'] + metric_cols].rename(columns=display_names)

    if 'MRR' not in filtered_df.columns:
        raise KeyError(
            f"No 'MRR' column found for dataset {dataset!r}; "
            f"available columns: {list(df.columns)}"
        )

    return filtered_df.sort_values('MRR', ascending=False)
 
 
 
 
 
 
 
 
 
36
 
37
  css = """
38
  table > thead {
 
49
  """
50
 
51
  with gr.Blocks(css=css) as demo:
52
+ gr.Markdown("# Semi-structured Retrieval Benchmark (STaRK) Leaderboard")
53
+ gr.Markdown("Refer to the [STaRK paper](https://arxiv.org/pdf/2404.13207) for details on metrics, tasks and models.")
 
 
 
 
 
 
 
54
 
55
+ with gr.Tabs() as outer_tabs:
56
+ with gr.TabItem("Synthesized"):
57
+ gr.Markdown("## Synthesized Data Results")
58
+ with gr.Tabs() as inner_tabs_synthesized:
59
+ with gr.TabItem("Amazon"):
60
+ gr.DataFrame(format_dataframe(df, "STARK-AMAZON"))
61
+ with gr.TabItem("MAG"):
62
+ gr.DataFrame(format_dataframe(df, "STARK-MAG"))
63
+ with gr.TabItem("Prime"):
64
+ gr.DataFrame(format_dataframe(df, "STARK-PRIME"))
65
+
66
+ with gr.TabItem("Human-Generated"):
67
+ gr.Markdown("## Human-Generated Data Results")
68
+ with gr.Tabs() as inner_tabs_human:
69
+ with gr.TabItem("Amazon"):
70
+ gr.DataFrame(format_dataframe(df, "STARK-AMAZON"))
71
+ with gr.TabItem("MAG"):
72
+ gr.DataFrame(format_dataframe(df, "STARK-MAG"))
73
+ with gr.TabItem("Prime"):
74
+ gr.DataFrame(format_dataframe(df, "STARK-PRIME"))
75
 
76
  demo.launch()