Spaces:

SaylorTwift
/

OpenEvalsDetails

Running

App Files Files Community

Linker1907 committed 24 days ago

Commit

e4b8ab6

1 Parent(s): c283445

init

Browse files

Files changed (1) hide show

app.py +59 -13

app.py CHANGED Viewed

@@ -66,7 +66,7 @@ def get_available_splits(repo, benchmark):
 def load_details_and_results(repo, subset, split):
     def worker(example):
         example["predictions"] = example["predictions"]
-        example["gold"] = example["gold"][0]
         example["metrics"] = example["metrics"]
         return example
@@ -198,16 +198,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         )
     with gr.Row():
-        benchmark = gr.Dropdown(
             label="Benchmark",
             choices=[],
-            info="Select the benchmark subset"
         )
         split = gr.Dropdown(
             label="Split",
             choices=[],
             info="Select evaluation."
         )
     with gr.Row():
         example_index = gr.Number(
@@ -229,6 +245,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     def get_active_repo(selection_method, dropdown_value, custom_value):
         """Return the repository value matching the chosen selection method.

         ``custom_value`` is returned when ``selection_method`` equals
         "Custom"; in every other case ``dropdown_value`` is returned.
         """
         if selection_method == "Custom":
             return custom_value
         return dropdown_value
     # Update the event handlers
     repo_select.change(
         fn=toggle_repo_input,
@@ -236,6 +263,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         outputs=[repo_dropdown, repo_custom]
     )
     # Update the repository change handler to update available benchmarks
     def update_benchmarks(selection_method, dropdown_value, custom_value):
         repo = get_active_repo(selection_method, dropdown_value, custom_value)
@@ -246,35 +279,48 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     repo_dropdown.change(
         fn=update_benchmarks,
         inputs=[repo_select, repo_dropdown, repo_custom],
-        outputs=benchmark
     )
     repo_custom.change(
         fn=update_benchmarks,
         inputs=[repo_select, repo_dropdown, repo_custom],
-        outputs=benchmark
     )
     # Update the benchmark change handler
-    benchmark.change(
-        fn=lambda selection_method, dropdown, custom, bench: update_splits(
             get_active_repo(selection_method, dropdown, custom),
-            bench
         ),
-        inputs=[repo_select, repo_dropdown, repo_custom, benchmark],
         outputs=split
     )
     # Display results
     output = gr.HTML()
     submit_btn.click(
-        fn=lambda selection_method, dropdown, custom, bench, split_val, idx: display_model_details(
-            get_active_repo(selection_method, dropdown, custom),
-            bench,
             split_val,
             idx
         ),
-        inputs=[repo_select, repo_dropdown, repo_custom, benchmark, split, example_index],
         outputs=output
     )

 def load_details_and_results(repo, subset, split):
     def worker(example):
         example["predictions"] = example["predictions"]
+        example["gold"] = example["gold"]
         example["metrics"] = example["metrics"]
         return example
         )
     with gr.Row():
+        benchmark_select = gr.Radio(
+            choices=["Choose from list", "Custom"],
+            label="Benchmark Selection Method",
+            value="Choose from list",
+            info="Select how you want to specify the benchmark"
+        )
+    with gr.Row():
+        benchmark_dropdown = gr.Dropdown(
             label="Benchmark",
             choices=[],
+            info="Select the benchmark subset",
+            visible=True
+        )
+        benchmark_custom = gr.Textbox(
+            label="Custom Benchmark",
+            placeholder="e.g. lighteval|gpqa:diamond|0",
+            visible=False,
+            info="Enter custom benchmark name"
         )
         split = gr.Dropdown(
             label="Split",
             choices=[],
             info="Select evaluation."
         )
+        load_splits_btn = gr.Button("Load Splits", variant="secondary")
     with gr.Row():
         example_index = gr.Number(
     def get_active_repo(selection_method, dropdown_value, custom_value):
         """Return the repository value matching the chosen selection method.

         ``custom_value`` is returned when ``selection_method`` equals
         "Custom"; in every other case ``dropdown_value`` is returned.
         """
         if selection_method == "Custom":
             return custom_value
         return dropdown_value
+    # Add this function to handle benchmark visibility toggling
+    def toggle_benchmark_input(choice):
+        return {
+            benchmark_dropdown: gr.update(visible=(choice == "Choose from list")),
+            benchmark_custom: gr.update(visible=(choice == "Custom"))
+        }
+    # Add this function to get the active benchmark name
+    def get_active_benchmark(selection_method, dropdown_value, custom_value):
+        return custom_value if selection_method == "Custom" else dropdown_value
     # Update the event handlers
     repo_select.change(
         fn=toggle_repo_input,
         outputs=[repo_dropdown, repo_custom]
     )
+    benchmark_select.change(
+        fn=toggle_benchmark_input,
+        inputs=[benchmark_select],
+        outputs=[benchmark_dropdown, benchmark_custom]
+    )
     # Update the repository change handler to update available benchmarks
     def update_benchmarks(selection_method, dropdown_value, custom_value):
         repo = get_active_repo(selection_method, dropdown_value, custom_value)
     repo_dropdown.change(
         fn=update_benchmarks,
         inputs=[repo_select, repo_dropdown, repo_custom],
+        outputs=benchmark_dropdown
     )
     repo_custom.change(
         fn=update_benchmarks,
         inputs=[repo_select, repo_dropdown, repo_custom],
+        outputs=benchmark_dropdown
     )
     # Update the benchmark change handler
+    benchmark_dropdown.change(
+        fn=lambda selection_method, dropdown, custom, bench: gr.Dropdown(choices=[], value=None),
+        inputs=[repo_select, repo_dropdown, repo_custom, benchmark_dropdown],
+        outputs=split
+    )
+    benchmark_custom.change(
+        fn=lambda selection_method, dropdown, custom, bench: gr.Dropdown(choices=[], value=None),
+        inputs=[repo_select, repo_dropdown, repo_custom, benchmark_custom],
+        outputs=split
+    )
+    # Add handler for the load splits button
+    load_splits_btn.click(
+        fn=lambda selection_method, dropdown, custom, bench_selection_method, bench_dropdown, bench_custom: update_splits(
             get_active_repo(selection_method, dropdown, custom),
+            get_active_benchmark(bench_selection_method, bench_dropdown, bench_custom)
         ),
+        inputs=[repo_select, repo_dropdown, repo_custom, benchmark_select, benchmark_dropdown, benchmark_custom],
         outputs=split
     )
     # Display results
     output = gr.HTML()
     submit_btn.click(
+        fn=lambda repo_selection_method, repo_dropdown, repo_custom, bench_selection_method, bench_dropdown, bench_custom, split_val, idx: display_model_details(
+            get_active_repo(repo_selection_method, repo_dropdown, repo_custom),
+            get_active_benchmark(bench_selection_method, bench_dropdown, bench_custom),
             split_val,
             idx
         ),
+        inputs=[repo_select, repo_dropdown, repo_custom, benchmark_select, benchmark_dropdown, benchmark_custom, split, example_index],
         outputs=output
     )