Linker1907 committed on
Commit
e4b8ab6
·
1 Parent(s): c283445
Files changed (1) hide show
  1. app.py +59 -13
app.py CHANGED
@@ -66,7 +66,7 @@ def get_available_splits(repo, benchmark):
66
  def load_details_and_results(repo, subset, split):
67
  def worker(example):
68
  example["predictions"] = example["predictions"]
69
- example["gold"] = example["gold"][0]
70
  example["metrics"] = example["metrics"]
71
  return example
72
 
@@ -198,16 +198,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
198
  )
199
 
200
  with gr.Row():
201
- benchmark = gr.Dropdown(
 
 
 
 
 
 
 
 
202
  label="Benchmark",
203
  choices=[],
204
- info="Select the benchmark subset"
 
 
 
 
 
 
 
205
  )
206
  split = gr.Dropdown(
207
  label="Split",
208
  choices=[],
209
  info="Select evaluation."
210
  )
 
211
 
212
  with gr.Row():
213
  example_index = gr.Number(
@@ -229,6 +245,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
229
  def get_active_repo(selection_method, dropdown_value, custom_value):
230
  return custom_value if selection_method == "Custom" else dropdown_value
231
 
 
 
 
 
 
 
 
 
 
 
 
232
  # Update the event handlers
233
  repo_select.change(
234
  fn=toggle_repo_input,
@@ -236,6 +263,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
236
  outputs=[repo_dropdown, repo_custom]
237
  )
238
 
 
 
 
 
 
 
239
  # Update the repository change handler to update available benchmarks
240
  def update_benchmarks(selection_method, dropdown_value, custom_value):
241
  repo = get_active_repo(selection_method, dropdown_value, custom_value)
@@ -246,35 +279,48 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
246
  repo_dropdown.change(
247
  fn=update_benchmarks,
248
  inputs=[repo_select, repo_dropdown, repo_custom],
249
- outputs=benchmark
250
  )
251
 
252
  repo_custom.change(
253
  fn=update_benchmarks,
254
  inputs=[repo_select, repo_dropdown, repo_custom],
255
- outputs=benchmark
256
  )
257
 
258
  # Update the benchmark change handler
259
- benchmark.change(
260
- fn=lambda selection_method, dropdown, custom, bench: update_splits(
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  get_active_repo(selection_method, dropdown, custom),
262
- bench
263
  ),
264
- inputs=[repo_select, repo_dropdown, repo_custom, benchmark],
265
  outputs=split
266
  )
267
 
268
  # Display results
269
  output = gr.HTML()
270
  submit_btn.click(
271
- fn=lambda selection_method, dropdown, custom, bench, split_val, idx: display_model_details(
272
- get_active_repo(selection_method, dropdown, custom),
273
- bench,
274
  split_val,
275
  idx
276
  ),
277
- inputs=[repo_select, repo_dropdown, repo_custom, benchmark, split, example_index],
278
  outputs=output
279
  )
280
 
 
66
  def load_details_and_results(repo, subset, split):
67
  def worker(example):
68
  example["predictions"] = example["predictions"]
69
+ example["gold"] = example["gold"]
70
  example["metrics"] = example["metrics"]
71
  return example
72
 
 
198
  )
199
 
200
  with gr.Row():
201
+ benchmark_select = gr.Radio(
202
+ choices=["Choose from list", "Custom"],
203
+ label="Benchmark Selection Method",
204
+ value="Choose from list",
205
+ info="Select how you want to specify the benchmark"
206
+ )
207
+
208
+ with gr.Row():
209
+ benchmark_dropdown = gr.Dropdown(
210
  label="Benchmark",
211
  choices=[],
212
+ info="Select the benchmark subset",
213
+ visible=True
214
+ )
215
+ benchmark_custom = gr.Textbox(
216
+ label="Custom Benchmark",
217
+ placeholder="e.g. lighteval|gpqa:diamond|0",
218
+ visible=False,
219
+ info="Enter custom benchmark name"
220
  )
221
  split = gr.Dropdown(
222
  label="Split",
223
  choices=[],
224
  info="Select evaluation."
225
  )
226
+ load_splits_btn = gr.Button("Load Splits", variant="secondary")
227
 
228
  with gr.Row():
229
  example_index = gr.Number(
 
245
  def get_active_repo(selection_method, dropdown_value, custom_value):
246
  return custom_value if selection_method == "Custom" else dropdown_value
247
 
248
+ # Add this function to handle benchmark visibility toggling
249
+ def toggle_benchmark_input(choice):
250
+ return {
251
+ benchmark_dropdown: gr.update(visible=(choice == "Choose from list")),
252
+ benchmark_custom: gr.update(visible=(choice == "Custom"))
253
+ }
254
+
255
+ # Add this function to get the active benchmark name
256
+ def get_active_benchmark(selection_method, dropdown_value, custom_value):
257
+ return custom_value if selection_method == "Custom" else dropdown_value
258
+
259
  # Update the event handlers
260
  repo_select.change(
261
  fn=toggle_repo_input,
 
263
  outputs=[repo_dropdown, repo_custom]
264
  )
265
 
266
+ benchmark_select.change(
267
+ fn=toggle_benchmark_input,
268
+ inputs=[benchmark_select],
269
+ outputs=[benchmark_dropdown, benchmark_custom]
270
+ )
271
+
272
  # Update the repository change handler to update available benchmarks
273
  def update_benchmarks(selection_method, dropdown_value, custom_value):
274
  repo = get_active_repo(selection_method, dropdown_value, custom_value)
 
279
  repo_dropdown.change(
280
  fn=update_benchmarks,
281
  inputs=[repo_select, repo_dropdown, repo_custom],
282
+ outputs=benchmark_dropdown
283
  )
284
 
285
  repo_custom.change(
286
  fn=update_benchmarks,
287
  inputs=[repo_select, repo_dropdown, repo_custom],
288
+ outputs=benchmark_dropdown
289
  )
290
 
291
  # Update the benchmark change handler
292
+ benchmark_dropdown.change(
293
+ fn=lambda selection_method, dropdown, custom, bench: gr.Dropdown(choices=[], value=None),
294
+ inputs=[repo_select, repo_dropdown, repo_custom, benchmark_dropdown],
295
+ outputs=split
296
+ )
297
+
298
+ benchmark_custom.change(
299
+ fn=lambda selection_method, dropdown, custom, bench: gr.Dropdown(choices=[], value=None),
300
+ inputs=[repo_select, repo_dropdown, repo_custom, benchmark_custom],
301
+ outputs=split
302
+ )
303
+
304
+ # Add handler for the load splits button
305
+ load_splits_btn.click(
306
+ fn=lambda selection_method, dropdown, custom, bench_selection_method, bench_dropdown, bench_custom: update_splits(
307
  get_active_repo(selection_method, dropdown, custom),
308
+ get_active_benchmark(bench_selection_method, bench_dropdown, bench_custom)
309
  ),
310
+ inputs=[repo_select, repo_dropdown, repo_custom, benchmark_select, benchmark_dropdown, benchmark_custom],
311
  outputs=split
312
  )
313
 
314
  # Display results
315
  output = gr.HTML()
316
  submit_btn.click(
317
+ fn=lambda repo_selection_method, repo_dropdown, repo_custom, bench_selection_method, bench_dropdown, bench_custom, split_val, idx: display_model_details(
318
+ get_active_repo(repo_selection_method, repo_dropdown, repo_custom),
319
+ get_active_benchmark(bench_selection_method, bench_dropdown, bench_custom),
320
  split_val,
321
  idx
322
  ),
323
+ inputs=[repo_select, repo_dropdown, repo_custom, benchmark_select, benchmark_dropdown, benchmark_custom, split, example_index],
324
  outputs=output
325
  )
326