AdnanElAssadi commited on
Commit
9069a07
·
verified ·
1 Parent(s): f39c123

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -352
app.py CHANGED
@@ -286,369 +286,166 @@ def create_reranking_interface(task_data):
286
  inputs=[],
287
  outputs=document_containers
288
  )
289
-
290
- with gr.Row(equal_height=True):
291
- prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
292
- submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
293
- next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
294
-
295
- with gr.Row():
296
- save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
297
- # Add a clear button to reset rankings
298
- clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")
299
-
300
- # Initialize the document list for the first sample
301
- status_val, progress_val = initialize_document_list(samples[0]["id"])
302
- status_box.value = status_val
303
- progress_text.value = progress_val
304
-
305
- # Navigation functions
306
- def nav_to_prev(current_id):
307
- current_sample = next((s for s in samples if s["id"] == current_id), None)
308
- if not current_sample:
 
 
 
 
 
 
309
  return current_id
310
 
311
- current_idx = samples.index(current_sample)
312
- if current_idx > 0:
313
- prev_sample = samples[current_idx - 1]
314
- return prev_sample["id"]
315
- return current_id
316
-
317
- def nav_to_next(current_id):
318
- current_sample = next((s for s in samples if s["id"] == current_id), None)
319
- if not current_sample:
320
  return current_id
321
 
322
- current_idx = samples.index(current_sample)
323
- if current_idx < len(samples) - 1:
324
- next_sample = samples[current_idx + 1]
325
- return next_sample["id"]
326
- return current_id
327
-
328
- # Connect navigation buttons
329
- prev_btn.click(
330
- nav_to_prev,
331
- inputs=[current_sample_id],
332
- outputs=[current_sample_id]
333
- ).then(
334
- lambda id: initialize_document_list(id),
335
- inputs=[current_sample_id],
336
- outputs=[status_box, progress_text]
337
- ).then(
338
- lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
339
- inputs=[current_sample_id],
340
- outputs=[query_text]
341
- )
342
-
343
- next_btn.click(
344
- nav_to_next,
345
- inputs=[current_sample_id],
346
- outputs=[current_sample_id]
347
- ).then(
348
- lambda id: initialize_document_list(id),
349
- inputs=[current_sample_id],
350
- outputs=[status_box, progress_text]
351
- ).then(
352
- lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
353
- inputs=[current_sample_id],
354
- outputs=[query_text]
355
- )
356
-
357
- # Submit button
358
- submit_btn.click(
359
- save_ranking,
360
- inputs=[current_sample_id],
361
- outputs=[status_box, progress_text]
362
- )
363
-
364
- # Add function to reset the current ranking order
365
- def reset_order():
366
- nonlocal current_order
367
- sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
368
- if not sample:
369
- return [gr.update() for _ in document_containers]
370
-
371
- docs = sample["candidates"]
372
- # Reset to original order
373
- current_order = list(range(len(docs)))
374
-
375
- # Update UI
376
- updates = []
377
- for i in range(len(document_containers)):
378
- if i < len(docs):
379
- doc_idx = current_order[i]
380
- rank_class = f"rank-{i+1}"
381
- updates.append(gr.update(
382
- value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
383
- visible=True,
384
- elem_classes=["document-text", rank_class]
385
- ))
386
- else:
387
- updates.append(gr.update(value="", visible=False))
 
 
 
 
 
 
 
 
 
 
 
388
 
389
- return updates
390
-
391
- # Connect reset button
392
- clear_btn.click(
393
- reset_order,
394
- inputs=[],
395
- outputs=document_containers
396
- )
397
-
398
- # Save all results
399
- def save_results():
400
- """Save all collected results to a file."""
401
- output_path = f"{task_data['task_name']}_human_results.json"
402
- with open(output_path, "w") as f:
403
- json.dump(results, f, indent=2)
404
- return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
405
-
406
- save_btn.click(save_results, outputs=[status_box])
407
 
408
  return demo
409
 
410
- # Main app with file upload capability
411
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  gr.Markdown("# MTEB Human Evaluation Demo")
413
 
414
- with gr.Tabs():
415
- with gr.TabItem("Demo"):
416
- gr.Markdown("""
417
- ## MTEB Human Evaluation Interface
418
-
419
- This interface allows you to evaluate the relevance of documents for reranking tasks.
420
- """)
421
-
422
- # Function to get the most recent task file
423
- def get_latest_task_file():
424
- # Check first in uploaded_tasks directory
425
- os.makedirs("uploaded_tasks", exist_ok=True)
426
- uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
427
-
428
- if uploaded_tasks:
429
- # Sort by modification time, newest first
430
- uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
431
- return os.path.join("uploaded_tasks", uploaded_tasks[0])
432
-
433
- # Fall back to default example
434
- return "AskUbuntuDupQuestions_human_eval.json"
435
-
436
- # Load the task file
437
- task_file = get_latest_task_file()
438
-
439
- try:
440
- with open(task_file, "r") as f:
441
- task_data = json.load(f)
442
-
443
- # Show which task is currently loaded
444
- gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
445
-
446
- # Display the interface
447
- reranking_demo = create_reranking_interface(task_data)
448
- except Exception as e:
449
- gr.Markdown(f"**Error loading task: {str(e)}**")
450
- gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
451
 
452
- with gr.TabItem("Upload & Evaluate"):
453
- gr.Markdown("""
454
- ## Upload Your Own Task File
455
-
456
- If you have a prepared task file, you can upload it here to create an evaluation interface.
457
- """)
458
-
459
- with gr.Row():
460
- with gr.Column(scale=1):
461
- file_input = gr.File(label="Upload a task file (JSON)")
462
- load_btn = gr.Button("Load Task")
463
- upload_message = gr.Textbox(label="Status", interactive=False, value="")
464
-
465
- # Add task list for previously uploaded tasks
466
- gr.Markdown("### Previous Uploads")
467
-
468
- # Function to list existing task files in the tasks directory
469
- def list_task_files():
470
- os.makedirs("uploaded_tasks", exist_ok=True)
471
- tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
472
- if not tasks:
473
- return "No task files uploaded yet."
474
- return "\n".join([f"- {t}" for t in tasks])
475
-
476
- task_list = gr.Markdown(list_task_files())
477
- refresh_btn = gr.Button("Refresh List")
478
-
479
- # Add results management section
480
- gr.Markdown("### Results Management")
481
-
482
- # Function to list existing result files
483
- def list_result_files():
484
- results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
485
- if not results:
486
- return "No result files available yet."
487
-
488
- result_links = []
489
- for r in results:
490
- # Calculate completion stats
491
- try:
492
- with open(r, "r") as f:
493
- result_data = json.load(f)
494
- annotation_count = len(result_data.get("annotations", []))
495
- task_name = result_data.get("task_name", "Unknown")
496
- result_links.append(f"- {r} ({annotation_count} annotations for {task_name})")
497
- except:
498
- result_links.append(f"- {r}")
499
-
500
- return "\n".join(result_links)
501
-
502
- results_list = gr.Markdown(list_result_files())
503
- download_results_btn = gr.Button("Download Results")
504
-
505
- # Right side - will contain the actual interface
506
- with gr.Column(scale=2):
507
- task_container = gr.HTML()
508
-
509
- # Handle file upload and storage
510
- def handle_upload(file):
511
- if not file:
512
- return "Please upload a task file", task_list.value, task_container.value
513
-
514
- try:
515
- # Create directory if it doesn't exist
516
- os.makedirs("uploaded_tasks", exist_ok=True)
517
-
518
- # Read the uploaded file
519
- with open(file.name, "r") as f:
520
- task_data = json.load(f)
521
-
522
- # Validate task format
523
- if "task_name" not in task_data or "samples" not in task_data:
524
- return "Invalid task file format. Must contain 'task_name' and 'samples' fields.", task_list.value, task_container.value
525
-
526
- # Save to a consistent location
527
- task_filename = f"uploaded_tasks/{task_data['task_name']}_task.json"
528
- with open(task_filename, "w") as f:
529
- json.dump(task_data, f, indent=2)
530
-
531
- # Instead of trying to create the interface here,
532
- # we'll return a message with instructions
533
- return f"Task '{task_data['task_name']}' uploaded successfully with {len(task_data['samples'])} samples. Please refresh the app and use the Demo tab to evaluate it.", list_task_files(), f"""
534
- <div style="padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
535
- <h3>Task uploaded successfully!</h3>
536
- <p>Task Name: {task_data['task_name']}</p>
537
- <p>Samples: {len(task_data['samples'])}</p>
538
- <p>To evaluate this task:</p>
539
- <ol>
540
- <li>Refresh the app</li>
541
- <li>The Demo tab will now use your uploaded task</li>
542
- <li>Complete your evaluations</li>
543
- <li>Results will be saved as {task_data['task_name']}_human_results.json</li>
544
- </ol>
545
- </div>
546
- """
547
- except Exception as e:
548
- return f"Error processing task file: {str(e)}", task_list.value, task_container.value
549
-
550
- # Function to prepare results for download
551
- def prepare_results_for_download():
552
- results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
553
- if not results:
554
- return None
555
-
556
- # Create a zip file with all results
557
- import zipfile
558
- zip_path = "mteb_human_eval_results.zip"
559
- with zipfile.ZipFile(zip_path, 'w') as zipf:
560
- for r in results:
561
- zipf.write(r)
562
-
563
- return zip_path
564
-
565
- # Connect events
566
- load_btn.click(handle_upload, inputs=[file_input], outputs=[upload_message, task_list, task_container])
567
- refresh_btn.click(list_task_files, outputs=[task_list])
568
- download_results_btn.click(prepare_results_for_download, outputs=[gr.File(label="Download Results")])
569
 
570
- with gr.TabItem("Results Management"):
571
- gr.Markdown("""
572
- ## Manage Evaluation Results
573
-
574
- View, download, and analyze your evaluation results.
575
- """)
576
-
577
- # Function to load and display result stats
578
- def get_result_stats():
579
- results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
580
- if not results:
581
- return "No result files available yet."
582
-
583
- stats = []
584
- for r in results:
585
- try:
586
- with open(r, "r") as f:
587
- result_data = json.load(f)
588
-
589
- task_name = result_data.get("task_name", "Unknown")
590
- annotations = result_data.get("annotations", [])
591
- annotation_count = len(annotations)
592
-
593
- # Calculate completion percentage
594
- sample_ids = set(a.get("sample_id") for a in annotations)
595
-
596
- # Try to get the total sample count from the corresponding task file
597
- total_samples = 0
598
- task_file = f"uploaded_tasks/{task_name}_task.json"
599
- if os.path.exists(task_file):
600
- with open(task_file, "r") as f:
601
- task_data = json.load(f)
602
- total_samples = len(task_data.get("samples", []))
603
-
604
- completion = f"{len(sample_ids)}/{total_samples}" if total_samples else f"{len(sample_ids)} samples"
605
-
606
- stats.append(f"### {task_name}\n- Annotations: {annotation_count}\n- Completion: {completion}\n- File: {r}")
607
- except Exception as e:
608
- stats.append(f"### {r}\n- Error loading results: {str(e)}")
609
-
610
- return "\n\n".join(stats)
611
-
612
- result_stats = gr.Markdown(get_result_stats())
613
- refresh_results_btn = gr.Button("Refresh Results")
614
-
615
- # Add download options
616
- with gr.Row():
617
- download_all_btn = gr.Button("Download All Results (ZIP)")
618
- result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download")
619
- download_selected_btn = gr.Button("Download Selected")
620
-
621
- # Add results visualization placeholder
622
- gr.Markdown("### Results Visualization")
623
- gr.Markdown("*Visualization features will be added in a future update.*")
624
-
625
- # Connect events
626
- refresh_results_btn.click(get_result_stats, outputs=[result_stats])
627
-
628
- # Function to prepare all results for download as ZIP
629
- def prepare_all_results():
630
- import zipfile
631
- zip_path = "mteb_human_eval_results.zip"
632
- with zipfile.ZipFile(zip_path, 'w') as zipf:
633
- for r in [f for f in os.listdir(".") if f.endswith("_human_results.json")]:
634
- zipf.write(r)
635
- return zip_path
636
-
637
- # Function to return a single result file
638
- def get_selected_result(filename):
639
- if not filename:
640
- return None
641
- if os.path.exists(filename):
642
- return filename
643
- return None
644
-
645
- # Update dropdown when refreshing results
646
- def update_result_dropdown():
647
- return gr.Dropdown.update(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")])
648
-
649
- refresh_results_btn.click(update_result_dropdown, outputs=[result_select])
650
- download_all_btn.click(prepare_all_results, outputs=[gr.File(label="Download All Results")])
651
- download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
652
 
653
  if __name__ == "__main__":
654
- demo.launch()
 
286
  inputs=[],
287
  outputs=document_containers
288
  )
289
+
290
+ with gr.Row(equal_height=True):
291
+ prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
292
+ submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
293
+ next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
294
+
295
+ with gr.Row():
296
+ save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
297
+ # Add a clear button to reset rankings
298
+ clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")
299
+
300
+ # Initialize the document list for the first sample
301
+ status_val, progress_val = initialize_document_list(samples[0]["id"])
302
+ status_box.value = status_val
303
+ progress_text.value = progress_val
304
+
305
+ # Navigation functions
306
+ def nav_to_prev(current_id):
307
+ current_sample = next((s for s in samples if s["id"] == current_id), None)
308
+ if not current_sample:
309
+ return current_id
310
+
311
+ current_idx = samples.index(current_sample)
312
+ if current_idx > 0:
313
+ prev_sample = samples[current_idx - 1]
314
+ return prev_sample["id"]
315
  return current_id
316
 
317
+ def nav_to_next(current_id):
318
+ current_sample = next((s for s in samples if s["id"] == current_id), None)
319
+ if not current_sample:
320
+ return current_id
321
+
322
+ current_idx = samples.index(current_sample)
323
+ if current_idx < len(samples) - 1:
324
+ next_sample = samples[current_idx + 1]
325
+ return next_sample["id"]
326
  return current_id
327
 
328
+ # Connect navigation buttons
329
+ prev_btn.click(
330
+ nav_to_prev,
331
+ inputs=[current_sample_id],
332
+ outputs=[current_sample_id]
333
+ ).then(
334
+ lambda id: initialize_document_list(id),
335
+ inputs=[current_sample_id],
336
+ outputs=[status_box, progress_text]
337
+ ).then(
338
+ lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
339
+ inputs=[current_sample_id],
340
+ outputs=[query_text]
341
+ )
342
+
343
+ next_btn.click(
344
+ nav_to_next,
345
+ inputs=[current_sample_id],
346
+ outputs=[current_sample_id]
347
+ ).then(
348
+ lambda id: initialize_document_list(id),
349
+ inputs=[current_sample_id],
350
+ outputs=[status_box, progress_text]
351
+ ).then(
352
+ lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
353
+ inputs=[current_sample_id],
354
+ outputs=[query_text]
355
+ )
356
+
357
+ # Submit button
358
+ submit_btn.click(
359
+ save_ranking,
360
+ inputs=[current_sample_id],
361
+ outputs=[status_box, progress_text]
362
+ )
363
+
364
+ # Add function to reset the current ranking order
365
+ def reset_order():
366
+ nonlocal current_order
367
+ sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
368
+ if not sample:
369
+ return [gr.update() for _ in document_containers]
370
+
371
+ docs = sample["candidates"]
372
+ # Reset to original order
373
+ current_order = list(range(len(docs)))
374
+
375
+ # Update UI
376
+ updates = []
377
+ for i in range(len(document_containers)):
378
+ if i < len(docs):
379
+ doc_idx = current_order[i]
380
+ rank_class = f"rank-{i+1}"
381
+ updates.append(gr.update(
382
+ value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
383
+ visible=True,
384
+ elem_classes=["document-text", rank_class]
385
+ ))
386
+ else:
387
+ updates.append(gr.update(value="", visible=False))
388
+
389
+ return updates
390
+
391
+ # Connect reset button
392
+ clear_btn.click(
393
+ reset_order,
394
+ inputs=[],
395
+ outputs=document_containers
396
+ )
397
+
398
+ # Save all results
399
+ def save_results():
400
+ """Save all collected results to a file."""
401
+ output_path = f"{task_data['task_name']}_human_results.json"
402
+ with open(output_path, "w") as f:
403
+ json.dump(results, f, indent=2)
404
+ return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
405
 
406
+ save_btn.click(save_results, outputs=[status_box])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
 
408
  return demo
409
 
410
+ # Just use a simplified version with the main demo functionality
411
+ def get_task_file():
412
+ """Get the task file to use for the demo."""
413
+ # Try to load the default example
414
+ default_task = "AskUbuntuDupQuestions_human_eval.json"
415
+ if os.path.exists(default_task):
416
+ return default_task
417
+
418
+ # If default doesn't exist, look for any other task files
419
+ task_files = [f for f in os.listdir(".") if f.endswith("_human_eval.json")]
420
+ if task_files:
421
+ return task_files[0]
422
+
423
+ # Raise error if no task files found
424
+ raise FileNotFoundError("No task files found. Please ensure there's a *_human_eval.json file in the current directory.")
425
+
426
+ # Main app with simplified structure
427
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
428
  gr.Markdown("# MTEB Human Evaluation Demo")
429
 
430
+ try:
431
+ # Load the task file
432
+ task_file = get_task_file()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
+ with open(task_file, "r") as f:
435
+ task_data = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
 
437
+ # Show which task is currently loaded
438
+ gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
439
+
440
+ # Display the interface
441
+ reranking_demo = create_reranking_interface(task_data)
442
+ except Exception as e:
443
+ gr.Markdown(f"**Error loading task: {str(e)}**")
444
+ error_details = gr.Textbox(
445
+ value=str(e),
446
+ label="Error Details",
447
+ interactive=False
448
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
  if __name__ == "__main__":
451
+ app.launch()