# Spaces: Sleeping
import json
import os
from pathlib import Path

import gradio as gr
def create_reranking_interface(task_data: dict, max_documents: int = 20) -> "gr.Blocks":
    """Create a Gradio interface for reranking evaluation.

    The annotator sees one query at a time together with its candidate
    documents, reorders the documents with "Move Up" / "Move Down" buttons and
    submits the resulting order. Submitted rankings are kept in memory and
    written to ``<task_name>_human_results.json`` in the working directory.

    Args:
        task_data: Task description. The keys read here are ``"task_name"``,
            ``"instructions"`` and ``"samples"``; every sample must provide
            ``"id"``, ``"query"`` and ``"candidates"`` (a list of documents).
        max_documents: Number of document slots created in the UI. Candidates
            beyond this number are not displayed; they keep their original
            relative order at the end of the ranking.

    Returns:
        The assembled ``gr.Blocks`` application.

    Raises:
        ValueError: If ``task_data["samples"]`` is empty.
        KeyError: If a required key is missing from ``task_data``.

    NOTE: annotation state (``results``, ``completed_samples`` and
    ``current_order``) lives in this closure, so it is shared by every browser
    session served by the returned app. Only the id of the displayed sample is
    per-session (``gr.State``).
    """
    samples = task_data["samples"]
    if not samples:
        raise ValueError("task_data['samples'] is empty; there is nothing to evaluate.")

    results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
    completed_samples = {s["id"]: False for s in samples}
    output_path = f"{task_data['task_name']}_human_results.json"

    # Document order for the active sample: current_order[position] is the
    # index of the candidate shown at that position. Always mutated in place
    # (slice assignment / swaps) so every nested handler sees the same list.
    current_order = []
    # One read-only Textbox per document slot, filled while building the UI.
    document_containers = []

    def find_sample(sample_id):
        """Return the first sample whose id equals ``sample_id``, or None."""
        return next((s for s in samples if s["id"] == sample_id), None)

    def find_position(sample_id):
        """Return the index in ``samples`` of ``sample_id``, or None."""
        return next((pos for pos, s in enumerate(samples) if s["id"] == sample_id), None)

    def find_annotation_index(sample_id):
        """Return the index of the stored annotation for ``sample_id``, or None."""
        return next(
            (i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id),
            None,
        )

    def progress_message():
        """Return the "Progress: done/total" text."""
        return f"Progress: {sum(completed_samples.values())}/{len(samples)}"

    def status_for(sample):
        """Return the status line shown while ``sample`` is displayed."""
        status = f"Viewing query {samples.index(sample) + 1} of {len(samples)}"
        if completed_samples[sample["id"]]:
            status += " (already completed)"
        return status

    def format_slot(docs, order, slot):
        """Return the text displayed in document slot ``slot``."""
        doc_idx = order[slot]
        return f"[RANK {slot + 1}] Document {doc_idx + 1}: {docs[doc_idx]}"

    def slot_classes(slot):
        """Return the CSS classes for an occupied slot (rank-based colouring)."""
        return ["document-text", f"rank-{slot + 1}"]

    def render_slots(docs, order):
        """Return one ``gr.update`` per document slot for the given order."""
        shown = min(len(order), len(docs))
        updates = []
        for slot in range(len(document_containers)):
            if slot < shown:
                updates.append(gr.update(
                    value=format_slot(docs, order, slot),
                    visible=True,
                    elem_classes=slot_classes(slot),
                ))
            else:
                updates.append(gr.update(value="", visible=False))
        return updates

    def untouched_slots():
        """Return no-op updates for every document slot."""
        return [gr.update() for _ in document_containers]

    def write_results():
        """Write all collected annotations to ``output_path`` as JSON."""
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)

    def save_ranking(sample_id):
        """Save the current document ordering as rankings.

        ``rankings[doc_idx]`` is the 1-based rank given to candidate
        ``doc_idx``; this is the layout ``load_sample`` expects when it
        restores a previously submitted order.

        Returns:
            ``(status message, progress message)``.
        """
        if find_sample(sample_id) is None:
            return "Query not found", progress_message()
        if not current_order:
            return "⚠️ No document ordering found", progress_message()

        # Invert position -> document into document -> rank.
        rankings = [0] * len(current_order)
        for position, doc_idx in enumerate(current_order):
            rankings[doc_idx] = position + 1

        annotation = {"sample_id": sample_id, "rankings": rankings}
        existing_idx = find_annotation_index(sample_id)
        if existing_idx is None:
            results["annotations"].append(annotation)
        else:
            results["annotations"][existing_idx] = annotation
        completed_samples[sample_id] = True

        # Persisting is best effort: the annotation is already stored in
        # memory, so a failed write must not lose the annotator's work.
        try:
            write_results()
        except (OSError, TypeError, ValueError):
            return "✅ Rankings saved in memory (file save failed)", progress_message()
        return "✅ Rankings saved successfully (in memory and to file)", progress_message()

    def load_sample(sample_id):
        """Make ``sample_id`` the active sample and describe how to display it.

        Restores the previously submitted order if the sample was already
        annotated, otherwise starts from the original candidate order.

        Returns:
            ``[status, progress, query, *document slot updates]``.
        """
        sample = find_sample(sample_id)
        if sample is None:
            return ["Query not found", progress_message(), gr.update()] + untouched_slots()

        docs = sample["candidates"]
        order = list(range(len(docs)))
        existing_idx = find_annotation_index(sample_id)
        if existing_idx is not None:
            saved = results["annotations"][existing_idx].get("rankings")
            # Ignore a stored ranking that does not match the candidate list.
            if saved and len(saved) == len(docs):
                order = sorted(range(len(docs)), key=lambda doc_idx: saved[doc_idx])
        current_order[:] = order

        return [status_for(sample), progress_message(), sample["query"]] + render_slots(docs, current_order)

    def make_move_handler(idx, offset):
        """Build the click handler moving slot ``idx`` by ``offset`` (-1 up, +1 down)."""
        def move_handler(sample_id):
            sample = find_sample(sample_id)
            if sample is None:
                return untouched_slots()
            target = idx + offset
            # Only swap when both the slot and its neighbour hold a document.
            if idx < len(current_order) and 0 <= target < len(current_order):
                current_order[idx], current_order[target] = current_order[target], current_order[idx]
            return render_slots(sample["candidates"], current_order)
        return move_handler

    def step_sample(current_id, step):
        """Return the id of the sample ``step`` positions away, clamped to the list."""
        position = find_position(current_id)
        if position is None:
            return current_id
        target = position + step
        if 0 <= target < len(samples):
            return samples[target]["id"]
        return current_id

    def nav_to_prev(current_id):
        """Return the id of the previous sample (or the current one at the start)."""
        return step_sample(current_id, -1)

    def nav_to_next(current_id):
        """Return the id of the next sample (or the current one at the end)."""
        return step_sample(current_id, 1)

    def reset_order(sample_id):
        """Restore the original candidate order of the displayed sample."""
        sample = find_sample(sample_id)
        if sample is None:
            return untouched_slots()
        docs = sample["candidates"]
        current_order[:] = range(len(docs))
        return render_slots(docs, current_order)

    def save_results():
        """Save all collected results to a file and report the outcome."""
        try:
            write_results()
        except (OSError, TypeError, ValueError) as err:
            return f"Error: {err}"
        return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"

    css = """
    .document-text {
        font-size: 16px;
        padding: 10px;
        border-radius: 8px;
        background-color: #f8f9fa;
        border-left: 5px solid #4CAF50;
    }
    .query-box {
        background-color: #e3f2fd;
        padding: 16px;
        border-radius: 8px;
        border-left: 5px solid #2196F3;
        font-size: 18px;
        margin-bottom: 20px;
    }
    .progress-indicator {
        font-weight: bold;
        text-align: center;
        padding: 12px;
        background-color: #f1f8e9;
        border-radius: 8px;
        margin: 10px 0;
    }
    .status-box {
        font-weight: bold;
        text-align: center;
    }
    .ranking-box {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 15px;
        margin-bottom: 12px;
        transition: all 0.3s;
    }
    .ranking-box:hover {
        box-shadow: 0 4px 8px rgba(0,0,0,0.1);
    }
    /* Add different colors for rank levels */
    .rank-1, .rank-2, .rank-3 {
        border-left: 5px solid #4CAF50; /* Green for top ranks */
    }
    .rank-4, .rank-5, .rank-6, .rank-7 {
        border-left: 5px solid #FFC107; /* Yellow for mid ranks */
    }
    .rank-8, .rank-9, .rank-10 {
        border-left: 5px solid #FF5722; /* Orange for lower ranks */
    }
    .rank-11, .rank-12, .rank-13, .rank-14, .rank-15, .rank-16, .rank-17, .rank-18, .rank-19, .rank-20 {
        border-left: 5px solid #9E9E9E; /* Gray for lowest ranks */
    }
    """

    # Built line by line so the Markdown never picks up source indentation
    # (which would turn the text into a code block).
    instructions_md = "\n".join([
        "## Task Instructions",
        "",
        "{instructions}",
        "",
        "### How to use this interface:",
        "1. Read the query at the top",
        "2. Review each document in the list",
        '3. Use the "Move Up" and "Move Down" buttons to arrange documents by relevance',
        "   (most relevant at the top, least relevant at the bottom)",
        '4. Click "Submit Rankings" when you\'re done with the current query',
        '5. Use "Previous" and "Next" to navigate between queries',
        '6. Click "Save All Results" periodically to ensure your work is saved',
    ]).format(instructions=task_data["instructions"])

    ranking_help_md = "\n".join([
        "### Instructions for Ranking:",
        "- Documents are initially shown in their original order",
        "- The most relevant document should be at the TOP (Rank 1)",
        '- Use the "Move Up" and "Move Down" buttons to rearrange documents',
        "- The rank is shown at the beginning of each document: [RANK X]",
        '- When you\'re satisfied with the order, click "Submit Rankings"',
    ])

    # The first sample is rendered directly at construction time.
    first_sample = samples[0]
    first_docs = first_sample["candidates"]
    current_order[:] = range(len(first_docs))

    # gr.Box is not available in every Gradio release; fall back to gr.Group.
    box_container = getattr(gr, "Box", gr.Group)

    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
        gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
        with gr.Accordion("Instructions", open=True):
            gr.Markdown(instructions_md)

        # Per-session id of the sample currently displayed. Handlers must
        # receive it as an event input: `.value` only holds the initial value.
        current_sample_id = gr.State(value=first_sample["id"])

        with gr.Row(elem_classes=["progress-indicator"]):
            progress_text = gr.Textbox(label="Progress", value=progress_message(), interactive=False)
            status_box = gr.Textbox(
                label="Status",
                value=status_for(first_sample),
                interactive=False,
                elem_classes=["status-box"],
            )

        with gr.Group():
            gr.Markdown("## Query:")
            query_text = gr.Textbox(
                value=first_sample["query"],
                label="",
                interactive=False,
                elem_classes=["query-box"],
            )
            gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")

        # Create every slot first and wire the buttons afterwards, so each
        # click handler is bound to the complete list of document textboxes.
        move_buttons = []
        with gr.Column():
            gr.Markdown(ranking_help_md)
            for slot in range(max_documents):
                occupied = slot < len(first_docs)
                with gr.Group():
                    with box_container():
                        with gr.Row():
                            gr.Markdown(f"**Document #{slot + 1}**")
                            with gr.Column(scale=1):
                                up_btn = gr.Button("⬆️ Move Up", size="sm", variant="primary")
                                down_btn = gr.Button("⬇️ Move Down", size="sm", variant="secondary")
                        doc_text = gr.Textbox(
                            value=format_slot(first_docs, current_order, slot) if occupied else "",
                            label="",
                            interactive=False,
                            visible=occupied,
                            elem_id=f"doc-text-{slot}",
                            elem_classes=slot_classes(slot) if occupied else ["document-text"],
                        )
                document_containers.append(doc_text)
                move_buttons.append((up_btn, down_btn))

        for idx, (up_btn, down_btn) in enumerate(move_buttons):
            up_btn.click(
                make_move_handler(idx, -1),
                inputs=[current_sample_id],
                outputs=document_containers,
            )
            down_btn.click(
                make_move_handler(idx, 1),
                inputs=[current_sample_id],
                outputs=document_containers,
            )

        with gr.Row(equal_height=True):
            prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
            submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
            next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
        with gr.Row():
            save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
            clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")

        # Navigation: first move the session's sample id, then refresh the
        # status, progress, query and all document slots for that sample.
        sample_view = [status_box, progress_text, query_text] + document_containers
        prev_btn.click(
            nav_to_prev,
            inputs=[current_sample_id],
            outputs=[current_sample_id],
        ).then(
            load_sample,
            inputs=[current_sample_id],
            outputs=sample_view,
        )
        next_btn.click(
            nav_to_next,
            inputs=[current_sample_id],
            outputs=[current_sample_id],
        ).then(
            load_sample,
            inputs=[current_sample_id],
            outputs=sample_view,
        )

        submit_btn.click(
            save_ranking,
            inputs=[current_sample_id],
            outputs=[status_box, progress_text],
        )
        clear_btn.click(
            reset_order,
            inputs=[current_sample_id],
            outputs=document_containers,
        )
        save_btn.click(save_results, outputs=[status_box])

    return demo
# Just use a simplified version with the main demo functionality | |
def get_task_file() -> str:
    """Get the task file to use for the demo.

    Looks in the current working directory. The default example
    ``AskUbuntuDupQuestions_human_eval.json`` wins when it exists; otherwise
    the alphabetically first ``*_human_eval.json`` file is used, so the choice
    does not depend on the directory listing order.

    Returns:
        The file name (relative to the current directory) of the task file.

    Raises:
        FileNotFoundError: If no ``*_human_eval.json`` file is present.
    """
    default_task = "AskUbuntuDupQuestions_human_eval.json"
    if os.path.exists(default_task):
        return default_task

    # os.listdir() returns entries in arbitrary order; sort for determinism.
    task_files = sorted(f for f in os.listdir(".") if f.endswith("_human_eval.json"))
    if task_files:
        return task_files[0]

    raise FileNotFoundError("No task files found. Please ensure there's a *_human_eval.json file in the current directory.")
# Main app with simplified structure | |
# Build the top-level app at import time so that `app` exists as a module
# attribute; the server itself is only started under the __main__ guard below.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# MTEB Human Evaluation Demo")
    try:
        # Load the task file. JSON is read as UTF-8 explicitly instead of
        # relying on the platform's default encoding.
        task_file = get_task_file()
        with open(task_file, "r", encoding="utf-8") as f:
            task_data = json.load(f)
        # Show which task is currently loaded
        gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
        # Display the interface (rendered inside this Blocks context)
        reranking_demo = create_reranking_interface(task_data)
    except Exception as e:
        # UI boundary: any failure while locating, parsing or rendering the
        # task is shown on the page instead of aborting module import.
        gr.Markdown(f"**Error loading task: {str(e)}**")
        error_details = gr.Textbox(
            value=str(e),
            label="Error Details",
            interactive=False,
        )

if __name__ == "__main__":
    app.launch()