AdnanElAssadi's picture
Update app.py
9069a07 verified
raw
history blame
21.2 kB
import gradio as gr
import json
import os
from pathlib import Path
def create_reranking_interface(task_data):
    """Create a Gradio interface for reranking evaluation.

    Args:
        task_data: Task description. The keys read here are ``"task_name"``,
            ``"instructions"`` and ``"samples"``; every sample is accessed
            through its ``"id"``, ``"query"`` and ``"candidates"`` keys.

    Returns:
        The ``gr.Blocks`` object that holds the annotation UI.

    Raises:
        ValueError: If ``task_data["samples"]`` is empty.
    """
    samples = task_data["samples"]
    if not samples:
        raise ValueError("Task data contains no samples to evaluate.")

    max_documents = 20  # number of document slots rendered per sample
    output_path = f"{task_data['task_name']}_human_results.json"
    results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
    completed_samples = {s["id"]: False for s in samples}
    first_sample = samples[0]
    first_docs = first_sample["candidates"]
    # Document indices of the sample on screen, most relevant first. The list
    # is only ever mutated in place so every handler sees the same object.
    # NOTE(review): this list, `results` and `completed_samples` live in this
    # closure, so they appear to be shared by every session served by the
    # process -- confirm that single-annotator use is intended.
    current_order = list(range(len(first_docs)))
    # One textbox per document slot; filled while the layout is built.
    document_containers = []

    custom_css = """
    .document-text {
        font-size: 16px;
        padding: 10px;
        border-radius: 8px;
        background-color: #f8f9fa;
        border-left: 5px solid #4CAF50;
    }
    .query-box {
        background-color: #e3f2fd;
        padding: 16px;
        border-radius: 8px;
        border-left: 5px solid #2196F3;
        font-size: 18px;
        margin-bottom: 20px;
    }
    .progress-indicator {
        font-weight: bold;
        text-align: center;
        padding: 12px;
        background-color: #f1f8e9;
        border-radius: 8px;
        margin: 10px 0;
    }
    .status-box {
        font-weight: bold;
        text-align: center;
    }
    .ranking-box {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 15px;
        margin-bottom: 12px;
        transition: all 0.3s;
    }
    .ranking-box:hover {
        box-shadow: 0 4px 8px rgba(0,0,0,0.1);
    }
    /* Add different colors for rank levels */
    .rank-1, .rank-2, .rank-3 {
        border-left: 5px solid #4CAF50; /* Green for top ranks */
    }
    .rank-4, .rank-5, .rank-6, .rank-7 {
        border-left: 5px solid #FFC107; /* Yellow for mid ranks */
    }
    .rank-8, .rank-9, .rank-10 {
        border-left: 5px solid #FF5722; /* Orange for lower ranks */
    }
    .rank-11, .rank-12, .rank-13, .rank-14, .rank-15, .rank-16, .rank-17, .rank-18, .rank-19, .rank-20 {
        border-left: 5px solid #9E9E9E; /* Gray for lowest ranks */
    }
    """

    def find_sample(sample_id):
        """Return the sample whose id is ``sample_id``, or None."""
        return next((s for s in samples if s["id"] == sample_id), None)

    def progress_message():
        """Return the "Progress: done/total" text."""
        return f"Progress: {sum(completed_samples.values())}/{len(samples)}"

    def status_message(sample):
        """Return the "Viewing query i of n" text for ``sample``."""
        status = f"Viewing query {samples.index(sample) + 1} of {len(samples)}"
        if completed_samples[sample["id"]]:
            status += " (already completed)"
        return status

    def stored_order(sample_id, doc_count):
        """Return the saved ordering for a sample, or the original order."""
        annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
        if annotation and "rankings" in annotation:
            rankings = annotation["rankings"]
            # rankings[doc_idx] is that document's rank; sorting the indices
            # by rank gives the display order.
            return sorted(range(len(rankings)), key=lambda doc_idx: rankings[doc_idx])
        return list(range(doc_count))

    def render_documents(docs):
        """Build one update per document slot from ``current_order``."""
        updates = []
        for slot in range(len(document_containers)):
            if slot < len(current_order) and slot < len(docs):
                doc_idx = current_order[slot]
                updates.append(gr.update(
                    value=f"[RANK {slot+1}] Document {doc_idx+1}: {docs[doc_idx]}",
                    visible=True,
                    elem_classes=["document-text", f"rank-{slot+1}"],
                ))
            else:
                updates.append(gr.update(value="", visible=False))
        return updates

    def untouched_documents():
        """Return no-op updates for every document slot."""
        return [gr.update() for _ in document_containers]

    def write_results():
        """Dump all annotations collected so far to ``output_path``."""
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)

    def save_ranking(sample_id):
        """Save the current document ordering as rankings."""
        try:
            if not current_order:
                return "⚠️ No document ordering found", progress_message()
            # rankings[doc_idx] holds the 1-based rank of that document, which
            # is the layout stored_order() reads back.
            rankings = [0] * len(current_order)
            for position, doc_idx in enumerate(current_order):
                rankings[doc_idx] = position + 1
            annotation = {"sample_id": sample_id, "rankings": rankings}
            existing_idx = next(
                (i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id),
                None,
            )
            if existing_idx is not None:
                results["annotations"][existing_idx] = annotation
            else:
                results["annotations"].append(annotation)
            completed_samples[sample_id] = True
            # Writing to disk is best effort: the annotation is already kept
            # in memory, so a failed write is reported but not fatal.
            try:
                write_results()
            except Exception:
                return "✅ Rankings saved in memory (file save failed)", progress_message()
            return "✅ Rankings saved successfully (in memory and to file)", progress_message()
        except Exception as e:
            # UI boundary: surface the problem in the status box.
            return f"Error: {str(e)}", progress_message()

    def show_sample(sample_id):
        """Load a sample and return updates for status, progress, query and documents."""
        sample = find_sample(sample_id)
        if sample is None:
            return ["Query not found", progress_message(), ""] + untouched_documents()
        docs = sample["candidates"]
        current_order[:] = stored_order(sample_id, len(docs))
        return [status_message(sample), progress_message(), sample["query"]] + render_documents(docs)

    def make_move_handler(slot, offset):
        """Return a handler that swaps the document at ``slot`` with its neighbour."""
        def move_handler(sample_id):
            target = slot + offset
            if 0 <= slot < len(current_order) and 0 <= target < len(current_order):
                current_order[slot], current_order[target] = current_order[target], current_order[slot]
            sample = find_sample(sample_id)
            if sample is None:
                return untouched_documents()
            return render_documents(sample["candidates"])
        return move_handler

    def make_navigator(offset):
        """Return a handler that yields the id of the sample ``offset`` steps away."""
        def navigate(current_id):
            sample = find_sample(current_id)
            if sample is None:
                return current_id
            target = samples.index(sample) + offset
            if 0 <= target < len(samples):
                return samples[target]["id"]
            return current_id
        return navigate

    def reset_order(sample_id):
        """Restore the original document order of the displayed sample."""
        sample = find_sample(sample_id)
        if sample is None:
            return untouched_documents()
        docs = sample["candidates"]
        current_order[:] = range(len(docs))
        return render_documents(docs)

    def save_results():
        """Save all collected results to a file."""
        try:
            write_results()
        except (OSError, TypeError, ValueError) as exc:
            return f"⚠️ Could not save results to {output_path}: {exc}"
        return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"

    # gr.Box is not available in every Gradio release; gr.Group is the
    # closest container when it is missing.
    box_container = getattr(gr, "Box", gr.Group)

    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a copy of the file without indentation -- confirm.
    with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
        gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
        with gr.Accordion("Instructions", open=True):
            gr.Markdown("""
## Task Instructions
{instructions}
### How to use this interface:
1. Read the query at the top
2. Review each document in the list
3. Use the "Move Up" and "Move Down" buttons to arrange documents by relevance
(most relevant at the top, least relevant at the bottom)
4. Click "Submit Rankings" when you're done with the current query
5. Use "Previous" and "Next" to navigate between queries
6. Click "Save All Results" periodically to ensure your work is saved
""".format(instructions=task_data["instructions"]))
        # Per-session id of the sample on screen. Handlers receive it as an
        # event input; reading `.value` would only give this initial value.
        current_sample_id = gr.State(value=first_sample["id"])
        with gr.Row(elem_classes=["progress-indicator"]):
            progress_text = gr.Textbox(label="Progress", value=progress_message(), interactive=False)
            status_box = gr.Textbox(
                label="Status",
                value=status_message(first_sample),
                interactive=False,
                elem_classes=["status-box"],
            )
        with gr.Group():
            gr.Markdown("## Query:")
            query_text = gr.Textbox(
                value=first_sample["query"],
                label="",
                interactive=False,
                elem_classes=["query-box"],
            )
        gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")
        move_buttons = []
        with gr.Column():
            gr.Markdown("""
### Instructions for Ranking:
- Documents are initially shown in their original order
- The most relevant document should be at the TOP (Rank 1)
- Use the "Move Up" and "Move Down" buttons to rearrange documents
- The rank is shown at the beginning of each document: [RANK X]
- When you're satisfied with the order, click "Submit Rankings"
""")
            for slot in range(max_documents):
                in_use = slot < len(first_docs)
                with gr.Group():
                    with box_container():
                        with gr.Row():
                            gr.Markdown(f"**Document #{slot+1}**")
                            with gr.Column(scale=1):
                                up_btn = gr.Button("⬆️ Move Up", size="sm", variant="primary")
                                down_btn = gr.Button("⬇️ Move Down", size="sm", variant="secondary")
                        # The first sample is rendered directly so documents
                        # are visible before any event has fired.
                        doc_text = gr.Textbox(
                            value=f"[RANK {slot+1}] Document {slot+1}: {first_docs[slot]}" if in_use else "",
                            label="",
                            interactive=False,
                            visible=in_use,
                            elem_id=f"doc-text-{slot}",
                            elem_classes=["document-text", f"rank-{slot+1}"],
                        )
                document_containers.append(doc_text)
                move_buttons.append((up_btn, down_btn))
        with gr.Row(equal_height=True):
            prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
            submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
            next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
        with gr.Row():
            save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
            clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")

        # Wire the move buttons only now: the output list is read when the
        # listener is registered, so it must already hold every slot.
        for slot, (up_btn, down_btn) in enumerate(move_buttons):
            up_btn.click(
                make_move_handler(slot, -1),
                inputs=[current_sample_id],
                outputs=document_containers,
            )
            down_btn.click(
                make_move_handler(slot, 1),
                inputs=[current_sample_id],
                outputs=document_containers,
            )

        # Navigation first updates the stored sample id, then redraws
        # everything that depends on it.
        sample_view = [status_box, progress_text, query_text] + document_containers
        prev_btn.click(
            make_navigator(-1),
            inputs=[current_sample_id],
            outputs=[current_sample_id],
        ).then(
            show_sample,
            inputs=[current_sample_id],
            outputs=sample_view,
        )
        next_btn.click(
            make_navigator(1),
            inputs=[current_sample_id],
            outputs=[current_sample_id],
        ).then(
            show_sample,
            inputs=[current_sample_id],
            outputs=sample_view,
        )
        submit_btn.click(
            save_ranking,
            inputs=[current_sample_id],
            outputs=[status_box, progress_text],
        )
        clear_btn.click(
            reset_order,
            inputs=[current_sample_id],
            outputs=document_containers,
        )
        save_btn.click(save_results, outputs=[status_box])
    return demo
# Just use a simplified version with the main demo functionality
def get_task_file():
    """Get the task file to use for the demo.

    The default example is preferred when it exists as a regular file in the
    current directory. Otherwise the alphabetically first
    ``*_human_eval.json`` file is used, so the choice does not depend on the
    order in which the operating system lists the directory.

    Returns:
        The file name of the selected task file, relative to the current
        directory.

    Raises:
        FileNotFoundError: If the current directory holds no task file.
    """
    default_task = "AskUbuntuDupQuestions_human_eval.json"
    if os.path.isfile(default_task):
        return default_task
    # os.listdir() returns names in arbitrary order; sort for a stable pick.
    task_files = sorted(
        name
        for name in os.listdir(".")
        if name.endswith("_human_eval.json") and os.path.isfile(name)
    )
    if task_files:
        return task_files[0]
    raise FileNotFoundError("No task files found. Please ensure there's a *_human_eval.json file in the current directory.")
# Main app with simplified structure
# Build the top-level app: a title, then either the reranking interface for
# the task file found in the current directory or an error report.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# MTEB Human Evaluation Demo")
    try:
        # Load the task file; JSON is read as UTF-8 so the result does not
        # depend on the platform's default locale encoding.
        task_file = get_task_file()
        with open(task_file, "r", encoding="utf-8") as f:
            task_data = json.load(f)
        # Show which task is currently loaded
        gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
        # Display the interface
        reranking_demo = create_reranking_interface(task_data)
    except Exception as e:
        # Top-level UI boundary: any failure while loading the task or
        # building the interface is shown on the page instead of aborting.
        gr.Markdown(f"**Error loading task: {str(e)}**")
        error_details = gr.Textbox(
            value=str(e),
            label="Error Details",
            interactive=False
        )

if __name__ == "__main__":
    app.launch()