Spaces:

AdnanElAssadi
/

MTEB-Human-Eval-Demo

Sleeping

App Files Files Community

AdnanElAssadi committed on Apr 6

Commit

4872c01

verified ·

1 Parent(s): bd8112a

Update app.py

Browse files

Files changed (1) hide show

app.py +244 -164

app.py CHANGED Viewed

@@ -9,35 +9,33 @@ def create_reranking_interface(task_data):
     results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
     completed_samples = {s["id"]: False for s in samples}
-    def save_ranking(rankings, sample_id):
-        """Save the current set of rankings."""
         try:
-            # Check if all documents have rankings
-            all_ranked = all(r is not None and r != "" for r in rankings)
-            if not all_ranked:
-                return "⚠️ Please assign a rank to all documents before submitting", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            # Convert rankings to integers with better error handling
-            try:
-                processed_rankings = [int(r) for r in rankings]
-            except ValueError:
-                return "⚠️ Invalid ranking value. Please use only numbers.", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            # Check for duplicate rankings
-            if len(set(processed_rankings)) != len(processed_rankings):
-                return "⚠️ Each document must have a unique rank. Please review your rankings.", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
             # Store this annotation in memory
             existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
             if existing_idx is not None:
                 results["annotations"][existing_idx] = {
                     "sample_id": sample_id,
-                    "rankings": processed_rankings
                 }
             else:
                 results["annotations"].append({
                     "sample_id": sample_id,
-                    "rankings": processed_rankings
                 })
             completed_samples[sample_id] = True
@@ -55,6 +53,160 @@ def create_reranking_interface(task_data):
             # Return specific error message
             return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
@@ -66,12 +218,12 @@ def create_reranking_interface(task_data):
             ### How to use this interface:
             1. Read the query at the top
-            2. Review each document carefully
-            3. Assign a rank to each document (1 = most relevant, higher numbers = less relevant)
-            4. Each document must have a unique rank
-            5. Click "Submit Rankings" when you're done with the current query
-            6. Use "Previous" and "Next" to navigate between queries
-            7. Click "Save All Results" periodically to ensure your work is saved
             """.format(instructions=task_data["instructions"]))
         current_sample_id = gr.State(value=samples[0]["id"])
@@ -84,75 +236,17 @@ def create_reranking_interface(task_data):
             gr.Markdown("## Query:")
             query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
-            gr.Markdown("## Documents to Rank:")
-            # Create document displays and ranking dropdowns in synchronized pairs
-            doc_containers = []
-            ranking_dropdowns = []
-            with gr.Column():
-                for i, doc in enumerate(samples[0]["candidates"]):
-                    with gr.Row():
-                        doc_box = gr.Textbox(
-                            value=doc,
-                            label=f"Document {i+1}",
-                            interactive=False
-                        )
-                        doc_containers.append(doc_box)
-                        # Use Dropdown instead of Number for ranking
-                        # Ranks from 1 to N (number of candidates)
-                        rank_dropdown = gr.Dropdown(
-                            choices=[str(j) for j in range(1, len(samples[0]["candidates"])+1)],
-                            label=f"Rank",
-                            value=None,
-                            interactive=True
-                        )
-                        ranking_dropdowns.append(rank_dropdown)
-                        # Add quick rank buttons
-                        with gr.Row():
-                            rank_high_btn = gr.Button("Rank High (1-3)", size="sm")
-                            rank_med_btn = gr.Button("Rank Medium (4-7)", size="sm")
-                            rank_low_btn = gr.Button("Rank Low (8+)", size="sm")
-                        # Set ranks directly instead of incrementing/decrementing
-                        # Helper functions to quickly set ranks in different ranges
-                        def set_high_rank(i):
-                            def set_rank():
-                                return ["1", "2", "3"][min(i, 2)]  # First 3 docs get ranks 1,2,3
-                            return set_rank
-                        def set_medium_rank(i):
-                            def set_rank():
-                                base = 4
-                                return str(min(base + i % 4, len(samples[0]["candidates"])))
-                            return set_rank
-                        def set_low_rank(i):
-                            def set_rank():
-                                base = 8
-                                return str(min(base + i % 10, len(samples[0]["candidates"])))
-                            return set_rank
-                        # Connect rank buttons
-                        rank_high_btn.click(
-                            set_high_rank(i),
-                            inputs=[],
-                            outputs=[rank_dropdown]
-                        )
-                        rank_med_btn.click(
-                            set_medium_rank(i),
-                            inputs=[],
-                            outputs=[rank_dropdown]
-                        )
-                        rank_low_btn.click(
-                            set_low_rank(i),
-                            inputs=[],
-                            outputs=[rank_dropdown]
-                        )
             with gr.Row():
                 prev_btn = gr.Button("← Previous Query", size="sm")
@@ -161,107 +255,93 @@ def create_reranking_interface(task_data):
             save_btn = gr.Button("💾 Save All Results", variant="secondary")
         def load_sample(sample_id):
-            """Load a specific sample into the interface."""
-            sample = next((s for s in samples if s["id"] == sample_id), None)
-            if not sample:
-                return [query_text.value] + [d.value for d in doc_containers] + [None] * len(ranking_dropdowns) + [current_sample_id.value, progress_text.value, status_box.value]
-            # Update query
-            new_query = sample["query"]
-            # Update documents
-            new_docs = []
-            for i, doc in enumerate(sample["candidates"]):
-                if i < len(doc_containers):
-                    new_docs.append(doc)
-            # Initialize rankings
-            new_rankings = [None] * len(ranking_dropdowns)
-            # Check if this sample has already been annotated
-            existing_annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
-            if existing_annotation:
-                # Restore previous rankings
-                for i, rank in enumerate(existing_annotation["rankings"]):
-                    if i < len(new_rankings) and rank is not None:
-                        new_rankings[i] = str(rank)  # Convert to string for dropdown
-            # Update progress
-            current_idx = samples.index(sample)
-            new_progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            new_status = f"Viewing query {current_idx + 1} of {len(samples)}"
-            if completed_samples[sample_id]:
-                new_status += " (already completed)"
-            return [new_query] + new_docs + new_rankings + [sample["id"], new_progress, new_status]
-        def next_sample(current_id):
-            """Load the next sample."""
             current_sample = next((s for s in samples if s["id"] == current_id), None)
             if not current_sample:
                 return current_id
             current_idx = samples.index(current_sample)
-            if current_idx < len(samples) - 1:
                 next_sample = samples[current_idx + 1]
                 return next_sample["id"]
-            return current_id
-        def prev_sample(current_id):
-            """Load the previous sample."""
-            current_sample = next((s for s in samples if s["id"] == current_id), None)
-            if not current_sample:
-                return current_id
-            current_idx = samples.index(current_sample)
-            if current_idx > 0:
                 prev_sample = samples[current_idx - 1]
                 return prev_sample["id"]
             return current_id
-        def save_results():
-            """Save all collected results to a file."""
-            output_path = f"{task_data['task_name']}_human_results.json"
-            with open(output_path, "w") as f:
-                json.dump(results, f, indent=2)
-            return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
-        # Define a wrapper function that collects all the dropdown values into a list
-        def save_ranking_wrapper(*args):
-            # The last argument is the sample_id, all others are rankings
-            rankings = args[:-1]
-            sample_id = args[-1]
-            return save_ranking(rankings, sample_id)
-        # Connect events
-        submit_btn.click(
-            save_ranking_wrapper,
-            inputs=ranking_dropdowns + [current_sample_id],
-            outputs=[status_box, progress_text]
-        )
         next_btn.click(
-            next_sample,
             inputs=[current_sample_id],
             outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
-            outputs=[query_text] + doc_containers + ranking_dropdowns + [current_sample_id, progress_text, status_box]
         )
         prev_btn.click(
-            prev_sample,
             inputs=[current_sample_id],
             outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
-            outputs=[query_text] + doc_containers + ranking_dropdowns + [current_sample_id, progress_text, status_box]
         )
         save_btn.click(save_results, outputs=[status_box])
     return demo

     results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
     completed_samples = {s["id"]: False for s in samples}
+    # Track the current ordering of documents: a list of candidate indices in
+    # display order (position 0 is shown at the top and receives rank 1).
+    # NOTE(review): in this view the State is created before the
+    # `with gr.Blocks(...)` block opens. Gradio normally expects components that
+    # are used as event inputs/outputs to be created inside the Blocks context —
+    # confirm, and move this line inside the `with` block if needed.
+    current_document_order = gr.State([])
+    def save_ranking(sample_id, doc_order):
+        """Save the current document ordering as rankings.
+
+        Args:
+            sample_id: ID of the sample being annotated.
+            doc_order: Candidate indices in display order (position 0 = top).
+
+        Returns:
+            A ``(status message, progress text)`` pair. The paths visible here
+            are the empty-ordering warning and the error message.
+        """
         try:
+            # An empty (or missing) ordering cannot be turned into rankings.
+            if not doc_order:
+                return "⚠️ No document ordering found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+            # Convert document positions to rankings
+            # The first document (position 0) gets rank 1, etc.
+            # rankings[i] ends up as the 1-based display position of candidate i.
+            # This relies on doc_order containing every index 0..n-1;
+            # list.index() raises ValueError for an index that is absent.
+            rankings = []
+            for i in range(len(doc_order)):
+                doc_idx = doc_order.index(i)
+                rankings.append(doc_idx + 1)  # Convert to 1-based ranks
             # Store this annotation in memory
+            # Overwrite an earlier annotation for the same sample instead of
+            # appending a duplicate entry.
             existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
             if existing_idx is not None:
                 results["annotations"][existing_idx] = {
                     "sample_id": sample_id,
+                    "rankings": rankings
                 }
             else:
                 results["annotations"].append({
                     "sample_id": sample_id,
+                    "rankings": rankings
                 })
             completed_samples[sample_id] = True
+            # NOTE(review): the success return and the `except` clause that binds
+            # `e` are not visible in this diff view (lines hidden between hunks).
             # Return specific error message
             return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+    def initialize_documents(sample_id):
+        """Build the initial display state for one sample.
+
+        Looks the sample up by ID and returns ``(doc_order, query, status,
+        progress)``. ``doc_order`` lists candidate indices in display order: the
+        natural order 0..n-1 for a fresh sample, or the order implied by the
+        stored rankings when the sample was annotated before. An unknown ID
+        yields an empty order, an empty query and a "Query not found" status.
+        """
+        progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+        sample = next((s for s in samples if s["id"] == sample_id), None)
+        if not sample:
+            return [], "", "Query not found", progress
+        # Default: candidates in their original order.
+        candidate_count = len(sample["candidates"])
+        doc_order = list(range(candidate_count))
+        # A stored annotation wins: rankings[i] is the rank of candidate i, so
+        # sorting the candidate indices by rank recovers the display order
+        # (the sort is stable, ties keep their original index order).
+        previous = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
+        if previous and "rankings" in previous:
+            saved_ranks = previous["rankings"]
+            doc_order = sorted(range(len(saved_ranks)), key=lambda idx: saved_ranks[idx])
+        query = sample["query"]
+        position = samples.index(sample) + 1
+        status = f"Viewing query {position} of {len(samples)}"
+        if completed_samples[sample_id]:
+            status += " (already completed)"
+        return doc_order, query, status, progress
+    def move_document(doc_order, doc_idx, direction):
+        """Move one document a single step up or down in the ordering.
+
+        Args:
+            doc_order: Candidate indices in display order (position 0 = top).
+            doc_idx: The candidate index to move, as stored in ``doc_order``.
+            direction: ``"up"`` or ``"down"``; any other value is a no-op.
+
+        Returns:
+            A new list with the document swapped with its neighbour, or an
+            unchanged copy when the move is impossible (already at the edge,
+            unknown direction, or ``doc_idx`` not in the ordering). A falsy
+            ``doc_order`` is returned as-is. The input list is never mutated.
+        """
+        if not doc_order:
+            return doc_order
+        # Work on a copy so the caller's list (the Gradio state) is not mutated.
+        new_order = list(doc_order)
+        # A stale or out-of-range index coming from the UI must not raise from
+        # list.index(); leave the ordering untouched instead.
+        if doc_idx not in new_order:
+            return new_order
+        current_pos = new_order.index(doc_idx)
+        if direction == "up" and current_pos > 0:
+            target_pos = current_pos - 1
+        elif direction == "down" and current_pos < len(new_order) - 1:
+            target_pos = current_pos + 1
+        else:
+            return new_order
+        # Swap with the neighbour in the requested direction.
+        new_order[current_pos], new_order[target_pos] = new_order[target_pos], new_order[current_pos]
+        return new_order
+    def render_documents(doc_order, sample_id):
+        """Render the documents of a sample as an HTML list in the given order.
+
+        Args:
+            doc_order: Candidate indices in display order (position 0 = rank 1).
+            sample_id: ID of the sample whose candidates are rendered.
+
+        Returns:
+            The result of ``gr.HTML.update(value=...)`` carrying the markup: one
+            card per document (rank, move buttons, text) followed by the CSS
+            and the ``moveDocument`` script. A placeholder paragraph is used
+            when the sample is unknown or the ordering is empty.
+        """
+        # Local import keeps this block self-contained.
+        from html import escape
+        sample = next((s for s in samples if s["id"] == sample_id), None)
+        if not sample or not doc_order:
+            return gr.HTML.update(value="<p>No documents to display</p>")
+        docs = sample["candidates"]
+        # Collect the fragments and join once instead of repeated string +=.
+        parts = ["<div class='document-list'>"]
+        for pos, doc_idx in enumerate(doc_order):
+            if doc_idx < len(docs):
+                # Candidate text is data, not markup: escape it so characters
+                # such as <, > and & are displayed literally and cannot break
+                # (or inject into) the surrounding HTML.
+                doc_text = escape(str(docs[doc_idx]))
+                # Calculate the rank (position + 1)
+                rank = pos + 1
+                # Create a container for each document with buttons and rank
+                parts.append(f"""
+                <div class='document-item' id='doc-{doc_idx}'>
+                    <div class='document-controls'>
+                        <div class='rank-display'>Rank: {rank}</div>
+                        <button class='move-up-btn' onclick='moveDocument({doc_idx}, "up")'>↑ Move Up</button>
+                        <button class='move-down-btn' onclick='moveDocument({doc_idx}, "down")'>↓ Move Down</button>
+                    </div>
+                    <div class='document-content'>
+                        <p><strong>Document {doc_idx + 1}:</strong> {doc_text}</p>
+                    </div>
+                </div>
+                """)
+        parts.append("</div>")
+        # Add custom CSS
+        parts.append("""
+        <style>
+        .document-list {
+            display: flex;
+            flex-direction: column;
+            gap: 10px;
+        }
+        .document-item {
+            border: 1px solid #ddd;
+            border-radius: 8px;
+            padding: 15px;
+            background-color: #f9f9f9;
+            display: flex;
+            flex-direction: column;
+        }
+        .document-controls {
+            display: flex;
+            align-items: center;
+            gap: 10px;
+            margin-bottom: 10px;
+        }
+        .rank-display {
+            font-weight: bold;
+            min-width: 80px;
+        }
+        .document-content {
+            padding: 5px;
+            background-color: white;
+            border-radius: 4px;
+        }
+        button {
+            padding: 5px 10px;
+            border-radius: 4px;
+            border: 1px solid #ccc;
+            cursor: pointer;
+        }
+        .move-up-btn {
+            background-color: #e0f7fa;
+        }
+        .move-down-btn {
+            background-color: #fff3e0;
+        }
+        </style>
+        """)
+        # Add JavaScript to handle button clicks
+        # NOTE(review): nothing visible in this view listens for the
+        # 'move-document' event, and <script> tags placed in a gr.HTML value may
+        # not be executed by the browser — confirm the buttons reach Python.
+        parts.append("""
+        <script>
+        function moveDocument(docIdx, direction) {
+            // Call the Python function via Gradio's API
+            const event = new CustomEvent('move-document', {
+                detail: { docIdx: docIdx, direction: direction }
+            });
+            document.dispatchEvent(event);
+        }
+        </script>
+        """)
+        return gr.HTML.update(value="".join(parts))
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
             ### How to use this interface:
             1. Read the query at the top
+            2. Review each document in the list
+            3. Use the "Move Up" and "Move Down" buttons to arrange documents by relevance
+               (most relevant at the top, least relevant at the bottom)
+            4. Click "Submit Rankings" when you're done with the current query
+            5. Use "Previous" and "Next" to navigate between queries
+            6. Click "Save All Results" periodically to ensure your work is saved
             """.format(instructions=task_data["instructions"]))
         current_sample_id = gr.State(value=samples[0]["id"])
             gr.Markdown("## Query:")
             query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
+            gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")
+            # Container to display documents in their current order
+            # (filled with the markup produced by render_documents).
+            document_list = gr.HTML()
+            # For Gradio's event handling, we need actual buttons
+            # These are invisible and triggered by JavaScript
+            # NOTE(review): no code visible in this view writes to doc_index or
+            # clicks up_btn/down_btn from JavaScript; the rendered buttons only
+            # dispatch a 'move-document' CustomEvent — confirm the bridge exists.
+            with gr.Row(visible=False):
+                up_btn = gr.Button("Up")
+                down_btn = gr.Button("Down")
+                # Index (into the sample's candidates) of the document to move.
+                doc_index = gr.Number(0, label="Document Index")
             with gr.Row():
                 prev_btn = gr.Button("← Previous Query", size="sm")
             save_btn = gr.Button("💾 Save All Results", variant="secondary")
+        # Initialize the document order for the first sample
+        first_sample_id = samples[0]["id"]
+        doc_order, query_val, status_val, progress_val = initialize_documents(first_sample_id)
+        current_document_order.value = doc_order
+        query_text.value = query_val
+        status_box.value = status_val
+        progress_text.value = progress_val
+        # Render the documents in their initial order.
+        # render_documents() returns whatever gr.HTML.update(...) produces; that
+        # helper hands back a plain update dict rather than a component, so the
+        # markup is read by key. The attribute form is kept as a fallback in
+        # case a component instance is returned instead.
+        initial_render = render_documents(doc_order, first_sample_id)
+        if isinstance(initial_render, dict):
+            document_list.value = initial_render["value"]
+        else:
+            document_list.value = initial_render.value
+        # Connect events for up/down buttons (these are triggered by JavaScript)
+        def doc_move_handler(doc_idx, direction, current_order, sample_id):
+            """Move one document and re-render the list.
+
+            Args:
+                doc_idx: Candidate index to move (value of the hidden Number).
+                direction: ``"up"`` or ``"down"``.
+                current_order: Current display order held in the State.
+                sample_id: ID of the sample being shown.
+
+            Returns:
+                ``(new_order, html_update)`` for the order State and the HTML list.
+            """
+            new_order = move_document(current_order, doc_idx, direction)
+            html_update = render_documents(new_order, sample_id)
+            return new_order, html_update
+        # The direction is a constant per button, so it is bound in the callback
+        # instead of being passed through a gr.Textbox created inside `inputs`:
+        # a component instantiated there is added to the layout as an extra,
+        # editable field.
+        up_btn.click(
+            lambda doc_idx, current_order, sample_id: doc_move_handler(doc_idx, "up", current_order, sample_id),
+            inputs=[doc_index, current_document_order, current_sample_id],
+            outputs=[current_document_order, document_list]
+        )
+        down_btn.click(
+            lambda doc_idx, current_order, sample_id: doc_move_handler(doc_idx, "down", current_order, sample_id),
+            inputs=[doc_index, current_document_order, current_sample_id],
+            outputs=[current_document_order, document_list]
+        )
+        # Load a sample and update the interface
         def load_sample(sample_id):
+            """Return the refreshed UI state for ``sample_id``.
+
+            The tuple is ``(doc_order, query, html_update, status, progress)``,
+            matching the outputs list of the navigation events.
+            """
+            order, query, status, progress = initialize_documents(sample_id)
+            return order, query, render_documents(order, sample_id), status, progress
+        # Navigation events
+        def nav_sample(current_id, direction):
+            """Return the ID of the neighbouring sample.
+
+            ``direction`` is ``"next"`` or ``"prev"``. ``current_id`` is returned
+            unchanged when it is unknown, when the direction is anything else,
+            or when there is no neighbour on that side.
+            """
+            here = next((s for s in samples if s["id"] == current_id), None)
+            if not here:
+                return current_id
+            position = samples.index(here)
+            last_position = len(samples) - 1
+            # Work out the target position; None means "stay where we are".
+            if direction == "next":
+                target = position + 1 if position < last_position else None
+            elif direction == "prev":
+                target = position - 1 if position > 0 else None
+            else:
+                target = None
+            return current_id if target is None else samples[target]["id"]
         next_btn.click(
+            lambda id: nav_sample(id, "next"),
             inputs=[current_sample_id],
             outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
+            outputs=[current_document_order, query_text, document_list, status_box, progress_text]
         )
         prev_btn.click(
+            lambda id: nav_sample(id, "prev"),
             inputs=[current_sample_id],
             outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
+            outputs=[current_document_order, query_text, document_list, status_box, progress_text]
+        )
+        # Submit rankings
+        submit_btn.click(
+            save_ranking,
+            inputs=[current_sample_id, current_document_order],
+            outputs=[status_box, progress_text]
         )
+        # Save all results
+        def save_results():
+            """Write every collected annotation to a JSON file.
+
+            The file is named ``<task_name>_human_results.json`` and created
+            relative to the working directory. Returns a status message with
+            the path and the number of annotations.
+            """
+            task_name = task_data['task_name']
+            output_path = f"{task_name}_human_results.json"
+            with open(output_path, "w") as out_file:
+                json.dump(results, out_file, indent=2)
+            annotation_count = len(results['annotations'])
+            return f"✅ Results saved to {output_path} ({annotation_count} annotations)"
         save_btn.click(save_results, outputs=[status_box])
     return demo