Spaces:

AdnanElAssadi
/

MTEB-Human-Eval-Demo

Sleeping

App Files Files Community

AdnanElAssadi committed on Apr 7

Commit

5cee7bc

verified ·

1 Parent(s): c7a3012

Update app.py

Browse files

Files changed (1) hide show

app.py +250 -226

app.py CHANGED Viewed

@@ -4,172 +4,26 @@ import os
 from pathlib import Path
 def create_reranking_interface(task_data):
-    """Create a Gradio interface for reranking evaluation using drag and drop."""
     samples = task_data["samples"]
     results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
     completed_samples = {s["id"]: False for s in samples}
-    # Define helper functions before UI elements are created
-    def generate_sortable_html(candidates, existing_ranks=None):
-        """Generate HTML with simple dropdowns for ranking."""
-        # Use existing ranks if available
-        ranks = [0] * len(candidates)
-        if existing_ranks and len(existing_ranks) == len(candidates):
-            ranks = existing_ranks.copy()
-        # Generate a unique ID for this set of dropdowns to avoid conflicts
-        import random
-        import time
-        dropdown_group_id = f"rank_group_{int(time.time())}_{random.randint(1000, 9999)}"
-        html = f"""
-        <div class="ranking-simple">
-            <input type="hidden" id="rank-order-state" value="">
-            <div class="rank-instructions">Select a rank (1-{len(candidates)}) for each document.</div>
-        """
-        # Add each document with a dropdown selector
-        for i, doc in enumerate(candidates):
-            import html as html_escaper
-            escaped_doc = html_escaper.escape(doc)
-            current_rank = ranks[i] if ranks[i] > 0 else i + 1
-            html += f"""
-            <div class="rank-item" data-doc-id="{i}">
-                <div class="rank-selector">
-                    <select class="rank-dropdown" data-doc-id="{i}" onchange="updateRankOrder('{dropdown_group_id}')">
-            """
-            # Add options 1 through N
-            for rank in range(1, len(candidates) + 1):
-                selected = "selected" if rank == current_rank else ""
-                html += f'<option value="{rank}" {selected}>{rank}</option>'
-            html += f"""
-                    </select>
-                </div>
-                <div class="doc-content">{escaped_doc}</div>
-            </div>
-            """
-        # Add the JavaScript for handling rank updates
-        html += f"""
-        <script>
-        // Function to update the hidden state when dropdowns change
-        function updateRankOrder(groupId) {{
-            const items = document.querySelectorAll('.rank-item');
-            const selectedRanks = new Map();
-            const docOrder = [];
-            // First collect all selected ranks
-            items.forEach(item => {{
-                const docId = parseInt(item.getAttribute('data-doc-id'));
-                const dropdown = item.querySelector('.rank-dropdown');
-                const rank = parseInt(dropdown.value);
-                selectedRanks.set(docId, rank);
-            }});
-            // Sort documents by their selected rank
-            const sortedDocs = Array.from(selectedRanks.entries())
-                .sort((a, b) => a[1] - b[1])
-                .map(entry => entry[0]);
-            // Update the order state
-            const orderInput = document.querySelector('#current-order textarea');
-            if (orderInput) {{
-                orderInput.value = JSON.stringify(sortedDocs);
-                const event = new Event('input', {{ bubbles: true }});
-                orderInput.dispatchEvent(event);
-            }}
-        }}
-        // Initialize on page load
-        document.addEventListener('DOMContentLoaded', function() {{
-            updateRankOrder('{dropdown_group_id}');
-        }});
-        // Backup initialization for iframe environments
-        setTimeout(function() {{
-            updateRankOrder('{dropdown_group_id}');
-        }}, 1000);
-        </script>
-        </div>
-        """
-        return html
-    def save_ranking(order_json, sample_id):
-        """Save the current ranking to results."""
-        try:
-            if not order_json or order_json == "[]":
-                return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
-            order = json.loads(order_json)
-            num_candidates = len(next(s["candidates"] for s in samples if s["id"] == sample_id))
-            if len(order) != num_candidates:
-                return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
-            rankings = [0] * num_candidates
-            for rank_minus_1, doc_idx in enumerate(order):
-                if doc_idx < num_candidates:
-                    rankings[doc_idx] = rank_minus_1 + 1
-                else:
-                    raise ValueError(f"Invalid document index {doc_idx} found in order.")
-            if sorted(rankings) != list(range(1, num_candidates + 1)):
-                return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
-            annotation = {"sample_id": sample_id, "rankings": rankings}
-            existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
-            if existing_idx is not None:
-                results["annotations"][existing_idx] = annotation
-            else:
-                results["annotations"].append(annotation)
-            completed_samples[sample_id] = True
-            output_path = f"{task_data['task_name']}_human_results.json"
-            with open(output_path, "w") as f:
-                json.dump(results, f, indent=2)
-            return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-        except json.JSONDecodeError:
-            return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
-        except Exception as e:
-            import traceback
-            print(traceback.format_exc())
-            return f"Error saving ranking: {str(e)}", progress_text.value
-    def load_sample(sample_id):
-        """Load a sample into the interface."""
         try:
-            sample = next((s for s in samples if s["id"] == sample_id), None)
-            if not sample:
-                return gr.update(), gr.update(value="[]"), gr.update(), gr.update()
-            existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
-            new_html = generate_sortable_html(sample["candidates"], existing_ranking)
-            status = "Ready to rank" if not completed_samples.get(sample_id, False) else "Already ranked"
-            progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            return sample["query"], new_html, "[]", progress, status
         except Exception as e:
-            return gr.update(), gr.update(value="[]"), gr.update(), gr.update(value=f"Error loading sample: {str(e)}")
-    def next_sample_id(current_id):
-        current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
-        if current_idx == -1:
-            return current_id
-        next_idx = min(current_idx + 1, len(samples) - 1)
-        return samples[next_idx]["id"]
-    def prev_sample_id(current_id):
-        current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
-        if current_idx == -1:
-            return current_id
-        prev_idx = max(current_idx - 1, 0)
-        return samples[prev_idx]["id"]
-    def save_results():
-        output_path = f"{task_data['task_name']}_human_results.json"
-        try:
-            with open(output_path, "w") as f:
-                json.dump(results, f, indent=2)
-            return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
-        except Exception as e:
-            return f"⚠️ Error saving results file: {str(e)}"
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
         with gr.Accordion("Instructions", open=True):
@@ -180,107 +34,277 @@ def create_reranking_interface(task_data):
             ### How to use this interface:
             1. Read the query at the top
-            2. Drag and drop documents to reorder them based on relevance
-            3. Top document = Rank 1, Second = Rank 2, etc.
             4. Click "Submit Rankings" when you're done with the current query
             5. Use "Previous" and "Next" to navigate between queries
             6. Click "Save All Results" periodically to ensure your work is saved
             """.format(instructions=task_data["instructions"]))
         current_sample_id = gr.State(value=samples[0]["id"])
         with gr.Row():
-            progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
             status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
         with gr.Group():
             gr.Markdown("## Query:")
-            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
-            gr.Markdown("## Documents to Rank (Drag to Reorder):")
-            sortable_list = gr.HTML(generate_sortable_html(samples[0]["candidates"], []), elem_id="sortable-list-container")
-            order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
             with gr.Row():
-                prev_btn = gr.Button("← Previous Query", size="sm", elem_id="prev-btn")
-                submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary", elem_id="submit-btn")
-                next_btn = gr.Button("Next Query →", size="sm", elem_id="next-btn")
             save_btn = gr.Button("💾 Save All Results", variant="secondary")
-        js_code = """
-        <style>
-        /* Simple dropdown ranking styles */
-        .ranking-simple {
-            width: 100%;
-            max-width: 100%;
-            margin: 0 auto;
-        }
-        .rank-instructions {
-            margin-bottom: 15px;
-            padding: 10px;
-            background-color: #f0f9ff;
-            border-left: 4px solid #3b82f6;
-            border-radius: 4px;
-        }
-        .rank-item {
-            display: flex;
-            align-items: flex-start;
-            padding: 12px;
-            margin-bottom: 10px;
-            background: white;
-            border: 1px solid #e0e0e0;
-            border-radius: 6px;
-        }
-        .rank-selector {
-            margin-right: 15px;
-            min-width: 70px;
-        }
-        .rank-dropdown {
-            width: 60px;
-            padding: 6px;
-            border: 1px solid #d1d5db;
-            border-radius: 4px;
-            background-color: white;
-            font-size: 14px;
-        }
-        .doc-content {
-            flex: 1;
-            line-height: 1.5;
-            padding: 5px 0;
-        }
-        </style>
-        """
-        gr.HTML(js_code)
         submit_btn.click(
-            save_ranking,
-            inputs=[order_state, current_sample_id],
             outputs=[status_box, progress_text]
         )
         next_btn.click(
-            next_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
-            outputs=[query_text, sortable_list, order_state, progress_text, status_box]
         )
         prev_btn.click(
-            prev_sample_id, inputs=[current_sample_id], outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
-            outputs=[query_text, sortable_list, order_state, progress_text, status_box]
         )
         save_btn.click(save_results, outputs=[status_box])
-        demo.load(lambda: load_sample(samples[0]['id']),
-                  outputs=[query_text, sortable_list, order_state, progress_text, status_box])
     return demo

 from pathlib import Path
 def create_reranking_interface(task_data):
+    """Create a Gradio interface for reranking evaluation."""
     samples = task_data["samples"]
     results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
     completed_samples = {s["id"]: False for s in samples}
+    # Try to load existing results
+    output_path = f"{task_data['task_name']}_human_results.json"
+    if os.path.exists(output_path):
         try:
+            with open(output_path, "r") as f:
+                existing_results = json.load(f)
+                results = existing_results
+                # Update completed samples based on existing annotations
+                for anno in results.get("annotations", []):
+                    if "sample_id" in anno:
+                        completed_samples[anno["sample_id"]] = True
         except Exception as e:
+            print(f"Error loading existing results: {str(e)}")
+    # Create the main interface
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
         with gr.Accordion("Instructions", open=True):
             ### How to use this interface:
             1. Read the query at the top
+            2. For each document, select its rank (1 = most relevant)
+            3. Make sure each document has a unique rank (1 to N)
             4. Click "Submit Rankings" when you're done with the current query
             5. Use "Previous" and "Next" to navigate between queries
             6. Click "Save All Results" periodically to ensure your work is saved
             """.format(instructions=task_data["instructions"]))
+        # State variables
         current_sample_id = gr.State(value=samples[0]["id"])
+        # Progress tracking
         with gr.Row():
+            progress_text = gr.Textbox(label="Progress", value=f"Progress: {sum(completed_samples.values())}/{len(samples)}", interactive=False)
             status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
+        # Query display
         with gr.Group():
             gr.Markdown("## Query:")
+            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False, lines=3)
+            # Validation
             with gr.Row():
+                validate_btn = gr.Button("Validate Rankings", variant="secondary")
+                validation_text = gr.Textbox(label="Validation", interactive=False)
+            # Document ranking section
+            gr.Markdown("## Documents to Rank:")
+            # Container for document elements
+            doc_containers = []
+            rank_inputs = []
+            doc_texts = []
+            # Create a container for up to 10 documents
+            max_docs = 10
+            for i in range(max_docs):
+                with gr.Group(visible=(i < len(samples[0]["candidates"]))) as doc_container:
+                    doc_containers.append(doc_container)
+                    with gr.Row():
+                        # Rank selection
+                        with gr.Column(scale=1, min_width=100):
+                            rank_input = gr.Number(
+                                value=i+1,
+                                label=f"Rank",
+                                minimum=1,
+                                maximum=len(samples[0]["candidates"]),
+                                step=1,
+                                interactive=True
+                            )
+                            rank_inputs.append(rank_input)
+                        # Document text
+                        with gr.Column(scale=4):
+                            doc_text = gr.Textbox(
+                                value=samples[0]["candidates"][i] if i < len(samples[0]["candidates"]) else "",
+                                label=f"Document {i+1}",
+                                lines=4,
+                                interactive=False
+                            )
+                            doc_texts.append(doc_text)
+                    gr.Markdown("---")
+            # Navigation and submission buttons
+            with gr.Row():
+                prev_btn = gr.Button("← Previous Query", size="sm")
+                submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
+                next_btn = gr.Button("Next →", size="sm")
             save_btn = gr.Button("💾 Save All Results", variant="secondary")
+        # Function to validate rankings
+        def validate_rankings(*ranks):
+            try:
+                # Filter out None values
+                valid_ranks = [int(r) for r in ranks if r is not None]
+                # Check for duplicates
+                if len(set(valid_ranks)) != len(valid_ranks):
+                    # Find duplicate ranks
+                    dupes = {}
+                    for r in valid_ranks:
+                        dupes[r] = dupes.get(r, 0) + 1
+                    duplicates = [r for r, count in dupes.items() if count > 1]
+                    return f"⚠️ Duplicate ranks found: {', '.join(str(d) for d in sorted(duplicates))}. Each document must have a unique rank."
+                # Check for complete ranking
+                max_rank = max(valid_ranks) if valid_ranks else 0
+                expected_ranks = set(range(1, max_rank + 1))
+                if set(valid_ranks) != expected_ranks:
+                    missing = sorted(expected_ranks - set(valid_ranks))
+                    if missing:
+                        return f"⚠️ Missing ranks: {', '.join(str(m) for m in missing)}. Ranks must be consecutive integers from 1 to {max_rank}."
+                return "✓ Rankings are valid! Ready to submit."
+            except Exception as e:
+                return f"Error validating rankings: {str(e)}"
+        # Function to load a sample
+        def load_sample(sample_id):
+            try:
+                sample = next((s for s in samples if s["id"] == sample_id), None)
+                if not sample:
+                    return [gr.update()] * (3 + 2*max_docs)
+                candidates = sample["candidates"]
+                num_docs = len(candidates)
+                # Get existing ranking if available
+                existing_ranking = next((anno["rankings"] for anno in results["annotations"] if anno["sample_id"] == sample_id), None)
+                # Set default ranks (from existing or sequential)
+                ranks = []
+                for i in range(num_docs):
+                    if existing_ranking and i < len(existing_ranking):
+                        ranks.append(existing_ranking[i])
+                    else:
+                        ranks.append(i + 1)
+                # Set container visibility
+                container_visibility = [i < num_docs for i in range(max_docs)]
+                # Update maximum values for number inputs
+                for input_field in rank_inputs:
+                    input_field.maximum = num_docs
+                # Fill in document contents
+                docs = [candidates[i] if i < num_docs else "" for i in range(max_docs)]
+                # Update visuals based on completed status
+                status = "Already ranked" if completed_samples.get(sample_id, False) else "Ready to rank"
+                progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+                # Prepare all outputs
+                outputs = [sample["query"], progress, status]
+                outputs.extend(ranks)  # Rank values
+                outputs.extend(docs)   # Document texts
+                outputs.extend(container_visibility)  # Container visibilities
+                return outputs
+            except Exception as e:
+                import traceback
+                print(traceback.format_exc())
+                return [gr.update(value=f"Error loading sample: {str(e)}")] + [gr.update()] * (2 + 2*max_docs)
+        # Function to save rankings
+        def save_rankings(sample_id, *ranks):
+            try:
+                # Get the sample
+                sample = next((s for s in samples if s["id"] == sample_id), None)
+                if not sample:
+                    return "⚠️ Sample not found", progress_text.value
+                num_candidates = len(sample["candidates"])
+                # Get the rankings for just this sample
+                valid_ranks = [int(r) for r in ranks[:num_candidates] if r is not None]
+                # Validate rankings
+                if len(valid_ranks) != num_candidates:
+                    return f"⚠️ Not all documents have ranks. Expected {num_candidates}, got {len(valid_ranks)}.", progress_text.value
+                if sorted(valid_ranks) != list(range(1, num_candidates + 1)):
+                    return "⚠️ Rankings must include all integers from 1 to " + str(num_candidates), progress_text.value
+                # Create annotation
+                annotation = {"sample_id": sample_id, "rankings": valid_ranks}
+                # Update or add the annotation
+                existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
+                if existing_idx is not None:
+                    results["annotations"][existing_idx] = annotation
+                else:
+                    results["annotations"].append(annotation)
+                # Mark sample as completed
+                completed_samples[sample_id] = True
+                # Save to file
+                with open(output_path, "w") as f:
+                    json.dump(results, f, indent=2)
+                # Update progress
+                progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+                return f"✅ Rankings saved successfully! ({sum(completed_samples.values())}/{len(samples)} completed)", progress
+            except Exception as e:
+                import traceback
+                print(traceback.format_exc())
+                return f"Error saving rankings: {str(e)}", progress_text.value
+        # Function to navigate to next sample
+        def next_sample_id(current_id):
+            current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
+            if current_idx == -1:
+                return current_id
+            next_idx = min(current_idx + 1, len(samples) - 1)
+            return samples[next_idx]["id"]
+        # Function to navigate to previous sample
+        def prev_sample_id(current_id):
+            current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
+            if current_idx == -1:
+                return current_id
+            prev_idx = max(current_idx - 1, 0)
+            return samples[prev_idx]["id"]
+        # Function to save all results
+        def save_results():
+            try:
+                with open(output_path, "w") as f:
+                    json.dump(results, f, indent=2)
+                return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
+            except Exception as e:
+                return f"⚠️ Error saving results file: {str(e)}"
+        # Connect validation button
+        validate_btn.click(
+            validate_rankings,
+            inputs=rank_inputs,
+            outputs=validation_text
+        )
+        # Connect submission button
         submit_btn.click(
+            save_rankings,
+            inputs=[current_sample_id] + rank_inputs,
             outputs=[status_box, progress_text]
         )
+        # Connect navigation buttons
         next_btn.click(
+            next_sample_id,
+            inputs=[current_sample_id],
+            outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
+            outputs=[query_text, progress_text, status_box] +
+                    rank_inputs +
+                    doc_texts +
+                    doc_containers
         )
         prev_btn.click(
+            prev_sample_id,
+            inputs=[current_sample_id],
+            outputs=[current_sample_id]
         ).then(
             load_sample,
             inputs=[current_sample_id],
+            outputs=[query_text, progress_text, status_box] +
+                    rank_inputs +
+                    doc_texts +
+                    doc_containers
         )
+        # Connect save button
         save_btn.click(save_results, outputs=[status_box])
+        # Initialize interface with first sample
+        demo.load(
+            lambda: load_sample(samples[0]['id']),
+            outputs=[query_text, progress_text, status_box] +
+                    rank_inputs +
+                    doc_texts +
+                    doc_containers
+        )
+        # Add CSS styling
+        demo.load(lambda: gr.Accordion.update(open=True), outputs=[])
     return demo