AdnanElAssadi commited on
Commit
9069a07
·
verified ·
1 Parent(s): f39c123

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -352
app.py CHANGED
@@ -286,369 +286,166 @@ def create_reranking_interface(task_data):
286
  inputs=[],
287
  outputs=document_containers
288
  )
289
-
290
- with gr.Row(equal_height=True):
291
- prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
292
- submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
293
- next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
294
-
295
- with gr.Row():
296
- save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
297
- # Add a clear button to reset rankings
298
- clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")
299
-
300
- # Initialize the document list for the first sample
301
- status_val, progress_val = initialize_document_list(samples[0]["id"])
302
- status_box.value = status_val
303
- progress_text.value = progress_val
304
-
305
- # Navigation functions
306
- def nav_to_prev(current_id):
307
- current_sample = next((s for s in samples if s["id"] == current_id), None)
308
- if not current_sample:
 
 
 
 
 
 
309
  return current_id
310
 
311
- current_idx = samples.index(current_sample)
312
- if current_idx > 0:
313
- prev_sample = samples[current_idx - 1]
314
- return prev_sample["id"]
315
- return current_id
316
-
317
- def nav_to_next(current_id):
318
- current_sample = next((s for s in samples if s["id"] == current_id), None)
319
- if not current_sample:
320
  return current_id
321
 
322
- current_idx = samples.index(current_sample)
323
- if current_idx < len(samples) - 1:
324
- next_sample = samples[current_idx + 1]
325
- return next_sample["id"]
326
- return current_id
327
-
328
- # Connect navigation buttons
329
- prev_btn.click(
330
- nav_to_prev,
331
- inputs=[current_sample_id],
332
- outputs=[current_sample_id]
333
- ).then(
334
- lambda id: initialize_document_list(id),
335
- inputs=[current_sample_id],
336
- outputs=[status_box, progress_text]
337
- ).then(
338
- lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
339
- inputs=[current_sample_id],
340
- outputs=[query_text]
341
- )
342
-
343
- next_btn.click(
344
- nav_to_next,
345
- inputs=[current_sample_id],
346
- outputs=[current_sample_id]
347
- ).then(
348
- lambda id: initialize_document_list(id),
349
- inputs=[current_sample_id],
350
- outputs=[status_box, progress_text]
351
- ).then(
352
- lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
353
- inputs=[current_sample_id],
354
- outputs=[query_text]
355
- )
356
-
357
- # Submit button
358
- submit_btn.click(
359
- save_ranking,
360
- inputs=[current_sample_id],
361
- outputs=[status_box, progress_text]
362
- )
363
-
364
- # Add function to reset the current ranking order
365
- def reset_order():
366
- nonlocal current_order
367
- sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
368
- if not sample:
369
- return [gr.update() for _ in document_containers]
370
-
371
- docs = sample["candidates"]
372
- # Reset to original order
373
- current_order = list(range(len(docs)))
374
-
375
- # Update UI
376
- updates = []
377
- for i in range(len(document_containers)):
378
- if i < len(docs):
379
- doc_idx = current_order[i]
380
- rank_class = f"rank-{i+1}"
381
- updates.append(gr.update(
382
- value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
383
- visible=True,
384
- elem_classes=["document-text", rank_class]
385
- ))
386
- else:
387
- updates.append(gr.update(value="", visible=False))
 
 
 
 
 
 
 
 
 
 
 
388
 
389
- return updates
390
-
391
- # Connect reset button
392
- clear_btn.click(
393
- reset_order,
394
- inputs=[],
395
- outputs=document_containers
396
- )
397
-
398
- # Save all results
399
- def save_results():
400
- """Save all collected results to a file."""
401
- output_path = f"{task_data['task_name']}_human_results.json"
402
- with open(output_path, "w") as f:
403
- json.dump(results, f, indent=2)
404
- return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
405
-
406
- save_btn.click(save_results, outputs=[status_box])
407
 
408
  return demo
409
 
410
- # Main app with file upload capability
411
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  gr.Markdown("# MTEB Human Evaluation Demo")
413
 
414
- with gr.Tabs():
415
- with gr.TabItem("Demo"):
416
- gr.Markdown("""
417
- ## MTEB Human Evaluation Interface
418
-
419
- This interface allows you to evaluate the relevance of documents for reranking tasks.
420
- """)
421
-
422
- # Function to get the most recent task file
423
- def get_latest_task_file():
424
- # Check first in uploaded_tasks directory
425
- os.makedirs("uploaded_tasks", exist_ok=True)
426
- uploaded_tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
427
-
428
- if uploaded_tasks:
429
- # Sort by modification time, newest first
430
- uploaded_tasks.sort(key=lambda x: os.path.getmtime(os.path.join("uploaded_tasks", x)), reverse=True)
431
- return os.path.join("uploaded_tasks", uploaded_tasks[0])
432
-
433
- # Fall back to default example
434
- return "AskUbuntuDupQuestions_human_eval.json"
435
-
436
- # Load the task file
437
- task_file = get_latest_task_file()
438
-
439
- try:
440
- with open(task_file, "r") as f:
441
- task_data = json.load(f)
442
-
443
- # Show which task is currently loaded
444
- gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
445
-
446
- # Display the interface
447
- reranking_demo = create_reranking_interface(task_data)
448
- except Exception as e:
449
- gr.Markdown(f"**Error loading task: {str(e)}**")
450
- gr.Markdown("Please upload a valid task file in the 'Upload & Evaluate' tab.")
451
 
452
- with gr.TabItem("Upload & Evaluate"):
453
- gr.Markdown("""
454
- ## Upload Your Own Task File
455
-
456
- If you have a prepared task file, you can upload it here to create an evaluation interface.
457
- """)
458
-
459
- with gr.Row():
460
- with gr.Column(scale=1):
461
- file_input = gr.File(label="Upload a task file (JSON)")
462
- load_btn = gr.Button("Load Task")
463
- upload_message = gr.Textbox(label="Status", interactive=False, value="")
464
-
465
- # Add task list for previously uploaded tasks
466
- gr.Markdown("### Previous Uploads")
467
-
468
- # Function to list existing task files in the tasks directory
469
- def list_task_files():
470
- os.makedirs("uploaded_tasks", exist_ok=True)
471
- tasks = [f for f in os.listdir("uploaded_tasks") if f.endswith(".json")]
472
- if not tasks:
473
- return "No task files uploaded yet."
474
- return "\n".join([f"- {t}" for t in tasks])
475
-
476
- task_list = gr.Markdown(list_task_files())
477
- refresh_btn = gr.Button("Refresh List")
478
-
479
- # Add results management section
480
- gr.Markdown("### Results Management")
481
-
482
- # Function to list existing result files
483
- def list_result_files():
484
- results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
485
- if not results:
486
- return "No result files available yet."
487
-
488
- result_links = []
489
- for r in results:
490
- # Calculate completion stats
491
- try:
492
- with open(r, "r") as f:
493
- result_data = json.load(f)
494
- annotation_count = len(result_data.get("annotations", []))
495
- task_name = result_data.get("task_name", "Unknown")
496
- result_links.append(f"- {r} ({annotation_count} annotations for {task_name})")
497
- except:
498
- result_links.append(f"- {r}")
499
-
500
- return "\n".join(result_links)
501
-
502
- results_list = gr.Markdown(list_result_files())
503
- download_results_btn = gr.Button("Download Results")
504
-
505
- # Right side - will contain the actual interface
506
- with gr.Column(scale=2):
507
- task_container = gr.HTML()
508
-
509
- # Handle file upload and storage
510
- def handle_upload(file):
511
- if not file:
512
- return "Please upload a task file", task_list.value, task_container.value
513
-
514
- try:
515
- # Create directory if it doesn't exist
516
- os.makedirs("uploaded_tasks", exist_ok=True)
517
-
518
- # Read the uploaded file
519
- with open(file.name, "r") as f:
520
- task_data = json.load(f)
521
-
522
- # Validate task format
523
- if "task_name" not in task_data or "samples" not in task_data:
524
- return "Invalid task file format. Must contain 'task_name' and 'samples' fields.", task_list.value, task_container.value
525
-
526
- # Save to a consistent location
527
- task_filename = f"uploaded_tasks/{task_data['task_name']}_task.json"
528
- with open(task_filename, "w") as f:
529
- json.dump(task_data, f, indent=2)
530
-
531
- # Instead of trying to create the interface here,
532
- # we'll return a message with instructions
533
- return f"Task '{task_data['task_name']}' uploaded successfully with {len(task_data['samples'])} samples. Please refresh the app and use the Demo tab to evaluate it.", list_task_files(), f"""
534
- <div style="padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
535
- <h3>Task uploaded successfully!</h3>
536
- <p>Task Name: {task_data['task_name']}</p>
537
- <p>Samples: {len(task_data['samples'])}</p>
538
- <p>To evaluate this task:</p>
539
- <ol>
540
- <li>Refresh the app</li>
541
- <li>The Demo tab will now use your uploaded task</li>
542
- <li>Complete your evaluations</li>
543
- <li>Results will be saved as {task_data['task_name']}_human_results.json</li>
544
- </ol>
545
- </div>
546
- """
547
- except Exception as e:
548
- return f"Error processing task file: {str(e)}", task_list.value, task_container.value
549
-
550
- # Function to prepare results for download
551
- def prepare_results_for_download():
552
- results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
553
- if not results:
554
- return None
555
-
556
- # Create a zip file with all results
557
- import zipfile
558
- zip_path = "mteb_human_eval_results.zip"
559
- with zipfile.ZipFile(zip_path, 'w') as zipf:
560
- for r in results:
561
- zipf.write(r)
562
-
563
- return zip_path
564
-
565
- # Connect events
566
- load_btn.click(handle_upload, inputs=[file_input], outputs=[upload_message, task_list, task_container])
567
- refresh_btn.click(list_task_files, outputs=[task_list])
568
- download_results_btn.click(prepare_results_for_download, outputs=[gr.File(label="Download Results")])
569
 
570
- with gr.TabItem("Results Management"):
571
- gr.Markdown("""
572
- ## Manage Evaluation Results
573
-
574
- View, download, and analyze your evaluation results.
575
- """)
576
-
577
- # Function to load and display result stats
578
- def get_result_stats():
579
- results = [f for f in os.listdir(".") if f.endswith("_human_results.json")]
580
- if not results:
581
- return "No result files available yet."
582
-
583
- stats = []
584
- for r in results:
585
- try:
586
- with open(r, "r") as f:
587
- result_data = json.load(f)
588
-
589
- task_name = result_data.get("task_name", "Unknown")
590
- annotations = result_data.get("annotations", [])
591
- annotation_count = len(annotations)
592
-
593
- # Calculate completion percentage
594
- sample_ids = set(a.get("sample_id") for a in annotations)
595
-
596
- # Try to get the total sample count from the corresponding task file
597
- total_samples = 0
598
- task_file = f"uploaded_tasks/{task_name}_task.json"
599
- if os.path.exists(task_file):
600
- with open(task_file, "r") as f:
601
- task_data = json.load(f)
602
- total_samples = len(task_data.get("samples", []))
603
-
604
- completion = f"{len(sample_ids)}/{total_samples}" if total_samples else f"{len(sample_ids)} samples"
605
-
606
- stats.append(f"### {task_name}\n- Annotations: {annotation_count}\n- Completion: {completion}\n- File: {r}")
607
- except Exception as e:
608
- stats.append(f"### {r}\n- Error loading results: {str(e)}")
609
-
610
- return "\n\n".join(stats)
611
-
612
- result_stats = gr.Markdown(get_result_stats())
613
- refresh_results_btn = gr.Button("Refresh Results")
614
-
615
- # Add download options
616
- with gr.Row():
617
- download_all_btn = gr.Button("Download All Results (ZIP)")
618
- result_select = gr.Dropdown(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")], label="Select Result to Download")
619
- download_selected_btn = gr.Button("Download Selected")
620
-
621
- # Add results visualization placeholder
622
- gr.Markdown("### Results Visualization")
623
- gr.Markdown("*Visualization features will be added in a future update.*")
624
-
625
- # Connect events
626
- refresh_results_btn.click(get_result_stats, outputs=[result_stats])
627
-
628
- # Function to prepare all results for download as ZIP
629
- def prepare_all_results():
630
- import zipfile
631
- zip_path = "mteb_human_eval_results.zip"
632
- with zipfile.ZipFile(zip_path, 'w') as zipf:
633
- for r in [f for f in os.listdir(".") if f.endswith("_human_results.json")]:
634
- zipf.write(r)
635
- return zip_path
636
-
637
- # Function to return a single result file
638
- def get_selected_result(filename):
639
- if not filename:
640
- return None
641
- if os.path.exists(filename):
642
- return filename
643
- return None
644
-
645
- # Update dropdown when refreshing results
646
- def update_result_dropdown():
647
- return gr.Dropdown.update(choices=[f for f in os.listdir(".") if f.endswith("_human_results.json")])
648
-
649
- refresh_results_btn.click(update_result_dropdown, outputs=[result_select])
650
- download_all_btn.click(prepare_all_results, outputs=[gr.File(label="Download All Results")])
651
- download_selected_btn.click(get_selected_result, inputs=[result_select], outputs=[gr.File(label="Download Selected Result")])
652
 
653
  if __name__ == "__main__":
654
- demo.launch()
 
286
  inputs=[],
287
  outputs=document_containers
288
  )
289
+
290
+ with gr.Row(equal_height=True):
291
+ prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
292
+ submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
293
+ next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
294
+
295
+ with gr.Row():
296
+ save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
297
+ # Add a clear button to reset rankings
298
+ clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")
299
+
300
+ # Initialize the document list for the first sample
301
+ status_val, progress_val = initialize_document_list(samples[0]["id"])
302
+ status_box.value = status_val
303
+ progress_text.value = progress_val
304
+
305
+ # Navigation functions
306
+ def nav_to_prev(current_id):
307
+ current_sample = next((s for s in samples if s["id"] == current_id), None)
308
+ if not current_sample:
309
+ return current_id
310
+
311
+ current_idx = samples.index(current_sample)
312
+ if current_idx > 0:
313
+ prev_sample = samples[current_idx - 1]
314
+ return prev_sample["id"]
315
  return current_id
316
 
317
+ def nav_to_next(current_id):
318
+ current_sample = next((s for s in samples if s["id"] == current_id), None)
319
+ if not current_sample:
320
+ return current_id
321
+
322
+ current_idx = samples.index(current_sample)
323
+ if current_idx < len(samples) - 1:
324
+ next_sample = samples[current_idx + 1]
325
+ return next_sample["id"]
326
  return current_id
327
 
328
+ # Connect navigation buttons
329
+ prev_btn.click(
330
+ nav_to_prev,
331
+ inputs=[current_sample_id],
332
+ outputs=[current_sample_id]
333
+ ).then(
334
+ lambda id: initialize_document_list(id),
335
+ inputs=[current_sample_id],
336
+ outputs=[status_box, progress_text]
337
+ ).then(
338
+ lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
339
+ inputs=[current_sample_id],
340
+ outputs=[query_text]
341
+ )
342
+
343
+ next_btn.click(
344
+ nav_to_next,
345
+ inputs=[current_sample_id],
346
+ outputs=[current_sample_id]
347
+ ).then(
348
+ lambda id: initialize_document_list(id),
349
+ inputs=[current_sample_id],
350
+ outputs=[status_box, progress_text]
351
+ ).then(
352
+ lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
353
+ inputs=[current_sample_id],
354
+ outputs=[query_text]
355
+ )
356
+
357
+ # Submit button
358
+ submit_btn.click(
359
+ save_ranking,
360
+ inputs=[current_sample_id],
361
+ outputs=[status_box, progress_text]
362
+ )
363
+
364
+ # Add function to reset the current ranking order
365
+ def reset_order():
366
+ nonlocal current_order
367
+ sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
368
+ if not sample:
369
+ return [gr.update() for _ in document_containers]
370
+
371
+ docs = sample["candidates"]
372
+ # Reset to original order
373
+ current_order = list(range(len(docs)))
374
+
375
+ # Update UI
376
+ updates = []
377
+ for i in range(len(document_containers)):
378
+ if i < len(docs):
379
+ doc_idx = current_order[i]
380
+ rank_class = f"rank-{i+1}"
381
+ updates.append(gr.update(
382
+ value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
383
+ visible=True,
384
+ elem_classes=["document-text", rank_class]
385
+ ))
386
+ else:
387
+ updates.append(gr.update(value="", visible=False))
388
+
389
+ return updates
390
+
391
+ # Connect reset button
392
+ clear_btn.click(
393
+ reset_order,
394
+ inputs=[],
395
+ outputs=document_containers
396
+ )
397
+
398
+ # Save all results
399
+ def save_results():
400
+ """Save all collected results to a file."""
401
+ output_path = f"{task_data['task_name']}_human_results.json"
402
+ with open(output_path, "w") as f:
403
+ json.dump(results, f, indent=2)
404
+ return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
405
 
406
+ save_btn.click(save_results, outputs=[status_box])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
 
408
  return demo
409
 
410
+ # Just use a simplified version with the main demo functionality
411
+ def get_task_file():
412
+ """Get the task file to use for the demo."""
413
+ # Try to load the default example
414
+ default_task = "AskUbuntuDupQuestions_human_eval.json"
415
+ if os.path.exists(default_task):
416
+ return default_task
417
+
418
+ # If default doesn't exist, look for any other task files
419
+ task_files = [f for f in os.listdir(".") if f.endswith("_human_eval.json")]
420
+ if task_files:
421
+ return task_files[0]
422
+
423
+ # Raise error if no task files found
424
+ raise FileNotFoundError("No task files found. Please ensure there's a *_human_eval.json file in the current directory.")
425
+
426
+ # Main app with simplified structure
427
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
428
  gr.Markdown("# MTEB Human Evaluation Demo")
429
 
430
+ try:
431
+ # Load the task file
432
+ task_file = get_task_file()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
+ with open(task_file, "r") as f:
435
+ task_data = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
 
437
+ # Show which task is currently loaded
438
+ gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
439
+
440
+ # Display the interface
441
+ reranking_demo = create_reranking_interface(task_data)
442
+ except Exception as e:
443
+ gr.Markdown(f"**Error loading task: {str(e)}**")
444
+ error_details = gr.Textbox(
445
+ value=str(e),
446
+ label="Error Details",
447
+ interactive=False
448
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
  if __name__ == "__main__":
451
+ app.launch()