Spaces:

AdnanElAssadi
/

MTEB-Human-Eval-Demo

Sleeping

App Files Files Community

AdnanElAssadi committed on Apr 7

Commit

e1f1819

verified ·

1 Parent(s): 9069a07

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -282

app.py CHANGED Viewed

@@ -3,6 +3,37 @@ import json
 import os
 from pathlib import Path
 def create_reranking_interface(task_data):
     """Create a Gradio interface for reranking evaluation."""
     samples = task_data["samples"]
@@ -18,7 +49,7 @@ def create_reranking_interface(task_data):
             if not current_order:
                 return "⚠️ No document ordering found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            # Convert document positions to rankings (position in list -> document's rank)
             # First document (position 0) gets rank 1, etc.
             rankings = []
             for i, doc_idx in enumerate(current_order):
@@ -45,65 +76,16 @@ def create_reranking_interface(task_data):
                 with open(output_path, "w") as f:
                     json.dump(results, f, indent=2)
                 return f"✅ Rankings saved successfully (in memory and to file)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-            except:
                 # If file saving fails, still mark as success since we saved in memory
-                return f"✅ Rankings saved in memory (file save failed)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
         except Exception as e:
             # Return specific error message
             return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-    with gr.Blocks(theme=gr.themes.Soft(), css="""
-        .document-text {
-            font-size: 16px;
-            padding: 10px;
-            border-radius: 8px;
-            background-color: #f8f9fa;
-            border-left: 5px solid #4CAF50;
-        }
-        .query-box {
-            background-color: #e3f2fd;
-            padding: 16px;
-            border-radius: 8px;
-            border-left: 5px solid #2196F3;
-            font-size: 18px;
-            margin-bottom: 20px;
-        }
-        .progress-indicator {
-            font-weight: bold;
-            text-align: center;
-            padding: 12px;
-            background-color: #f1f8e9;
-            border-radius: 8px;
-            margin: 10px 0;
-        }
-        .status-box {
-            font-weight: bold;
-            text-align: center;
-        }
-        .ranking-box {
-            border: 1px solid #e0e0e0;
-            border-radius: 8px;
-            padding: 15px;
-            margin-bottom: 12px;
-            transition: all 0.3s;
-        }
-        .ranking-box:hover {
-            box-shadow: 0 4px 8px rgba(0,0,0,0.1);
-        }
-        /* Add different colors for rank levels */
-        .rank-1, .rank-2, .rank-3 {
-            border-left: 5px solid #4CAF50; /* Green for top ranks */
-        }
-        .rank-4, .rank-5, .rank-6, .rank-7 {
-            border-left: 5px solid #FFC107; /* Yellow for mid ranks */
-        }
-        .rank-8, .rank-9, .rank-10 {
-            border-left: 5px solid #FF5722; /* Orange for lower ranks */
-        }
-        .rank-11, .rank-12, .rank-13, .rank-14, .rank-15, .rank-16, .rank-17, .rank-18, .rank-19, .rank-20 {
-            border-left: 5px solid #9E9E9E; /* Gray for lowest ranks */
-        }
-    """) as demo:
         gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
         with gr.Accordion("Instructions", open=True):
@@ -119,18 +101,17 @@ def create_reranking_interface(task_data):
                (most relevant at the top, least relevant at the bottom)
             4. Click "Submit Rankings" when you're done with the current query
             5. Use "Previous" and "Next" to navigate between queries
-            6. Click "Save All Results" periodically to ensure your work is saved
             """.format(instructions=task_data["instructions"]))
         current_sample_id = gr.State(value=samples[0]["id"])
-        with gr.Row(elem_classes=["progress-indicator"]):
             progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
-            status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False, elem_classes=["status-box"])
         with gr.Group():
             gr.Markdown("## Query:")
-            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False, elem_classes=["query-box"])
             gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")
@@ -140,190 +121,167 @@ def create_reranking_interface(task_data):
             # Function to initialize the document list for a sample
             def initialize_document_list(sample_id):
                 nonlocal current_order
-                sample = next((s for s in samples if s["id"] == sample_id), None)
-                if not sample:
-                    return "Query not found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
-                # Get the documents for this sample
-                docs = sample["candidates"]
-                # Initialize document order (0, 1, 2, ..., n-1)
-                current_order = list(range(len(docs)))
-                # Check if this sample has already been annotated to restore ordering
-                existing_annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
-                if existing_annotation and "rankings" in existing_annotation:
-                    # Create pairs of (doc_idx, rank)
-                    ranked_docs = []
-                    for doc_idx, rank in enumerate(existing_annotation["rankings"]):
-                        ranked_docs.append((doc_idx, rank))
-                    # Sort by rank (ascending)
-                    ranked_docs.sort(key=lambda x: x[1])
-                    # Extract document indices in rank order
-                    current_order = [doc[0] for doc in ranked_docs]
-                # Update UI
-                for i in range(len(document_containers)):
-                    if i < len(docs):
-                        doc_idx = current_order[i]
-                        rank_class = f"rank-{i+1}"
-                        document_containers[i].update(
-                            value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
-                            visible=True,
-                            elem_classes=["document-text", rank_class]
-                        )
-                    else:
-                        document_containers[i].update(value="", visible=False)
-                # Status message
-                status = f"Viewing query {samples.index(sample) + 1} of {len(samples)}"
-                if completed_samples[sample_id]:
-                    status += " (already completed)"
-                return status, f"Progress: {sum(completed_samples.values())}/{len(samples)}"
             # Create document display containers with up/down buttons
             with gr.Column():
-                gr.Markdown("""
-                ### Instructions for Ranking:
-                - Documents are initially shown in their original order
-                - The most relevant document should be at the TOP (Rank 1)
-                - Use the "Move Up" and "Move Down" buttons to rearrange documents
-                - The rank is shown at the beginning of each document: [RANK X]
-                - When you're satisfied with the order, click "Submit Rankings"
-                """)
-                for i in range(20):  # Now handling up to 20 documents per sample
                     with gr.Group():
-                        with gr.Box():
-                            with gr.Row():
-                                rank_label = gr.Markdown(f"**Document #{i+1}**")
-                                with gr.Column(scale=1):
-                                    up_btn = gr.Button("⬆️ Move Up", size="sm", variant="primary")
-                                    down_btn = gr.Button("⬇️ Move Down", size="sm", variant="secondary")
-                            doc_text = gr.Textbox(
-                                label="",
-                                interactive=False,
-                                elem_id=f"doc-text-{i}",
-                                elem_classes=["document-text"]
-                            )
-                            document_containers.append(doc_text)
-                        # Create a closure that properly captures the current index
                         def make_up_handler(idx):
                             def up_handler():
                                 nonlocal current_order
-                                # Only move if index is valid
-                                if idx < len(current_order) and idx > 0:
-                                    # Swap with the document above
                                     current_order[idx], current_order[idx-1] = current_order[idx-1], current_order[idx]
-                                    # Update all document displays with new order
                                     sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
                                     if sample:
                                         docs = sample["candidates"]
-                                        updates = []
                                         for j in range(len(document_containers)):
-                                            if j < len(current_order) and j < len(docs):
                                                 doc_idx = current_order[j]
-                                                # Add rank-specific class for styling
-                                                rank_class = f"rank-{j+1}"
-                                                updates.append(gr.update(
-                                                    value=f"[RANK {j+1}] Document {doc_idx+1}: {docs[doc_idx]}",
-                                                    visible=True,
-                                                    elem_classes=["document-text", rank_class]
-                                                ))
-                                            else:
-                                                updates.append(gr.update(value="", visible=False))
-                                        return updates
-                                return [gr.update() for _ in document_containers]
                             return up_handler
                         def make_down_handler(idx):
                             def down_handler():
                                 nonlocal current_order
-                                # Only move if index is valid
                                 if idx < len(current_order) - 1:
-                                    # Swap with the document below
                                     current_order[idx], current_order[idx+1] = current_order[idx+1], current_order[idx]
-                                    # Update all document displays with new order
                                     sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
                                     if sample:
                                         docs = sample["candidates"]
-                                        updates = []
                                         for j in range(len(document_containers)):
-                                            if j < len(current_order) and j < len(docs):
                                                 doc_idx = current_order[j]
-                                                # Add rank-specific class for styling
-                                                rank_class = f"rank-{j+1}"
-                                                updates.append(gr.update(
-                                                    value=f"[RANK {j+1}] Document {doc_idx+1}: {docs[doc_idx]}",
-                                                    visible=True,
-                                                    elem_classes=["document-text", rank_class]
-                                                ))
-                                            else:
-                                                updates.append(gr.update(value="", visible=False))
-                                        return updates
-                                return [gr.update() for _ in document_containers]
                             return down_handler
-                        # Connect buttons with properly created handlers
                         up_btn.click(
-                            make_up_handler(i),
-                            inputs=[],
-                            outputs=document_containers
                         )
                         down_btn.click(
-                            make_down_handler(i),
-                            inputs=[],
-                            outputs=document_containers
                         )
-                with gr.Row(equal_height=True):
-                    prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
-                    submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
-                    next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
                 with gr.Row():
-                    save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
-                    # Add a clear button to reset rankings
-                    clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")
-            # Initialize the document list for the first sample
-            status_val, progress_val = initialize_document_list(samples[0]["id"])
-            status_box.value = status_val
-            progress_text.value = progress_val
             # Navigation functions
             def nav_to_prev(current_id):
-                current_sample = next((s for s in samples if s["id"] == current_id), None)
-                if not current_sample:
                     return current_id
-                current_idx = samples.index(current_sample)
-                if current_idx > 0:
-                    prev_sample = samples[current_idx - 1]
-                    return prev_sample["id"]
-                return current_id
             def nav_to_next(current_id):
-                current_sample = next((s for s in samples if s["id"] == current_id), None)
-                if not current_sample:
                     return current_id
-                current_idx = samples.index(current_sample)
-                if current_idx < len(samples) - 1:
-                    next_sample = samples[current_idx + 1]
-                    return next_sample["id"]
-                return current_id
             # Connect navigation buttons
             prev_btn.click(
@@ -331,11 +289,11 @@ def create_reranking_interface(task_data):
                 inputs=[current_sample_id],
                 outputs=[current_sample_id]
             ).then(
-                lambda id: initialize_document_list(id),
                 inputs=[current_sample_id],
                 outputs=[status_box, progress_text]
             ).then(
-                lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
                 inputs=[current_sample_id],
                 outputs=[query_text]
             )
@@ -345,107 +303,46 @@ def create_reranking_interface(task_data):
                 inputs=[current_sample_id],
                 outputs=[current_sample_id]
             ).then(
-                lambda id: initialize_document_list(id),
                 inputs=[current_sample_id],
                 outputs=[status_box, progress_text]
             ).then(
-                lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
                 inputs=[current_sample_id],
                 outputs=[query_text]
             )
-            # Submit button
             submit_btn.click(
                 save_ranking,
                 inputs=[current_sample_id],
                 outputs=[status_box, progress_text]
             )
-            # Add function to reset the current ranking order
-            def reset_order():
-                nonlocal current_order
-                sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
-                if not sample:
-                    return [gr.update() for _ in document_containers]
-                docs = sample["candidates"]
-                # Reset to original order
-                current_order = list(range(len(docs)))
-                # Update UI
-                updates = []
-                for i in range(len(document_containers)):
-                    if i < len(docs):
-                        doc_idx = current_order[i]
-                        rank_class = f"rank-{i+1}"
-                        updates.append(gr.update(
-                            value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
-                            visible=True,
-                            elem_classes=["document-text", rank_class]
-                        ))
-                    else:
-                        updates.append(gr.update(value="", visible=False))
-                return updates
-            # Connect reset button
-            clear_btn.click(
-                reset_order,
-                inputs=[],
-                outputs=document_containers
-            )
-            # Save all results
-            def save_results():
-                """Save all collected results to a file."""
-                output_path = f"{task_data['task_name']}_human_results.json"
-                with open(output_path, "w") as f:
-                    json.dump(results, f, indent=2)
-                return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
-            save_btn.click(save_results, outputs=[status_box])
     return demo
-# Just use a simplified version with the main demo functionality
-def get_task_file():
-    """Get the task file to use for the demo."""
-    # Try to load the default example
-    default_task = "AskUbuntuDupQuestions_human_eval.json"
-    if os.path.exists(default_task):
-        return default_task
-    # If default doesn't exist, look for any other task files
-    task_files = [f for f in os.listdir(".") if f.endswith("_human_eval.json")]
-    if task_files:
-        return task_files[0]
-    # Raise error if no task files found
-    raise FileNotFoundError("No task files found. Please ensure there's a *_human_eval.json file in the current directory.")
-# Main app with simplified structure
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# MTEB Human Evaluation Demo")
     try:
-        # Load the task file
-        task_file = get_task_file()
-        with open(task_file, "r") as f:
-            task_data = json.load(f)
-        # Show which task is currently loaded
-        gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
-        # Display the interface
-        reranking_demo = create_reranking_interface(task_data)
     except Exception as e:
-        gr.Markdown(f"**Error loading task: {str(e)}**")
-        error_details = gr.Textbox(
-            value=str(e),
-            label="Error Details",
-            interactive=False
-        )
-if __name__ == "__main__":
-    app.launch()

 import os
 from pathlib import Path
+# Create a minimal demo with hardcoded sample data
+SAMPLE_TASK = {
+    "task_name": "Demo Reranking Task",
+    "task_type": "reranking",
+    "instructions": "Rank the documents from most relevant to least relevant for the given query.",
+    "samples": [
+        {
+            "id": "sample_1",
+            "query": "How do I install Python on Windows?",
+            "candidates": [
+                "To install Python on Windows, go to python.org and download the latest installer. Run the installer and make sure to check 'Add Python to PATH' during installation.",
+                "Python is a popular programming language used for web development, data analysis, and machine learning.",
+                "Windows is an operating system developed by Microsoft. It's the most popular desktop operating system worldwide.",
+                "Installing software on Windows typically involves downloading an installer and running it with administrator privileges.",
+                "Programming languages like Python, JavaScript, and Java are essential tools for modern software development."
+            ]
+        },
+        {
+            "id": "sample_2",
+            "query": "What are the benefits of exercise?",
+            "candidates": [
+                "Regular exercise improves cardiovascular health, strengthens muscles, and can help with weight management.",
+                "Exercise releases endorphins which can improve mood and reduce feelings of depression and anxiety.",
+                "A balanced diet is important for maintaining good health and providing energy for daily activities.",
+                "Regular physical activity can reduce the risk of chronic diseases such as heart disease, diabetes, and certain cancers.",
+                "Sleep is essential for recovery and overall health, with most adults needing 7-9 hours per night."
+            ]
+        }
+    ]
+}
 def create_reranking_interface(task_data):
     """Create a Gradio interface for reranking evaluation."""
     samples = task_data["samples"]
             if not current_order:
                 return "⚠️ No document ordering found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+            # Convert document positions to rankings
             # First document (position 0) gets rank 1, etc.
             rankings = []
             for i, doc_idx in enumerate(current_order):
                 with open(output_path, "w") as f:
                     json.dump(results, f, indent=2)
                 return f"✅ Rankings saved successfully (in memory and to file)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+            except Exception as write_error:
+                print(f"Error writing results file: {str(write_error)}")
                 # If file saving fails, still mark as success since we saved in memory
+                return f"✅ Rankings saved in memory (file save failed: {str(write_error)})", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
         except Exception as e:
+            print(f"Error in save_ranking: {str(e)}")
             # Return specific error message
             return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
         with gr.Accordion("Instructions", open=True):
                (most relevant at the top, least relevant at the bottom)
             4. Click "Submit Rankings" when you're done with the current query
             5. Use "Previous" and "Next" to navigate between queries
             """.format(instructions=task_data["instructions"]))
         current_sample_id = gr.State(value=samples[0]["id"])
+        with gr.Row():
             progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
+            status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
         with gr.Group():
             gr.Markdown("## Query:")
+            query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
             gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")
             # Function to initialize the document list for a sample
             def initialize_document_list(sample_id):
                 nonlocal current_order
+                try:
+                    sample = next((s for s in samples if s["id"] == sample_id), None)
+                    if not sample:
+                        return "Query not found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+                    # Get the documents for this sample
+                    docs = sample["candidates"]
+                    # Initialize document order (0, 1, 2, ..., n-1)
+                    current_order = list(range(len(docs)))
+                    # Check if this sample has already been annotated to restore ordering
+                    existing_annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
+                    if existing_annotation and "rankings" in existing_annotation:
+                        # Create pairs of (doc_idx, rank)
+                        ranked_docs = []
+                        for doc_idx, rank in enumerate(existing_annotation["rankings"]):
+                            ranked_docs.append((doc_idx, rank))
+                        # Sort by rank (ascending)
+                        ranked_docs.sort(key=lambda x: x[1])
+                        # Extract document indices in rank order
+                        current_order = [doc[0] for doc in ranked_docs]
+                    # Update UI
+                    for i in range(len(document_containers)):
+                        if i < len(docs):
+                            doc_idx = current_order[i]
+                            document_containers[i].value = f"Document {doc_idx+1} (Rank {i+1}): {docs[doc_idx]}"
+                        else:
+                            document_containers[i].value = ""  # Clear unused containers
+                    # Status message
+                    status = f"Viewing query {samples.index(sample) + 1} of {len(samples)}"
+                    if completed_samples[sample_id]:
+                        status += " (already completed)"
+                    return status, f"Progress: {sum(completed_samples.values())}/{len(samples)}"
+                except Exception as e:
+                    print(f"Error in initialize_document_list: {str(e)}")
+                    return f"Error initializing documents: {str(e)}", "Progress: 0/0"
             # Create document display containers with up/down buttons
             with gr.Column():
+                # Create one display slot per candidate in the largest sample (no fixed cap of 10)
+                max_docs = max(len(s["candidates"]) for s in samples)
+                for i in range(max_docs):
                     with gr.Group():
+                        doc_text = gr.Textbox(label=f"Document {i+1}", interactive=False)
+                        document_containers.append(doc_text)
+                        with gr.Row():
+                            up_btn = gr.Button(f"⬆️ Move Up", size="sm")
+                            down_btn = gr.Button(f"⬇️ Move Down", size="sm")
+                        # Create closures for up/down buttons
                         def make_up_handler(idx):
                             def up_handler():
                                 nonlocal current_order
+                                if idx > 0:
+                                    # Swap with document above
                                     current_order[idx], current_order[idx-1] = current_order[idx-1], current_order[idx]
+                                    # Get current sample
                                     sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
                                     if sample:
                                         docs = sample["candidates"]
+                                        # Update document displays
                                         for j in range(len(document_containers)):
+                                            if j < len(docs):
                                                 doc_idx = current_order[j]
+                                                document_containers[j].value = f"Document {doc_idx+1} (Rank {j+1}): {docs[doc_idx]}"
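+                                # Return an empty dict: this click is wired with outputs=None, so nothing is
+                                # sent back; the textboxes' .value attributes were assigned directly above.
+                                # NOTE(review): confirm that assigning .value here refreshes the rendered UI.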
+                                return {}
                             return up_handler
                         def make_down_handler(idx):
                             def down_handler():
                                 nonlocal current_order
                                 if idx < len(current_order) - 1:
+                                    # Swap with document below
                                     current_order[idx], current_order[idx+1] = current_order[idx+1], current_order[idx]
+                                    # Get current sample
                                     sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
                                     if sample:
                                         docs = sample["candidates"]
+                                        # Update document displays
                                         for j in range(len(document_containers)):
+                                            if j < len(docs):
                                                 doc_idx = current_order[j]
+                                                document_containers[j].value = f"Document {doc_idx+1} (Rank {j+1}): {docs[doc_idx]}"
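+                                # Return an empty dict: this click is wired with outputs=None, so nothing is
+                                # sent back; the textboxes' .value attributes were assigned directly above.
+                                # NOTE(review): confirm that assigning .value here refreshes the rendered UI.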
+                                return {}
                             return down_handler
+                        # Connect buttons
                         up_btn.click(
+                            fn=make_up_handler(i),
+                            inputs=None,
+                            outputs=None
                         )
                         down_btn.click(
+                            fn=make_down_handler(i),
+                            inputs=None,
+                            outputs=None
                         )
                 with gr.Row():
+                    prev_btn = gr.Button("← Previous Query", size="sm")
+                    submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
+                    next_btn = gr.Button("Next Query →", size="sm")
             # Navigation functions
             def nav_to_prev(current_id):
+                try:
+                    current_sample = next((s for s in samples if s["id"] == current_id), None)
+                    if not current_sample:
+                        return current_id
+                    current_idx = samples.index(current_sample)
+                    if current_idx > 0:
+                        prev_sample = samples[current_idx - 1]
+                        return prev_sample["id"]
+                    return current_id
+                except Exception as e:
+                    print(f"Error in nav_to_prev: {str(e)}")
                     return current_id
             def nav_to_next(current_id):
+                try:
+                    current_sample = next((s for s in samples if s["id"] == current_id), None)
+                    if not current_sample:
+                        return current_id
+                    current_idx = samples.index(current_sample)
+                    if current_idx < len(samples) - 1:
+                        next_sample = samples[current_idx + 1]
+                        return next_sample["id"]
                     return current_id
+                except Exception as e:
+                    print(f"Error in nav_to_next: {str(e)}")
+                    return current_id
+            # Update query text
+            def update_query(sample_id):
+                try:
+                    sample = next((s for s in samples if s["id"] == sample_id), None)
+                    if not sample:
+                        return ""
+                    return sample["query"]
+                except Exception as e:
+                    print(f"Error in update_query: {str(e)}")
+                    return "Error loading query"
+            # Initialize the first sample
+            initialize_document_list(samples[0]["id"])
             # Connect navigation buttons
             prev_btn.click(
                 inputs=[current_sample_id],
                 outputs=[current_sample_id]
             ).then(
+                initialize_document_list,
                 inputs=[current_sample_id],
                 outputs=[status_box, progress_text]
             ).then(
+                update_query,
                 inputs=[current_sample_id],
                 outputs=[query_text]
             )
                 inputs=[current_sample_id],
                 outputs=[current_sample_id]
             ).then(
+                initialize_document_list,
                 inputs=[current_sample_id],
                 outputs=[status_box, progress_text]
             ).then(
+                update_query,
                 inputs=[current_sample_id],
                 outputs=[query_text]
             )
+            # Connect submit button
             submit_btn.click(
                 save_ranking,
                 inputs=[current_sample_id],
                 outputs=[status_box, progress_text]
             )
     return demo
+# Simple Gradio app with hardcoded sample data
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# MTEB Human Evaluation Demo")
+    gr.Markdown("## Sample Reranking Task")
+    # Create and display the demo interface
+    demo = create_reranking_interface(SAMPLE_TASK)
+if __name__ == "__main__":
     try:
+        print("Starting MTEB Human Evaluation App with hardcoded sample data...")
+        print(f"Current directory: {os.getcwd()}")
+        # Launch with hardcoded sample data
+        app.launch(debug=True)
     except Exception as e:
+        print(f"ERROR STARTING APP: {str(e)}")
+        # Create an ultra-simple fallback app
+        with gr.Blocks() as fallback_app:
+            gr.Markdown("# MTEB Human Evaluation - Emergency Fallback Mode")
+            gr.Markdown("There was an error loading the application. Please see details below.")
+            gr.Textbox(value=str(e), label="Error", interactive=False)
+        fallback_app.launch()