AdnanElAssadi commited on
Commit
e1f1819
·
verified ·
1 Parent(s): 9069a07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -282
app.py CHANGED
@@ -3,6 +3,37 @@ import json
3
  import os
4
  from pathlib import Path
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def create_reranking_interface(task_data):
7
  """Create a Gradio interface for reranking evaluation."""
8
  samples = task_data["samples"]
@@ -18,7 +49,7 @@ def create_reranking_interface(task_data):
18
  if not current_order:
19
  return "⚠️ No document ordering found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
20
 
21
- # Convert document positions to rankings (position in list -> document's rank)
22
  # First document (position 0) gets rank 1, etc.
23
  rankings = []
24
  for i, doc_idx in enumerate(current_order):
@@ -45,65 +76,16 @@ def create_reranking_interface(task_data):
45
  with open(output_path, "w") as f:
46
  json.dump(results, f, indent=2)
47
  return f"✅ Rankings saved successfully (in memory and to file)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
48
- except:
 
49
  # If file saving fails, still mark as success since we saved in memory
50
- return f"✅ Rankings saved in memory (file save failed)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
51
  except Exception as e:
 
52
  # Return specific error message
53
  return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
54
 
55
- with gr.Blocks(theme=gr.themes.Soft(), css="""
56
- .document-text {
57
- font-size: 16px;
58
- padding: 10px;
59
- border-radius: 8px;
60
- background-color: #f8f9fa;
61
- border-left: 5px solid #4CAF50;
62
- }
63
- .query-box {
64
- background-color: #e3f2fd;
65
- padding: 16px;
66
- border-radius: 8px;
67
- border-left: 5px solid #2196F3;
68
- font-size: 18px;
69
- margin-bottom: 20px;
70
- }
71
- .progress-indicator {
72
- font-weight: bold;
73
- text-align: center;
74
- padding: 12px;
75
- background-color: #f1f8e9;
76
- border-radius: 8px;
77
- margin: 10px 0;
78
- }
79
- .status-box {
80
- font-weight: bold;
81
- text-align: center;
82
- }
83
- .ranking-box {
84
- border: 1px solid #e0e0e0;
85
- border-radius: 8px;
86
- padding: 15px;
87
- margin-bottom: 12px;
88
- transition: all 0.3s;
89
- }
90
- .ranking-box:hover {
91
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
92
- }
93
- /* Add different colors for rank levels */
94
- .rank-1, .rank-2, .rank-3 {
95
- border-left: 5px solid #4CAF50; /* Green for top ranks */
96
- }
97
- .rank-4, .rank-5, .rank-6, .rank-7 {
98
- border-left: 5px solid #FFC107; /* Yellow for mid ranks */
99
- }
100
- .rank-8, .rank-9, .rank-10 {
101
- border-left: 5px solid #FF5722; /* Orange for lower ranks */
102
- }
103
- .rank-11, .rank-12, .rank-13, .rank-14, .rank-15, .rank-16, .rank-17, .rank-18, .rank-19, .rank-20 {
104
- border-left: 5px solid #9E9E9E; /* Gray for lowest ranks */
105
- }
106
- """) as demo:
107
  gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
108
 
109
  with gr.Accordion("Instructions", open=True):
@@ -119,18 +101,17 @@ def create_reranking_interface(task_data):
119
  (most relevant at the top, least relevant at the bottom)
120
  4. Click "Submit Rankings" when you're done with the current query
121
  5. Use "Previous" and "Next" to navigate between queries
122
- 6. Click "Save All Results" periodically to ensure your work is saved
123
  """.format(instructions=task_data["instructions"]))
124
 
125
  current_sample_id = gr.State(value=samples[0]["id"])
126
 
127
- with gr.Row(elem_classes=["progress-indicator"]):
128
  progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
129
- status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False, elem_classes=["status-box"])
130
 
131
  with gr.Group():
132
  gr.Markdown("## Query:")
133
- query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False, elem_classes=["query-box"])
134
 
135
  gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")
136
 
@@ -140,190 +121,167 @@ def create_reranking_interface(task_data):
140
  # Function to initialize the document list for a sample
141
  def initialize_document_list(sample_id):
142
  nonlocal current_order
143
-
144
- sample = next((s for s in samples if s["id"] == sample_id), None)
145
- if not sample:
146
- return "Query not found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
147
-
148
- # Get the documents for this sample
149
- docs = sample["candidates"]
150
-
151
- # Initialize document order (0, 1, 2, ..., n-1)
152
- current_order = list(range(len(docs)))
153
-
154
- # Check if this sample has already been annotated to restore ordering
155
- existing_annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
156
- if existing_annotation and "rankings" in existing_annotation:
157
- # Create pairs of (doc_idx, rank)
158
- ranked_docs = []
159
- for doc_idx, rank in enumerate(existing_annotation["rankings"]):
160
- ranked_docs.append((doc_idx, rank))
161
 
162
- # Sort by rank (ascending)
163
- ranked_docs.sort(key=lambda x: x[1])
164
 
165
- # Extract document indices in rank order
166
- current_order = [doc[0] for doc in ranked_docs]
167
-
168
- # Update UI
169
- for i in range(len(document_containers)):
170
- if i < len(docs):
171
- doc_idx = current_order[i]
172
- rank_class = f"rank-{i+1}"
173
- document_containers[i].update(
174
- value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
175
- visible=True,
176
- elem_classes=["document-text", rank_class]
177
- )
178
- else:
179
- document_containers[i].update(value="", visible=False)
180
-
181
- # Status message
182
- status = f"Viewing query {samples.index(sample) + 1} of {len(samples)}"
183
- if completed_samples[sample_id]:
184
- status += " (already completed)"
185
-
186
- return status, f"Progress: {sum(completed_samples.values())}/{len(samples)}"
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # Create document display containers with up/down buttons
189
  with gr.Column():
190
- gr.Markdown("""
191
- ### Instructions for Ranking:
192
- - Documents are initially shown in their original order
193
- - The most relevant document should be at the TOP (Rank 1)
194
- - Use the "Move Up" and "Move Down" buttons to rearrange documents
195
- - The rank is shown at the beginning of each document: [RANK X]
196
- - When you're satisfied with the order, click "Submit Rankings"
197
- """)
198
-
199
- for i in range(20): # Now handling up to 20 documents per sample
200
  with gr.Group():
201
- with gr.Box():
202
- with gr.Row():
203
- rank_label = gr.Markdown(f"**Document #{i+1}**")
204
- with gr.Column(scale=1):
205
- up_btn = gr.Button("⬆️ Move Up", size="sm", variant="primary")
206
- down_btn = gr.Button("⬇️ Move Down", size="sm", variant="secondary")
207
-
208
- doc_text = gr.Textbox(
209
- label="",
210
- interactive=False,
211
- elem_id=f"doc-text-{i}",
212
- elem_classes=["document-text"]
213
- )
214
- document_containers.append(doc_text)
215
 
216
- # Create a closure that properly captures the current index
 
 
 
 
217
  def make_up_handler(idx):
218
  def up_handler():
219
  nonlocal current_order
220
-
221
- # Only move if index is valid
222
- if idx < len(current_order) and idx > 0:
223
- # Swap with the document above
224
  current_order[idx], current_order[idx-1] = current_order[idx-1], current_order[idx]
225
 
226
- # Update all document displays with new order
227
  sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
228
  if sample:
229
  docs = sample["candidates"]
230
- updates = []
231
  for j in range(len(document_containers)):
232
- if j < len(current_order) and j < len(docs):
233
  doc_idx = current_order[j]
234
- # Add rank-specific class for styling
235
- rank_class = f"rank-{j+1}"
236
- updates.append(gr.update(
237
- value=f"[RANK {j+1}] Document {doc_idx+1}: {docs[doc_idx]}",
238
- visible=True,
239
- elem_classes=["document-text", rank_class]
240
- ))
241
- else:
242
- updates.append(gr.update(value="", visible=False))
243
- return updates
244
- return [gr.update() for _ in document_containers]
245
  return up_handler
246
 
247
  def make_down_handler(idx):
248
  def down_handler():
249
  nonlocal current_order
250
-
251
- # Only move if index is valid
252
  if idx < len(current_order) - 1:
253
- # Swap with the document below
254
  current_order[idx], current_order[idx+1] = current_order[idx+1], current_order[idx]
255
 
256
- # Update all document displays with new order
257
  sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
258
  if sample:
259
  docs = sample["candidates"]
260
- updates = []
261
  for j in range(len(document_containers)):
262
- if j < len(current_order) and j < len(docs):
263
  doc_idx = current_order[j]
264
- # Add rank-specific class for styling
265
- rank_class = f"rank-{j+1}"
266
- updates.append(gr.update(
267
- value=f"[RANK {j+1}] Document {doc_idx+1}: {docs[doc_idx]}",
268
- visible=True,
269
- elem_classes=["document-text", rank_class]
270
- ))
271
- else:
272
- updates.append(gr.update(value="", visible=False))
273
- return updates
274
- return [gr.update() for _ in document_containers]
275
  return down_handler
276
 
277
- # Connect buttons with properly created handlers
278
  up_btn.click(
279
- make_up_handler(i),
280
- inputs=[],
281
- outputs=document_containers
282
  )
283
 
284
  down_btn.click(
285
- make_down_handler(i),
286
- inputs=[],
287
- outputs=document_containers
288
  )
289
 
290
- with gr.Row(equal_height=True):
291
- prev_btn = gr.Button("← Previous Query", size="lg", variant="secondary")
292
- submit_btn = gr.Button("💾 Submit Rankings", size="lg", variant="primary")
293
- next_btn = gr.Button("Next Query →", size="lg", variant="secondary")
294
-
295
  with gr.Row():
296
- save_btn = gr.Button("💾 Save All Results", variant="primary", size="lg")
297
- # Add a clear button to reset rankings
298
- clear_btn = gr.Button("🔄 Reset Order", variant="secondary", size="lg")
299
-
300
- # Initialize the document list for the first sample
301
- status_val, progress_val = initialize_document_list(samples[0]["id"])
302
- status_box.value = status_val
303
- progress_text.value = progress_val
304
 
305
  # Navigation functions
306
  def nav_to_prev(current_id):
307
- current_sample = next((s for s in samples if s["id"] == current_id), None)
308
- if not current_sample:
 
 
 
 
 
 
 
 
 
 
309
  return current_id
310
-
311
- current_idx = samples.index(current_sample)
312
- if current_idx > 0:
313
- prev_sample = samples[current_idx - 1]
314
- return prev_sample["id"]
315
- return current_id
316
 
317
  def nav_to_next(current_id):
318
- current_sample = next((s for s in samples if s["id"] == current_id), None)
319
- if not current_sample:
 
 
 
 
 
 
 
320
  return current_id
321
-
322
- current_idx = samples.index(current_sample)
323
- if current_idx < len(samples) - 1:
324
- next_sample = samples[current_idx + 1]
325
- return next_sample["id"]
326
- return current_id
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  # Connect navigation buttons
329
  prev_btn.click(
@@ -331,11 +289,11 @@ def create_reranking_interface(task_data):
331
  inputs=[current_sample_id],
332
  outputs=[current_sample_id]
333
  ).then(
334
- lambda id: initialize_document_list(id),
335
  inputs=[current_sample_id],
336
  outputs=[status_box, progress_text]
337
  ).then(
338
- lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
339
  inputs=[current_sample_id],
340
  outputs=[query_text]
341
  )
@@ -345,107 +303,46 @@ def create_reranking_interface(task_data):
345
  inputs=[current_sample_id],
346
  outputs=[current_sample_id]
347
  ).then(
348
- lambda id: initialize_document_list(id),
349
  inputs=[current_sample_id],
350
  outputs=[status_box, progress_text]
351
  ).then(
352
- lambda id: next((s["query"] for s in samples if s["id"] == id), ""),
353
  inputs=[current_sample_id],
354
  outputs=[query_text]
355
  )
356
 
357
- # Submit button
358
  submit_btn.click(
359
  save_ranking,
360
  inputs=[current_sample_id],
361
  outputs=[status_box, progress_text]
362
  )
363
-
364
- # Add function to reset the current ranking order
365
- def reset_order():
366
- nonlocal current_order
367
- sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
368
- if not sample:
369
- return [gr.update() for _ in document_containers]
370
-
371
- docs = sample["candidates"]
372
- # Reset to original order
373
- current_order = list(range(len(docs)))
374
-
375
- # Update UI
376
- updates = []
377
- for i in range(len(document_containers)):
378
- if i < len(docs):
379
- doc_idx = current_order[i]
380
- rank_class = f"rank-{i+1}"
381
- updates.append(gr.update(
382
- value=f"[RANK {i+1}] Document {doc_idx+1}: {docs[doc_idx]}",
383
- visible=True,
384
- elem_classes=["document-text", rank_class]
385
- ))
386
- else:
387
- updates.append(gr.update(value="", visible=False))
388
-
389
- return updates
390
-
391
- # Connect reset button
392
- clear_btn.click(
393
- reset_order,
394
- inputs=[],
395
- outputs=document_containers
396
- )
397
-
398
- # Save all results
399
- def save_results():
400
- """Save all collected results to a file."""
401
- output_path = f"{task_data['task_name']}_human_results.json"
402
- with open(output_path, "w") as f:
403
- json.dump(results, f, indent=2)
404
- return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
405
-
406
- save_btn.click(save_results, outputs=[status_box])
407
 
408
  return demo
409
 
410
- # Just use a simplified version with the main demo functionality
411
- def get_task_file():
412
- """Get the task file to use for the demo."""
413
- # Try to load the default example
414
- default_task = "AskUbuntuDupQuestions_human_eval.json"
415
- if os.path.exists(default_task):
416
- return default_task
417
-
418
- # If default doesn't exist, look for any other task files
419
- task_files = [f for f in os.listdir(".") if f.endswith("_human_eval.json")]
420
- if task_files:
421
- return task_files[0]
422
-
423
- # Raise error if no task files found
424
- raise FileNotFoundError("No task files found. Please ensure there's a *_human_eval.json file in the current directory.")
425
-
426
- # Main app with simplified structure
427
  with gr.Blocks(theme=gr.themes.Soft()) as app:
428
  gr.Markdown("# MTEB Human Evaluation Demo")
 
429
 
 
 
 
 
430
  try:
431
- # Load the task file
432
- task_file = get_task_file()
433
-
434
- with open(task_file, "r") as f:
435
- task_data = json.load(f)
436
-
437
- # Show which task is currently loaded
438
- gr.Markdown(f"**Current Task: {task_data['task_name']}** ({len(task_data['samples'])} samples)")
439
 
440
- # Display the interface
441
- reranking_demo = create_reranking_interface(task_data)
442
  except Exception as e:
443
- gr.Markdown(f"**Error loading task: {str(e)}**")
444
- error_details = gr.Textbox(
445
- value=str(e),
446
- label="Error Details",
447
- interactive=False
448
- )
449
-
450
- if __name__ == "__main__":
451
- app.launch()
 
3
  import os
4
  from pathlib import Path
5
 
6
+ # Create a minimal demo with hardcoded sample data
7
+ SAMPLE_TASK = {
8
+ "task_name": "Demo Reranking Task",
9
+ "task_type": "reranking",
10
+ "instructions": "Rank the documents from most relevant to least relevant for the given query.",
11
+ "samples": [
12
+ {
13
+ "id": "sample_1",
14
+ "query": "How do I install Python on Windows?",
15
+ "candidates": [
16
+ "To install Python on Windows, go to python.org and download the latest installer. Run the installer and make sure to check 'Add Python to PATH' during installation.",
17
+ "Python is a popular programming language used for web development, data analysis, and machine learning.",
18
+ "Windows is an operating system developed by Microsoft. It's the most popular desktop operating system worldwide.",
19
+ "Installing software on Windows typically involves downloading an installer and running it with administrator privileges.",
20
+ "Programming languages like Python, JavaScript, and Java are essential tools for modern software development."
21
+ ]
22
+ },
23
+ {
24
+ "id": "sample_2",
25
+ "query": "What are the benefits of exercise?",
26
+ "candidates": [
27
+ "Regular exercise improves cardiovascular health, strengthens muscles, and can help with weight management.",
28
+ "Exercise releases endorphins which can improve mood and reduce feelings of depression and anxiety.",
29
+ "A balanced diet is important for maintaining good health and providing energy for daily activities.",
30
+ "Regular physical activity can reduce the risk of chronic diseases such as heart disease, diabetes, and certain cancers.",
31
+ "Sleep is essential for recovery and overall health, with most adults needing 7-9 hours per night."
32
+ ]
33
+ }
34
+ ]
35
+ }
36
+
37
  def create_reranking_interface(task_data):
38
  """Create a Gradio interface for reranking evaluation."""
39
  samples = task_data["samples"]
 
49
  if not current_order:
50
  return "⚠️ No document ordering found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
51
 
52
+ # Convert document positions to rankings
53
  # First document (position 0) gets rank 1, etc.
54
  rankings = []
55
  for i, doc_idx in enumerate(current_order):
 
76
  with open(output_path, "w") as f:
77
  json.dump(results, f, indent=2)
78
  return f"✅ Rankings saved successfully (in memory and to file)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
79
+ except Exception as write_error:
80
+ print(f"Error writing results file: {str(write_error)}")
81
  # If file saving fails, still mark as success since we saved in memory
82
+ return f"✅ Rankings saved in memory (file save failed: {str(write_error)})", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
83
  except Exception as e:
84
+ print(f"Error in save_ranking: {str(e)}")
85
  # Return specific error message
86
  return f"Error: {str(e)}", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
87
 
88
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
90
 
91
  with gr.Accordion("Instructions", open=True):
 
101
  (most relevant at the top, least relevant at the bottom)
102
  4. Click "Submit Rankings" when you're done with the current query
103
  5. Use "Previous" and "Next" to navigate between queries
 
104
  """.format(instructions=task_data["instructions"]))
105
 
106
  current_sample_id = gr.State(value=samples[0]["id"])
107
 
108
+ with gr.Row():
109
  progress_text = gr.Textbox(label="Progress", value=f"Progress: 0/{len(samples)}", interactive=False)
110
+ status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
111
 
112
  with gr.Group():
113
  gr.Markdown("## Query:")
114
+ query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
115
 
116
  gr.Markdown("## Documents (Arrange in order of relevance, most relevant at top):")
117
 
 
121
  # Function to initialize the document list for a sample
122
  def initialize_document_list(sample_id):
123
  nonlocal current_order
124
+ try:
125
+ sample = next((s for s in samples if s["id"] == sample_id), None)
126
+ if not sample:
127
+ return "Query not found", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
+ # Get the documents for this sample
130
+ docs = sample["candidates"]
131
 
132
+ # Initialize document order (0, 1, 2, ..., n-1)
133
+ current_order = list(range(len(docs)))
134
+
135
+ # Check if this sample has already been annotated to restore ordering
136
+ existing_annotation = next((a for a in results["annotations"] if a["sample_id"] == sample_id), None)
137
+ if existing_annotation and "rankings" in existing_annotation:
138
+ # Create pairs of (doc_idx, rank)
139
+ ranked_docs = []
140
+ for doc_idx, rank in enumerate(existing_annotation["rankings"]):
141
+ ranked_docs.append((doc_idx, rank))
142
+
143
+ # Sort by rank (ascending)
144
+ ranked_docs.sort(key=lambda x: x[1])
145
+
146
+ # Extract document indices in rank order
147
+ current_order = [doc[0] for doc in ranked_docs]
148
+
149
+ # Update UI
150
+ for i in range(len(document_containers)):
151
+ if i < len(docs):
152
+ doc_idx = current_order[i]
153
+ document_containers[i].value = f"Document {doc_idx+1} (Rank {i+1}): {docs[doc_idx]}"
154
+ else:
155
+ document_containers[i].value = "" # Clear unused containers
156
+
157
+ # Status message
158
+ status = f"Viewing query {samples.index(sample) + 1} of {len(samples)}"
159
+ if completed_samples[sample_id]:
160
+ status += " (already completed)"
161
+
162
+ return status, f"Progress: {sum(completed_samples.values())}/{len(samples)}"
163
+ except Exception as e:
164
+ print(f"Error in initialize_document_list: {str(e)}")
165
+ return f"Error initializing documents: {str(e)}", "Progress: 0/0"
166
 
167
  # Create document display containers with up/down buttons
168
  with gr.Column():
169
+ # Display up to 10 documents (or however many are in the largest sample)
170
+ max_docs = max(len(s["candidates"]) for s in samples)
171
+ for i in range(max_docs):
 
 
 
 
 
 
 
172
  with gr.Group():
173
+ doc_text = gr.Textbox(label=f"Document {i+1}", interactive=False)
174
+ document_containers.append(doc_text)
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
+ with gr.Row():
177
+ up_btn = gr.Button(f"⬆️ Move Up", size="sm")
178
+ down_btn = gr.Button(f"⬇️ Move Down", size="sm")
179
+
180
+ # Create closures for up/down buttons
181
  def make_up_handler(idx):
182
  def up_handler():
183
  nonlocal current_order
184
+ if idx > 0:
185
+ # Swap with document above
 
 
186
  current_order[idx], current_order[idx-1] = current_order[idx-1], current_order[idx]
187
 
188
+ # Get current sample
189
  sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
190
  if sample:
191
  docs = sample["candidates"]
192
+ # Update document displays
193
  for j in range(len(document_containers)):
194
+ if j < len(docs):
195
  doc_idx = current_order[j]
196
+ document_containers[j].value = f"Document {doc_idx+1} (Rank {j+1}): {docs[doc_idx]}"
197
+
198
+ # Return empty list since we update the containers directly
199
+ return {}
 
 
 
 
 
 
 
200
  return up_handler
201
 
202
  def make_down_handler(idx):
203
  def down_handler():
204
  nonlocal current_order
 
 
205
  if idx < len(current_order) - 1:
206
+ # Swap with document below
207
  current_order[idx], current_order[idx+1] = current_order[idx+1], current_order[idx]
208
 
209
+ # Get current sample
210
  sample = next((s for s in samples if s["id"] == current_sample_id.value), None)
211
  if sample:
212
  docs = sample["candidates"]
213
+ # Update document displays
214
  for j in range(len(document_containers)):
215
+ if j < len(docs):
216
  doc_idx = current_order[j]
217
+ document_containers[j].value = f"Document {doc_idx+1} (Rank {j+1}): {docs[doc_idx]}"
218
+
219
+ # Return empty list since we update the containers directly
220
+ return {}
 
 
 
 
 
 
 
221
  return down_handler
222
 
223
+ # Connect buttons
224
  up_btn.click(
225
+ fn=make_up_handler(i),
226
+ inputs=None,
227
+ outputs=None
228
  )
229
 
230
  down_btn.click(
231
+ fn=make_down_handler(i),
232
+ inputs=None,
233
+ outputs=None
234
  )
235
 
 
 
 
 
 
236
  with gr.Row():
237
+ prev_btn = gr.Button(" Previous Query", size="sm")
238
+ submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
239
+ next_btn = gr.Button("Next Query ", size="sm")
 
 
 
 
 
240
 
241
  # Navigation functions
242
  def nav_to_prev(current_id):
243
+ try:
244
+ current_sample = next((s for s in samples if s["id"] == current_id), None)
245
+ if not current_sample:
246
+ return current_id
247
+
248
+ current_idx = samples.index(current_sample)
249
+ if current_idx > 0:
250
+ prev_sample = samples[current_idx - 1]
251
+ return prev_sample["id"]
252
+ return current_id
253
+ except Exception as e:
254
+ print(f"Error in nav_to_prev: {str(e)}")
255
  return current_id
 
 
 
 
 
 
256
 
257
  def nav_to_next(current_id):
258
+ try:
259
+ current_sample = next((s for s in samples if s["id"] == current_id), None)
260
+ if not current_sample:
261
+ return current_id
262
+
263
+ current_idx = samples.index(current_sample)
264
+ if current_idx < len(samples) - 1:
265
+ next_sample = samples[current_idx + 1]
266
+ return next_sample["id"]
267
  return current_id
268
+ except Exception as e:
269
+ print(f"Error in nav_to_next: {str(e)}")
270
+ return current_id
271
+
272
+ # Update query text
273
+ def update_query(sample_id):
274
+ try:
275
+ sample = next((s for s in samples if s["id"] == sample_id), None)
276
+ if not sample:
277
+ return ""
278
+ return sample["query"]
279
+ except Exception as e:
280
+ print(f"Error in update_query: {str(e)}")
281
+ return "Error loading query"
282
+
283
+ # Initialize the first sample
284
+ initialize_document_list(samples[0]["id"])
285
 
286
  # Connect navigation buttons
287
  prev_btn.click(
 
289
  inputs=[current_sample_id],
290
  outputs=[current_sample_id]
291
  ).then(
292
+ initialize_document_list,
293
  inputs=[current_sample_id],
294
  outputs=[status_box, progress_text]
295
  ).then(
296
+ update_query,
297
  inputs=[current_sample_id],
298
  outputs=[query_text]
299
  )
 
303
  inputs=[current_sample_id],
304
  outputs=[current_sample_id]
305
  ).then(
306
+ initialize_document_list,
307
  inputs=[current_sample_id],
308
  outputs=[status_box, progress_text]
309
  ).then(
310
+ update_query,
311
  inputs=[current_sample_id],
312
  outputs=[query_text]
313
  )
314
 
315
+ # Connect submit button
316
  submit_btn.click(
317
  save_ranking,
318
  inputs=[current_sample_id],
319
  outputs=[status_box, progress_text]
320
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  return demo
323
 
324
+ # Simple Gradio app with hardcoded sample data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  with gr.Blocks(theme=gr.themes.Soft()) as app:
326
  gr.Markdown("# MTEB Human Evaluation Demo")
327
+ gr.Markdown("## Sample Reranking Task")
328
 
329
+ # Create and display the demo interface
330
+ demo = create_reranking_interface(SAMPLE_TASK)
331
+
332
+ if __name__ == "__main__":
333
  try:
334
+ print("Starting MTEB Human Evaluation App with hardcoded sample data...")
335
+ print(f"Current directory: {os.getcwd()}")
 
 
 
 
 
 
336
 
337
+ # Launch with hardcoded sample data
338
+ app.launch(debug=True)
339
  except Exception as e:
340
+ print(f"ERROR STARTING APP: {str(e)}")
341
+
342
+ # Create an ultra-simple fallback app
343
+ with gr.Blocks() as fallback_app:
344
+ gr.Markdown("# MTEB Human Evaluation - Emergency Fallback Mode")
345
+ gr.Markdown("There was an error loading the application. Please see details below.")
346
+ gr.Textbox(value=str(e), label="Error", interactive=False)
347
+
348
+ fallback_app.launch()