Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,172 +4,26 @@ import os
|
|
4 |
from pathlib import Path
|
5 |
|
6 |
def create_reranking_interface(task_data):
|
7 |
-
"""Create a Gradio interface for reranking evaluation
|
8 |
samples = task_data["samples"]
|
9 |
results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
|
10 |
completed_samples = {s["id"]: False for s in samples}
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
# Use existing ranks if available
|
16 |
-
ranks = [0] * len(candidates)
|
17 |
-
if existing_ranks and len(existing_ranks) == len(candidates):
|
18 |
-
ranks = existing_ranks.copy()
|
19 |
-
|
20 |
-
# Generate a unique ID for this set of dropdowns to avoid conflicts
|
21 |
-
import random
|
22 |
-
import time
|
23 |
-
dropdown_group_id = f"rank_group_{int(time.time())}_{random.randint(1000, 9999)}"
|
24 |
-
|
25 |
-
html = f"""
|
26 |
-
<div class="ranking-simple">
|
27 |
-
<input type="hidden" id="rank-order-state" value="">
|
28 |
-
<div class="rank-instructions">Select a rank (1-{len(candidates)}) for each document.</div>
|
29 |
-
"""
|
30 |
-
|
31 |
-
# Add each document with a dropdown selector
|
32 |
-
for i, doc in enumerate(candidates):
|
33 |
-
import html as html_escaper
|
34 |
-
escaped_doc = html_escaper.escape(doc)
|
35 |
-
current_rank = ranks[i] if ranks[i] > 0 else i + 1
|
36 |
-
|
37 |
-
html += f"""
|
38 |
-
<div class="rank-item" data-doc-id="{i}">
|
39 |
-
<div class="rank-selector">
|
40 |
-
<select class="rank-dropdown" data-doc-id="{i}" onchange="updateRankOrder('{dropdown_group_id}')">
|
41 |
-
"""
|
42 |
-
|
43 |
-
# Add options 1 through N
|
44 |
-
for rank in range(1, len(candidates) + 1):
|
45 |
-
selected = "selected" if rank == current_rank else ""
|
46 |
-
html += f'<option value="{rank}" {selected}>{rank}</option>'
|
47 |
-
|
48 |
-
html += f"""
|
49 |
-
</select>
|
50 |
-
</div>
|
51 |
-
<div class="doc-content">{escaped_doc}</div>
|
52 |
-
</div>
|
53 |
-
"""
|
54 |
-
|
55 |
-
# Add the JavaScript for handling rank updates
|
56 |
-
html += f"""
|
57 |
-
<script>
|
58 |
-
// Function to update the hidden state when dropdowns change
|
59 |
-
function updateRankOrder(groupId) {{
|
60 |
-
const items = document.querySelectorAll('.rank-item');
|
61 |
-
const selectedRanks = new Map();
|
62 |
-
const docOrder = [];
|
63 |
-
|
64 |
-
// First collect all selected ranks
|
65 |
-
items.forEach(item => {{
|
66 |
-
const docId = parseInt(item.getAttribute('data-doc-id'));
|
67 |
-
const dropdown = item.querySelector('.rank-dropdown');
|
68 |
-
const rank = parseInt(dropdown.value);
|
69 |
-
selectedRanks.set(docId, rank);
|
70 |
-
}});
|
71 |
-
|
72 |
-
// Sort documents by their selected rank
|
73 |
-
const sortedDocs = Array.from(selectedRanks.entries())
|
74 |
-
.sort((a, b) => a[1] - b[1])
|
75 |
-
.map(entry => entry[0]);
|
76 |
-
|
77 |
-
// Update the order state
|
78 |
-
const orderInput = document.querySelector('#current-order textarea');
|
79 |
-
if (orderInput) {{
|
80 |
-
orderInput.value = JSON.stringify(sortedDocs);
|
81 |
-
const event = new Event('input', {{ bubbles: true }});
|
82 |
-
orderInput.dispatchEvent(event);
|
83 |
-
}}
|
84 |
-
}}
|
85 |
-
|
86 |
-
// Initialize on page load
|
87 |
-
document.addEventListener('DOMContentLoaded', function() {{
|
88 |
-
updateRankOrder('{dropdown_group_id}');
|
89 |
-
}});
|
90 |
-
|
91 |
-
// Backup initialization for iframe environments
|
92 |
-
setTimeout(function() {{
|
93 |
-
updateRankOrder('{dropdown_group_id}');
|
94 |
-
}}, 1000);
|
95 |
-
</script>
|
96 |
-
</div>
|
97 |
-
"""
|
98 |
-
|
99 |
-
return html
|
100 |
-
|
101 |
-
def save_ranking(order_json, sample_id):
|
102 |
-
"""Save the current ranking to results."""
|
103 |
-
try:
|
104 |
-
if not order_json or order_json == "[]":
|
105 |
-
return "⚠️ Drag documents to set the ranking before submitting.", progress_text.value
|
106 |
-
order = json.loads(order_json)
|
107 |
-
num_candidates = len(next(s["candidates"] for s in samples if s["id"] == sample_id))
|
108 |
-
if len(order) != num_candidates:
|
109 |
-
return f"⚠️ Ranking order length mismatch. Expected {num_candidates}, got {len(order)}.", progress_text.value
|
110 |
-
rankings = [0] * num_candidates
|
111 |
-
for rank_minus_1, doc_idx in enumerate(order):
|
112 |
-
if doc_idx < num_candidates:
|
113 |
-
rankings[doc_idx] = rank_minus_1 + 1
|
114 |
-
else:
|
115 |
-
raise ValueError(f"Invalid document index {doc_idx} found in order.")
|
116 |
-
if sorted(rankings) != list(range(1, num_candidates + 1)):
|
117 |
-
return "⚠️ Ranking validation failed. Ranks are not 1 to N.", progress_text.value
|
118 |
-
annotation = {"sample_id": sample_id, "rankings": rankings}
|
119 |
-
existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
|
120 |
-
if existing_idx is not None:
|
121 |
-
results["annotations"][existing_idx] = annotation
|
122 |
-
else:
|
123 |
-
results["annotations"].append(annotation)
|
124 |
-
completed_samples[sample_id] = True
|
125 |
-
output_path = f"{task_data['task_name']}_human_results.json"
|
126 |
-
with open(output_path, "w") as f:
|
127 |
-
json.dump(results, f, indent=2)
|
128 |
-
return f"✅ Rankings saved successfully ({len(results['annotations'])}/{len(samples)} completed)", f"Progress: {sum(completed_samples.values())}/{len(samples)}"
|
129 |
-
except json.JSONDecodeError:
|
130 |
-
return "⚠️ Error decoding ranking order. Please try again.", progress_text.value
|
131 |
-
except Exception as e:
|
132 |
-
import traceback
|
133 |
-
print(traceback.format_exc())
|
134 |
-
return f"Error saving ranking: {str(e)}", progress_text.value
|
135 |
-
|
136 |
-
def load_sample(sample_id):
|
137 |
-
"""Load a sample into the interface."""
|
138 |
try:
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
return sample["query"], new_html, "[]", progress, status
|
147 |
except Exception as e:
|
148 |
-
|
149 |
-
|
150 |
-
def next_sample_id(current_id):
|
151 |
-
current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
|
152 |
-
if current_idx == -1:
|
153 |
-
return current_id
|
154 |
-
next_idx = min(current_idx + 1, len(samples) - 1)
|
155 |
-
return samples[next_idx]["id"]
|
156 |
-
|
157 |
-
def prev_sample_id(current_id):
|
158 |
-
current_idx = next((i for i, s in enumerate(samples) if s["id"] == current_id), -1)
|
159 |
-
if current_idx == -1:
|
160 |
-
return current_id
|
161 |
-
prev_idx = max(current_idx - 1, 0)
|
162 |
-
return samples[prev_idx]["id"]
|
163 |
-
|
164 |
-
def save_results():
|
165 |
-
output_path = f"{task_data['task_name']}_human_results.json"
|
166 |
-
try:
|
167 |
-
with open(output_path, "w") as f:
|
168 |
-
json.dump(results, f, indent=2)
|
169 |
-
return f"✅ Results saved to {output_path} ({len(results['annotations'])} annotations)"
|
170 |
-
except Exception as e:
|
171 |
-
return f"⚠️ Error saving results file: {str(e)}"
|
172 |
|
|
|
173 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
174 |
gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
|
175 |
with gr.Accordion("Instructions", open=True):
|
@@ -180,107 +34,277 @@ def create_reranking_interface(task_data):
|
|
180 |
|
181 |
### How to use this interface:
|
182 |
1. Read the query at the top
|
183 |
-
2.
|
184 |
-
3.
|
185 |
4. Click "Submit Rankings" when you're done with the current query
|
186 |
5. Use "Previous" and "Next" to navigate between queries
|
187 |
6. Click "Save All Results" periodically to ensure your work is saved
|
188 |
""".format(instructions=task_data["instructions"]))
|
189 |
|
|
|
190 |
current_sample_id = gr.State(value=samples[0]["id"])
|
191 |
|
|
|
192 |
with gr.Row():
|
193 |
-
progress_text = gr.Textbox(label="Progress", value=f"Progress:
|
194 |
status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
|
195 |
|
|
|
196 |
with gr.Group():
|
197 |
gr.Markdown("## Query:")
|
198 |
-
query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False)
|
199 |
-
|
200 |
-
|
201 |
-
order_state = gr.Textbox(value="[]", visible=False, elem_id="current-order")
|
202 |
with gr.Row():
|
203 |
-
|
204 |
-
|
205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
save_btn = gr.Button("💾 Save All Results", variant="secondary")
|
207 |
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
submit_btn.click(
|
259 |
-
|
260 |
-
inputs=[
|
261 |
outputs=[status_box, progress_text]
|
262 |
)
|
263 |
|
|
|
264 |
next_btn.click(
|
265 |
-
next_sample_id,
|
|
|
|
|
266 |
).then(
|
267 |
load_sample,
|
268 |
inputs=[current_sample_id],
|
269 |
-
outputs=[query_text,
|
|
|
|
|
|
|
270 |
)
|
271 |
|
272 |
prev_btn.click(
|
273 |
-
prev_sample_id,
|
|
|
|
|
274 |
).then(
|
275 |
load_sample,
|
276 |
inputs=[current_sample_id],
|
277 |
-
outputs=[query_text,
|
|
|
|
|
|
|
278 |
)
|
279 |
|
|
|
280 |
save_btn.click(save_results, outputs=[status_box])
|
281 |
|
282 |
-
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
|
285 |
return demo
|
286 |
|
|
|
4 |
from pathlib import Path
|
5 |
|
6 |
def create_reranking_interface(task_data):
|
7 |
+
"""Create a Gradio interface for reranking evaluation."""
|
8 |
samples = task_data["samples"]
|
9 |
results = {"task_name": task_data["task_name"], "task_type": "reranking", "annotations": []}
|
10 |
completed_samples = {s["id"]: False for s in samples}
|
11 |
|
12 |
+
# Try to load existing results
|
13 |
+
output_path = f"{task_data['task_name']}_human_results.json"
|
14 |
+
if os.path.exists(output_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
try:
|
16 |
+
with open(output_path, "r") as f:
|
17 |
+
existing_results = json.load(f)
|
18 |
+
results = existing_results
|
19 |
+
# Update completed samples based on existing annotations
|
20 |
+
for anno in results.get("annotations", []):
|
21 |
+
if "sample_id" in anno:
|
22 |
+
completed_samples[anno["sample_id"]] = True
|
|
|
23 |
except Exception as e:
|
24 |
+
print(f"Error loading existing results: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
+
# Create the main interface
|
27 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
28 |
gr.Markdown(f"# {task_data['task_name']} - Human Reranking Evaluation")
|
29 |
with gr.Accordion("Instructions", open=True):
|
|
|
34 |
|
35 |
### How to use this interface:
|
36 |
1. Read the query at the top
|
37 |
+
2. For each document, select its rank (1 = most relevant)
|
38 |
+
3. Make sure each document has a unique rank (1 to N)
|
39 |
4. Click "Submit Rankings" when you're done with the current query
|
40 |
5. Use "Previous" and "Next" to navigate between queries
|
41 |
6. Click "Save All Results" periodically to ensure your work is saved
|
42 |
""".format(instructions=task_data["instructions"]))
|
43 |
|
44 |
+
# State variables
|
45 |
current_sample_id = gr.State(value=samples[0]["id"])
|
46 |
|
47 |
+
# Progress tracking
|
48 |
with gr.Row():
|
49 |
+
progress_text = gr.Textbox(label="Progress", value=f"Progress: {sum(completed_samples.values())}/{len(samples)}", interactive=False)
|
50 |
status_box = gr.Textbox(label="Status", value="Ready to start evaluation", interactive=False)
|
51 |
|
52 |
+
# Query display
|
53 |
with gr.Group():
|
54 |
gr.Markdown("## Query:")
|
55 |
+
query_text = gr.Textbox(value=samples[0]["query"], label="", interactive=False, lines=3)
|
56 |
+
|
57 |
+
# Validation
|
|
|
58 |
with gr.Row():
|
59 |
+
validate_btn = gr.Button("Validate Rankings", variant="secondary")
|
60 |
+
validation_text = gr.Textbox(label="Validation", interactive=False)
|
61 |
+
|
62 |
+
# Document ranking section
|
63 |
+
gr.Markdown("## Documents to Rank:")
|
64 |
+
|
65 |
+
# Container for document elements
|
66 |
+
doc_containers = []
|
67 |
+
rank_inputs = []
|
68 |
+
doc_texts = []
|
69 |
+
|
70 |
+
# Create a container for up to 10 documents
|
71 |
+
max_docs = 10
|
72 |
+
for i in range(max_docs):
|
73 |
+
with gr.Group(visible=(i < len(samples[0]["candidates"]))) as doc_container:
|
74 |
+
doc_containers.append(doc_container)
|
75 |
+
|
76 |
+
with gr.Row():
|
77 |
+
# Rank selection
|
78 |
+
with gr.Column(scale=1, min_width=100):
|
79 |
+
rank_input = gr.Number(
|
80 |
+
value=i+1,
|
81 |
+
label=f"Rank",
|
82 |
+
minimum=1,
|
83 |
+
maximum=len(samples[0]["candidates"]),
|
84 |
+
step=1,
|
85 |
+
interactive=True
|
86 |
+
)
|
87 |
+
rank_inputs.append(rank_input)
|
88 |
+
|
89 |
+
# Document text
|
90 |
+
with gr.Column(scale=4):
|
91 |
+
doc_text = gr.Textbox(
|
92 |
+
value=samples[0]["candidates"][i] if i < len(samples[0]["candidates"]) else "",
|
93 |
+
label=f"Document {i+1}",
|
94 |
+
lines=4,
|
95 |
+
interactive=False
|
96 |
+
)
|
97 |
+
doc_texts.append(doc_text)
|
98 |
+
|
99 |
+
gr.Markdown("---")
|
100 |
+
|
101 |
+
# Navigation and submission buttons
|
102 |
+
with gr.Row():
|
103 |
+
prev_btn = gr.Button("← Previous Query", size="sm")
|
104 |
+
submit_btn = gr.Button("Submit Rankings", size="lg", variant="primary")
|
105 |
+
next_btn = gr.Button("Next →", size="sm")
|
106 |
save_btn = gr.Button("💾 Save All Results", variant="secondary")
|
107 |
|
108 |
+
# Function to validate rankings
|
109 |
+
def validate_rankings(*ranks):
    """Check that the supplied ranks form a complete ranking 1..N.

    Args:
        *ranks: One value per rank input. ``None`` entries (empty inputs)
            are ignored; every other entry must be convertible to a whole
            number.

    Returns:
        A status string for display: a "✓" message when the ranks are
        exactly the integers 1..N with no repeats, a "⚠️" message that
        names the problem otherwise, or an "Error validating rankings"
        message when a value cannot be interpreted as a number.
    """
    from collections import Counter

    try:
        valid_ranks = []
        for raw in ranks:
            if raw is None:
                continue
            as_int = int(raw)
            # int() truncates, so 1.7 would silently become rank 1.
            if float(raw) != as_int:
                return f"⚠️ Ranks must be whole numbers; got {raw}."
            valid_ranks.append(as_int)

        # Nothing to validate is not a valid ranking.
        if not valid_ranks:
            return "⚠️ No ranks provided. Enter a rank for each document."

        # Check for duplicates
        counts = Counter(valid_ranks)
        duplicates = sorted(rank for rank, count in counts.items() if count > 1)
        if duplicates:
            return f"⚠️ Duplicate ranks found: {', '.join(str(d) for d in duplicates)}. Each document must have a unique rank."

        # Ranks below 1 are never "missing" from 1..max, so they must be
        # rejected explicitly or they would slip through as valid.
        too_small = sorted(rank for rank in valid_ranks if rank < 1)
        if too_small:
            return f"⚠️ Ranks must be at least 1; got: {', '.join(str(r) for r in too_small)}."

        # Check for complete ranking
        max_rank = max(valid_ranks)
        missing = sorted(set(range(1, max_rank + 1)) - set(valid_ranks))
        if missing:
            return f"⚠️ Missing ranks: {', '.join(str(m) for m in missing)}. Ranks must be consecutive integers from 1 to {max_rank}."

        return "✓ Rankings are valid! Ready to submit."
    except (TypeError, ValueError, OverflowError) as e:
        return f"Error validating rankings: {str(e)}"
|
134 |
+
|
135 |
+
# Function to load a sample
|
136 |
+
def load_sample(sample_id):
    """Build the component updates that display the sample ``sample_id``.

    The returned list lines up one-to-one with the outputs this handler is
    wired to: ``[query_text, progress_text, status_box] + rank_inputs +
    doc_texts + doc_containers``, i.e. ``3 + 3 * max_docs`` entries on
    every path.

    Args:
        sample_id: Value matched against the ``"id"`` of each entry in
            ``samples``.

    Returns:
        A list of ``3 + 3 * max_docs`` values/updates. If the sample is not
        found, every entry is a no-op ``gr.update()``. If an exception
        occurs, the first entry carries the error text and the rest are
        no-ops.
    """
    total_outputs = 3 + 3 * max_docs
    try:
        sample = next((s for s in samples if s["id"] == sample_id), None)
        if not sample:
            return [gr.update()] * total_outputs

        candidates = sample["candidates"]
        # Candidates beyond max_docs have no widgets and are not shown.
        num_docs = len(candidates)

        # Get existing ranking if available. Use .get() because results may
        # have been loaded from a file whose entries lack these keys.
        existing_ranking = next(
            (
                anno["rankings"]
                for anno in results.get("annotations", [])
                if anno.get("sample_id") == sample_id
            ),
            None,
        )

        # One rank value per rank input (not per candidate), so the output
        # count always matches. Unused slots get their sequential default.
        ranks = []
        for i in range(max_docs):
            if i < num_docs and existing_ranking and i < len(existing_ranking):
                ranks.append(existing_ranking[i])
            else:
                ranks.append(i + 1)

        # NOTE(review): kept from the original; this mutates the shared
        # server-side component objects — confirm it has the intended
        # effect on the rendered inputs.
        for input_field in rank_inputs:
            input_field.maximum = num_docs

        # Fill in document contents
        docs = [candidates[i] if i < num_docs else "" for i in range(max_docs)]

        # Containers are layout blocks: visibility must be sent as an
        # update, not as a raw boolean value.
        container_visibility = [gr.update(visible=(i < num_docs)) for i in range(max_docs)]

        status = "Already ranked" if completed_samples.get(sample_id, False) else "Ready to rank"
        progress = f"Progress: {sum(completed_samples.values())}/{len(samples)}"

        return [sample["query"], progress, status, *ranks, *docs, *container_visibility]
    except Exception as e:
        # UI boundary: log the traceback and surface the message in the
        # query box instead of crashing the event handler.
        import traceback
        print(traceback.format_exc())
        return [gr.update(value=f"Error loading sample: {str(e)}")] + [gr.update()] * (total_outputs - 1)
|
181 |
+
|
182 |
+
# Function to save rankings
|
183 |
+
def save_rankings(sample_id, *ranks):
    """Validate and persist the ranking submitted for one sample.

    Args:
        sample_id: Value matched against the ``"id"`` of each entry in
            ``samples``.
        *ranks: One value per rank input. Only the first
            ``len(sample["candidates"])`` entries are considered.

    Returns:
        A ``(status_message, progress_message)`` tuple. The progress
        message is always computed from ``completed_samples``, so it
        reflects the current state on failure paths as well.

    Side effects:
        On success, stores the annotation in ``results``, marks the sample
        completed and rewrites ``output_path`` as indented JSON.
    """

    def current_progress():
        # Computed on demand: the component's ``.value`` attribute only
        # holds the text it was constructed with.
        return f"Progress: {sum(completed_samples.values())}/{len(samples)}"

    try:
        # Get the sample
        sample = next((s for s in samples if s["id"] == sample_id), None)
        if not sample:
            return "⚠️ Sample not found", current_progress()

        num_candidates = len(sample["candidates"])

        # Get the rankings for just this sample
        submitted = [r for r in ranks[:num_candidates] if r is not None]

        # Validate rankings
        if len(submitted) != num_candidates:
            return f"⚠️ Not all documents have ranks. Expected {num_candidates}, got {len(submitted)}.", current_progress()

        # int() truncates, so reject fractional input rather than saving a
        # rank the annotator did not enter.
        if any(float(r) != int(r) for r in submitted):
            return "⚠️ Ranks must be whole numbers.", current_progress()
        valid_ranks = [int(r) for r in submitted]

        if sorted(valid_ranks) != list(range(1, num_candidates + 1)):
            return "⚠️ Rankings must include all integers from 1 to " + str(num_candidates), current_progress()

        # Create annotation
        annotation = {"sample_id": sample_id, "rankings": valid_ranks}

        # Update or add the annotation
        existing_idx = next((i for i, a in enumerate(results["annotations"]) if a["sample_id"] == sample_id), None)
        if existing_idx is not None:
            results["annotations"][existing_idx] = annotation
        else:
            results["annotations"].append(annotation)

        # Mark sample as completed
        completed_samples[sample_id] = True

        # Save to file
        with open(output_path, "w") as f:
            json.dump(results, f, indent=2)

        return f"✅ Rankings saved successfully! ({sum(completed_samples.values())}/{len(samples)} completed)", current_progress()
    except Exception as e:
        # UI boundary: log the traceback and report the failure in the
        # status box instead of crashing the event handler.
        import traceback
        print(traceback.format_exc())
        return f"Error saving rankings: {str(e)}", current_progress()
|
227 |
|
228 |
+
# Function to navigate to next sample
|
229 |
+
def next_sample_id(current_id):
    """Return the id of the sample that follows ``current_id``.

    The first entry of ``samples`` whose ``"id"`` equals ``current_id`` is
    used. When that entry is the last one, its own id is returned; when no
    entry matches, ``current_id`` is returned unchanged.
    """
    for position, entry in enumerate(samples):
        if entry["id"] == current_id:
            last_position = len(samples) - 1
            # Clamp so stepping past the end stays on the final sample.
            target = position + 1 if position < last_position else last_position
            return samples[target]["id"]
    return current_id
|
235 |
+
|
236 |
+
# Function to navigate to previous sample
|
237 |
+
def prev_sample_id(current_id):
    """Return the id of the sample that precedes ``current_id``.

    The first entry of ``samples`` whose ``"id"`` equals ``current_id`` is
    used. When that entry is the first one, its own id is returned; when no
    entry matches, ``current_id`` is returned unchanged.
    """
    position = -1
    for idx, entry in enumerate(samples):
        if entry["id"] == current_id:
            position = idx
            break
    if position < 0:
        return current_id
    # Clamp so stepping before the start stays on the first sample.
    target = position - 1 if position else 0
    return samples[target]["id"]
|
243 |
+
|
244 |
+
# Function to save all results
|
245 |
+
def save_results():
    """Write ``results`` to ``output_path`` as indented JSON.

    Returns:
        A status string: a "✅" message with the output path and the number
        of annotations on success, or a "⚠️" message with the exception
        text if anything in the write or the message construction fails.
    """
    try:
        with open(output_path, mode="w") as out_file:
            json.dump(results, out_file, indent=2)
        annotation_count = len(results["annotations"])
        return f"✅ Results saved to {output_path} ({annotation_count} annotations)"
    except Exception as err:
        return f"⚠️ Error saving results file: {err!s}"
|
252 |
+
|
253 |
+
# Connect validation button
|
254 |
+
validate_btn.click(
|
255 |
+
validate_rankings,
|
256 |
+
inputs=rank_inputs,
|
257 |
+
outputs=validation_text
|
258 |
+
)
|
259 |
+
|
260 |
+
# Connect submission button
|
261 |
submit_btn.click(
|
262 |
+
save_rankings,
|
263 |
+
inputs=[current_sample_id] + rank_inputs,
|
264 |
outputs=[status_box, progress_text]
|
265 |
)
|
266 |
|
267 |
+
# Connect navigation buttons
|
268 |
next_btn.click(
|
269 |
+
next_sample_id,
|
270 |
+
inputs=[current_sample_id],
|
271 |
+
outputs=[current_sample_id]
|
272 |
).then(
|
273 |
load_sample,
|
274 |
inputs=[current_sample_id],
|
275 |
+
outputs=[query_text, progress_text, status_box] +
|
276 |
+
rank_inputs +
|
277 |
+
doc_texts +
|
278 |
+
doc_containers
|
279 |
)
|
280 |
|
281 |
prev_btn.click(
|
282 |
+
prev_sample_id,
|
283 |
+
inputs=[current_sample_id],
|
284 |
+
outputs=[current_sample_id]
|
285 |
).then(
|
286 |
load_sample,
|
287 |
inputs=[current_sample_id],
|
288 |
+
outputs=[query_text, progress_text, status_box] +
|
289 |
+
rank_inputs +
|
290 |
+
doc_texts +
|
291 |
+
doc_containers
|
292 |
)
|
293 |
|
294 |
+
# Connect save button
|
295 |
save_btn.click(save_results, outputs=[status_box])
|
296 |
|
297 |
+
# Initialize interface with first sample
|
298 |
+
demo.load(
|
299 |
+
lambda: load_sample(samples[0]['id']),
|
300 |
+
outputs=[query_text, progress_text, status_box] +
|
301 |
+
rank_inputs +
|
302 |
+
doc_texts +
|
303 |
+
doc_containers
|
304 |
+
)
|
305 |
+
|
306 |
+
# Add CSS styling
|
307 |
+
demo.load(lambda: gr.Accordion.update(open=True), outputs=[])
|
308 |
|
309 |
return demo
|
310 |
|