Update app.py
app.py CHANGED
@@ -192,8 +192,8 @@ You are a PDF research agent with a single tool: mcp_test_search(query: string,
 Act iteratively:
 1) Split the user question into 1–4 focused sub-queries. Subqueries should be asked as natural language questions in the english language, not just keywords.
 2) For each sub-query, call mcp_test_search (k=5 by default; increase to up to 10 if you need to go deep).
-3) You will receive the output of mcp_test_search as a list of indices corresponding to page numbers. Stop generating once all the tool calls end. You will
-4) Stop early when confident; otherwise run new search calls using the tool
+3) You will receive the output of mcp_test_search as a list of indices corresponding to page numbers. Stop generating once all the tool calls end. You will be fed the corresponding pages as images in a follow-up message.
+4) Stop early when confident; otherwise run new search calls using the tool to find additional missing information. Use up to 5 rounds of iterations and 20 searches in total. If info is missing, try to continue searching using new keywords and queries.
 
 Workflow:
 • Use ONLY the provided images for grounding and cite as (p.<page>).
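For reference, the call pattern the updated prompt asks the model to produce looks roughly like this (the queries and k values below are illustrative only, not taken from app.py):

    # Illustrative tool-call arguments for mcp_test_search: one natural-language
    # sub-query per call, k=5 by default, raised toward 10 for deeper digging.
    example_calls = [
        {"name": "mcp_test_search", "arguments": {"query": "What dataset does the paper evaluate on?", "k": 5}},
        {"name": "mcp_test_search", "arguments": {"query": "How is the page index built?", "k": 8}},
    ]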
@@ -282,12 +282,12 @@ def stream_agent(question: str,
         if round_idx == 1:
             parts.append({"type": "input_text", "text": question})
         else:
-            parts.append({"type": "input_text", "text": "Continue reasoning with the newly attached pages."})
+            parts.append({"type": "input_text", "text": "Continue reasoning with the newly attached pages. Remember you can ask further questions to the search tool."})
 
         parts += _build_image_parts_from_indices(attached_indices)
         if attached_indices:
             pages_str = ", ".join(str(i + 1) for i in sorted(set(attached_indices)))
-            parts.append({"type": "input_text", "text": f"(Attached pages: {pages_str}).
+            parts.append({"type": "input_text", "text": f"(Attached pages from round {round_idx}: {pages_str}). Ground your answer in these images, or query for new pages."})
 
         # First call includes system; follow-ups use previous_response_id
         if prev_response_id:
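_build_image_parts_from_indices is referenced here but is not part of this diff; a minimal sketch of what such a helper could look like, assuming the indexed PDF pages are held as PIL images in a module-level list (PAGE_IMAGES, the PNG encoding, and the data-URL form are all assumptions, not code from app.py):

    import base64
    import io

    def _build_image_parts_from_indices(indices):
        # Assumed helper: map 0-based page indices to Responses API image parts.
        # PAGE_IMAGES is a hypothetical list of PIL.Image pages built at index time.
        parts = []
        for i in sorted(set(indices)):
            buf = io.BytesIO()
            PAGE_IMAGES[i].save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode("ascii")
            parts.append({"type": "input_image", "image_url": f"data:image/png;base64,{b64}"})
        return parts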
@@ -340,14 +340,12 @@ def stream_agent(question: str,
             elif etype in ("response.mcp_call_arguments.delta", "response.tool_call_arguments.delta"):
                 delta = getattr(event, "delta", None)
                 if delta:
-                    log_lines.append(str(delta))
+                    log_lines.append("[call] " + str(delta))
                     round_state["summary_text"] += "\nQuery call: " + event.delta + "\n"
                     yield round_state["final_text"] or " ", round_state["summary_text"] or " ", "\n".join(log_lines[-400:])
 
             # Capture tool RESULT text and try to parse indices
             elif etype.startswith("response.output_item.done"):
-                print("A tool output was detected")
-                print(event)
                 delta_text = getattr(event.item, "output", "")
                 if delta_text:
                     tool_result_buffer += str(delta_text)
@@ -356,6 +354,8 @@ def stream_agent(question: str,
                         round_state["last_search_indices"] += parsed_now
                         log_lines.append(f"[tool-result] indices={parsed_now}")
                         yield round_state["final_text"] or " ", round_state["summary_text"] or " ", "\n".join(log_lines[-400:])
+            else:
+                print(etype)
 
         # Finalize this response; remember ID for follow-ups
         _final = stream.get_final_response()
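The step that turns tool_result_buffer into parsed_now sits outside this hunk; one plausible reading, assuming the MCP tool returns its page indices as a JSON list or as bare integers in text (the helper name below is hypothetical):

    import json
    import re

    def _parse_indices(tool_output: str) -> list[int]:
        # Assumed helper: recover page indices from raw tool-result text.
        # Prefer a JSON list like "[3, 7, 12]"; fall back to any integers found.
        try:
            data = json.loads(tool_output)
            if isinstance(data, list):
                return [int(x) for x in data]
        except (ValueError, TypeError):
            pass
        return [int(m) for m in re.findall(r"\d+", tool_output)]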
@@ -517,7 +517,7 @@ def build_ui():
         index_url_btn.click(handle_url, inputs=[url_box], outputs=[status_box, pdf_view])
 
         # ---- Tab 2: Ask (Direct — returns indices)
-        with gr.Tab("2)
+        with gr.Tab("2) Direct Search"):
             with gr.Row():
                 with gr.Column(scale=1):
                     query_box = gr.Textbox(placeholder="Enter your question…", label="Query", lines=4)
@@ -530,7 +530,7 @@ def build_ui():
         search_button.click(search, inputs=[query_box, k_slider], outputs=[output_text])
 
         # ---- Tab 3: Agent (Streaming)
-        with gr.Tab("3)
+        with gr.Tab("3) Deep Search"):
             with gr.Row(equal_height=True):
                 with gr.Column(scale=1):
                     with gr.Group():
@@ -576,9 +576,9 @@ def build_ui():
                     )
 
                 with gr.Column(scale=3):
-                    with gr.Tab("Answer
+                    with gr.Tab("Answer"):
                         final_md = gr.Markdown(value="", elem_classes=["card", "markdown-wrap"])
-                    with gr.Tab("Live
+                    with gr.Tab("Live Reasoning"):
                         summary_md = gr.Markdown(value="", elem_classes=["card", "summary-wrap"])
                     with gr.Tab("Event Log"):
                         log_md = gr.Markdown(value="", elem_classes=["card", "log-box"])
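The wiring that feeds these three panes is not in the diff, but since stream_agent yields (final_text, summary_text, log_text) triples, it is presumably bound as a Gradio generator along these lines (the button and textbox names are assumptions):

    # Assumed wiring: Gradio streams each yielded triple into the three panes.
    run_btn.click(
        stream_agent,
        inputs=[agent_query_box],   # plus whatever other controls stream_agent takes
        outputs=[final_md, summary_md, log_md],
    )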