manu committed on
Commit
0a4c43d
·
verified ·
1 Parent(s): 7227c5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -192,8 +192,8 @@ You are a PDF research agent with a single tool: mcp_test_search(query: string,
192
  Act iteratively:
193
  1) Split the user question into 1–4 focused sub-queries. Subqueries should be asked as natural language questions in the english language, not just keywords.
194
  2) For each sub-query, call mcp_test_search (k=5 by default; increase to up to 10 if you need to go deep).
195
- 3) You will receive the output of mcp_test_search as a list of indices corresponding to page numbers. Stop generating once all the tool calls end. You will later be fed the corresponding pages as images in a follow-up message.
196
- 4) Stop early when confident; otherwise run new search calls using the tool when need be to find additional information. Use up to 5 rounds of iterations and 20 searches in total. If info is missing, try to continue searching using new keywords and queries.
197
 
198
  Workflow:
199
  • Use ONLY the provided images for grounding and cite as (p.<page>).
@@ -282,12 +282,12 @@ def stream_agent(question: str,
282
  if round_idx == 1:
283
  parts.append({"type": "input_text", "text": question})
284
  else:
285
- parts.append({"type": "input_text", "text": "Continue reasoning with the newly attached pages."})
286
 
287
  parts += _build_image_parts_from_indices(attached_indices)
288
  if attached_indices:
289
  pages_str = ", ".join(str(i + 1) for i in sorted(set(attached_indices)))
290
- parts.append({"type": "input_text", "text": f"(Attached pages: {pages_str}). Use ONLY these images; cite as (p.X)."})
291
 
292
  # First call includes system; follow-ups use previous_response_id
293
  if prev_response_id:
@@ -340,14 +340,12 @@ def stream_agent(question: str,
340
  elif etype in ("response.mcp_call_arguments.delta", "response.tool_call_arguments.delta"):
341
  delta = getattr(event, "delta", None)
342
  if delta:
343
- log_lines.append(str(delta))
344
  round_state["summary_text"] += "\nQuery call: " + event.delta + "\n"
345
  yield round_state["final_text"] or " ", round_state["summary_text"] or " ", "\n".join(log_lines[-400:])
346
 
347
  # Capture tool RESULT text and try to parse indices
348
  elif etype.startswith("response.output_item.done"):
349
- print("A tool output was detected")
350
- print(event)
351
  delta_text = getattr(event.item, "output", "")
352
  if delta_text:
353
  tool_result_buffer += str(delta_text)
@@ -356,6 +354,8 @@ def stream_agent(question: str,
356
  round_state["last_search_indices"] += parsed_now
357
  log_lines.append(f"[tool-result] indices={parsed_now}")
358
  yield round_state["final_text"] or " ", round_state["summary_text"] or " ", "\n".join(log_lines[-400:])
 
 
359
 
360
  # Finalize this response; remember ID for follow-ups
361
  _final = stream.get_final_response()
@@ -517,7 +517,7 @@ def build_ui():
517
  index_url_btn.click(handle_url, inputs=[url_box], outputs=[status_box, pdf_view])
518
 
519
  # ---- Tab 2: Ask (Direct — returns indices)
520
- with gr.Tab("2) Ask (Direct — returns indices)"):
521
  with gr.Row():
522
  with gr.Column(scale=1):
523
  query_box = gr.Textbox(placeholder="Enter your question…", label="Query", lines=4)
@@ -530,7 +530,7 @@ def build_ui():
530
  search_button.click(search, inputs=[query_box, k_slider], outputs=[output_text])
531
 
532
  # ---- Tab 3: Agent (Streaming)
533
- with gr.Tab("3) Agent (Streaming)"):
534
  with gr.Row(equal_height=True):
535
  with gr.Column(scale=1):
536
  with gr.Group():
@@ -576,9 +576,9 @@ def build_ui():
576
  )
577
 
578
  with gr.Column(scale=3):
579
- with gr.Tab("Answer (Markdown)"):
580
  final_md = gr.Markdown(value="", elem_classes=["card", "markdown-wrap"])
581
- with gr.Tab("Live Summary (Markdown)"):
582
  summary_md = gr.Markdown(value="", elem_classes=["card", "summary-wrap"])
583
  with gr.Tab("Event Log"):
584
  log_md = gr.Markdown(value="", elem_classes=["card", "log-box"])
 
192
  Act iteratively:
193
  1) Split the user question into 1–4 focused sub-queries. Subqueries should be asked as natural language questions in the english language, not just keywords.
194
  2) For each sub-query, call mcp_test_search (k=5 by default; increase to up to 10 if you need to go deep).
195
+ 3) You will receive the output of mcp_test_search as a list of indices corresponding to page numbers. Stop generating once all the tool calls end. You will be fed the corresponding pages as images in a follow-up message.
196
+ 4) Stop early when confident; otherwise run new search calls using the tool to find additional missing information. Use up to 5 rounds of iterations and 20 searches in total. If info is missing, try to continue searching using new keywords and queries.
197
 
198
  Workflow:
199
  • Use ONLY the provided images for grounding and cite as (p.<page>).
 
282
  if round_idx == 1:
283
  parts.append({"type": "input_text", "text": question})
284
  else:
285
+ parts.append({"type": "input_text", "text": "Continue reasoning with the newly attached pages. Remember you can ask further questions to the search tool."})
286
 
287
  parts += _build_image_parts_from_indices(attached_indices)
288
  if attached_indices:
289
  pages_str = ", ".join(str(i + 1) for i in sorted(set(attached_indices)))
290
+ parts.append({"type": "input_text", "text": f"(Attached pages from round {round_idx}: {pages_str}). Ground your answer in these images, or query for new pages."})
291
 
292
  # First call includes system; follow-ups use previous_response_id
293
  if prev_response_id:
 
340
  elif etype in ("response.mcp_call_arguments.delta", "response.tool_call_arguments.delta"):
341
  delta = getattr(event, "delta", None)
342
  if delta:
343
+ log_lines.append("[call] " + str(delta))
344
  round_state["summary_text"] += "\nQuery call: " + event.delta + "\n"
345
  yield round_state["final_text"] or " ", round_state["summary_text"] or " ", "\n".join(log_lines[-400:])
346
 
347
  # Capture tool RESULT text and try to parse indices
348
  elif etype.startswith("response.output_item.done"):
 
 
349
  delta_text = getattr(event.item, "output", "")
350
  if delta_text:
351
  tool_result_buffer += str(delta_text)
 
354
  round_state["last_search_indices"] += parsed_now
355
  log_lines.append(f"[tool-result] indices={parsed_now}")
356
  yield round_state["final_text"] or " ", round_state["summary_text"] or " ", "\n".join(log_lines[-400:])
357
+ else:
358
+ print(etype)
359
 
360
  # Finalize this response; remember ID for follow-ups
361
  _final = stream.get_final_response()
 
517
  index_url_btn.click(handle_url, inputs=[url_box], outputs=[status_box, pdf_view])
518
 
519
  # ---- Tab 2: Ask (Direct — returns indices)
520
+ with gr.Tab("2) Direct Search"):
521
  with gr.Row():
522
  with gr.Column(scale=1):
523
  query_box = gr.Textbox(placeholder="Enter your question…", label="Query", lines=4)
 
530
  search_button.click(search, inputs=[query_box, k_slider], outputs=[output_text])
531
 
532
  # ---- Tab 3: Agent (Streaming)
533
+ with gr.Tab("3) Deep Search"):
534
  with gr.Row(equal_height=True):
535
  with gr.Column(scale=1):
536
  with gr.Group():
 
576
  )
577
 
578
  with gr.Column(scale=3):
579
+ with gr.Tab("Answer"):
580
  final_md = gr.Markdown(value="", elem_classes=["card", "markdown-wrap"])
581
+ with gr.Tab("Live Reasoning"):
582
  summary_md = gr.Markdown(value="", elem_classes=["card", "summary-wrap"])
583
  with gr.Tab("Event Log"):
584
  log_md = gr.Markdown(value="", elem_classes=["card", "log-box"])