onisj committed on
Commit
488dc3e
·
1 Parent(s): 76b50c7

Use free tools only, remove OpenAI dependency

Browse files
app.py CHANGED
@@ -1,96 +1,478 @@
 
 
 
1
  import aiohttp
2
  import asyncio
3
- from graph import graph
4
- from state import JARVISState
5
- from pydantic import BaseModel
6
- from typing import List
7
  import json
8
- import os
 
 
 
 
 
 
 
 
 
9
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
10
 
11
  # Load environment variables
12
  load_dotenv()
13
- # Debug: Verify environment variables
14
- print(f"OPENAI_API_KEY loaded: {'set' if os.getenv('OPENAI_API_KEY') else 'not set'}")
15
- print(f"LANGFUSE_PUBLIC_KEY loaded: {'set' if os.getenv('LANGFUSE_PUBLIC_KEY') else 'not set'}")
16
 
17
- # Verify critical environment variables
18
- required_env_vars = ["OPENAI_API_KEY", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
19
  for var in required_env_vars:
20
  if not os.getenv(var):
21
  raise ValueError(f"Environment variable {var} is not set")
 
22
 
23
- # Pydantic Models for Submission
24
- class Answer(BaseModel):
25
- task_id: str
26
- submitted_answer: str
27
-
28
- class Submission(BaseModel):
29
- username: str
30
- agent_code: str
31
- answers: List[Answer]
32
-
33
- async def fetch_questions() -> List[dict]:
34
- async with aiohttp.ClientSession() as session:
35
- async with session.get("https://api.gaia-benchmark.com/questions") as resp:
36
- return await resp.json()
37
-
38
- async def download_file(task_id: str, file_path: str) -> bool:
39
- async with aiohttp.ClientSession() as session:
40
- async with session.get(f"https://api.gaia-benchmark.com/files/{task_id}") as resp:
41
- if resp.status == 200:
42
- with open(file_path, "wb") as f:
43
- f.write(await resp.read())
44
- return True
45
- return False
46
-
47
- async def process_question(question: dict) -> Answer:
48
- # Determine file type based on question context
49
- file_type = "jpg" if "image" in question["question"].lower() else "txt"
50
- if "menu" in question["question"].lower() or "report" in question["question"].lower() or "document" in question["question"].lower():
51
- file_type = "pdf" # Prioritize PDF for reports/documents
52
- elif "data" in question["question"].lower():
53
- file_type = "csv"
54
-
55
- file_path = f"temp_{question['task_id']}.{file_type}"
56
- await download_file(question["task_id"], file_path)
57
-
58
- state = JARVISState(
59
- task_id=question["task_id"],
60
- question=question["question"],
61
- tools_needed=[],
62
- web_results=[],
63
- file_results="",
64
- image_results="",
65
- calculation_results="",
66
- document_results="",
67
- messages=[],
68
- answer=""
69
  )
70
- # Use unique thread_id for memory
71
- result = await graph.ainvoke(state, config={"thread_id": question["task_id"]})
72
- return Answer(task_id=question["task_id"], submitted_answer=result["answer"])
73
-
74
- async def submit_answers(answers: List[Answer], username: str, agent_code: str):
75
- submission = Submission(
76
- username=username,
77
- agent_code=agent_code,
78
- answers=answers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  )
80
- async with aiohttp.ClientSession() as session:
81
- async with session.post("https://api.gaia-benchmark.com/submit", json=submission.dict()) as resp:
82
- return await resp.json()
83
-
84
- async def main():
85
- username = "onisj" # Your Hugging Face username
86
- agent_code = "https://huggingface.co/spaces/onisj/jarvis_gaia_agent/tree/main"
87
- questions = await fetch_questions()
88
- answers = []
89
- for question in questions[:20]: # Process 20 questions
90
- answer = await process_question(question)
91
- answers.append(answer)
92
- result = await submit_answers(answers, username, agent_code)
93
- print("Submission result:", json.dumps(result, indent=2))
94
 
95
  if __name__ == "__main__":
96
- asyncio.run(main())
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
  import aiohttp
5
  import asyncio
 
 
 
 
6
  import json
7
+ import nest_asyncio
8
+ from langgraph.graph import StateGraph, END
9
+ from langgraph.checkpoint.memory import MemorySaver
10
+ from langchain_huggingface import HuggingFacePipeline
11
+ from transformers import pipeline
12
+ from langchain_core.messages import SystemMessage, HumanMessage
13
+ from tools import search_tool, multi_hop_search_tool, file_parser_tool, image_parser_tool, calculator_tool, document_retriever_tool
14
+ from tools.search import initialize_search_tools
15
+ from state import JARVISState
16
+ import pandas as pd
17
  from dotenv import load_dotenv
18
+ import logging
19
+ from langfuse.callback import CallbackHandler
20
+
21
+ # Set up logging
22
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # Apply nest_asyncio
26
+ nest_asyncio.apply()
27
 
28
  # Load environment variables
29
  load_dotenv()
 
 
 
30
 
31
+ # Verify environment variables
32
+ required_env_vars = ["SPACE_ID", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
33
  for var in required_env_vars:
34
  if not os.getenv(var):
35
  raise ValueError(f"Environment variable {var} is not set")
36
+ logger.info(f"Environment variables loaded: SPACE_ID={os.getenv('SPACE_ID')[:10]}..., LANGFUSE_HOST={os.getenv('LANGFUSE_HOST', 'https://cloud.langfuse.com')}")
37
 
38
+ # Initialize Hugging Face model
39
+ try:
40
+ hf_pipeline = pipeline(
41
+ "text-generation",
42
+ model="mistralai/Mixtral-7B-Instruct-v0.1",
43
+ device_map="auto",
44
+ max_new_tokens=512,
45
+ do_sample=True,
46
+ temperature=0.7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  )
48
+ llm = HuggingFacePipeline(pipeline=hf_pipeline)
49
+ logger.info("HuggingFace model initialized: mistralai/Mixtral-7B-Instruct-v0.1")
50
+ except Exception as e:
51
+ logger.error(f"Failed to initialize HuggingFace model: {e}")
52
+ llm = None
53
+
54
+ # Initialize search tools with LLM
55
+ try:
56
+ initialize_search_tools(llm)
57
+ logger.info("Search tools initialized")
58
+ except Exception as e:
59
+ logger.error(f"Failed to initialize search tools: {e}")
60
+
61
+ # Initialize Langfuse
62
+ try:
63
+ langfuse = CallbackHandler(
64
+ public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
65
+ secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
66
+ host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
67
+ )
68
+ logger.info("Langfuse initialized successfully")
69
+ except Exception as e:
70
+ logger.warning(f"Failed to initialize Langfuse: {e}")
71
+ langfuse = None
72
+
73
+ # Initialize MemorySaver
74
+ memory = MemorySaver()
75
+ use_checkpointing = True
76
+
77
+ # --- Constants ---
78
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space/api"
79
+ GAIA_FILE_URL = "https://api.gaia-benchmark.com/files/"
80
+
81
+ # --- Helper Functions ---
82
def log_state(task_id: str, state: JARVISState):
    """Append a snapshot of the agent state for ``task_id`` to state_log.json.

    The snapshot contains the question, the selected tools, each per-tool
    result field and the current answer. Any failure (missing key, value that
    cannot be serialised, I/O error) is logged and swallowed, so logging never
    interrupts the caller.
    """
    logged_fields = (
        "question",
        "tools_needed",
        "web_results",
        "file_results",
        "image_results",
        "calculation_results",
        "document_results",
        "answer",
    )
    try:
        snapshot = {"task_id": task_id}
        snapshot.update({field: state[field] for field in logged_fields})
        with open("state_log.json", "a") as log_file:
            # One pretty-printed JSON object per entry, newline-terminated.
            json.dump(snapshot, log_file, indent=2)
            log_file.write("\n")
    except Exception as exc:
        logger.error(f"Error logging state for task {task_id}: {exc}")
101
+
102
async def test_gaia_api(task_id: str) -> bool:
    """Test connectivity to the GAIA file API.

    Sends a HEAD request for ``task_id``'s file and treats 200, 403 and 404 as
    "the API answered". Any other status, a timeout, or a connection error
    yields ``False``.

    Args:
        task_id: GAIA task identifier appended to ``GAIA_FILE_URL``.

    Returns:
        ``True`` when the endpoint responded with one of the accepted statuses.
    """
    try:
        # aiohttp expects a ClientTimeout object; a bare number is only kept
        # for backward compatibility.
        probe_timeout = aiohttp.ClientTimeout(total=5)
        async with aiohttp.ClientSession(timeout=probe_timeout) as session:
            async with session.head(f"{GAIA_FILE_URL}{task_id}") as resp:
                return resp.status in (200, 403, 404)
    except Exception as e:
        logger.warning(f"GAIA API test failed: {e}")
        return False
111
+
112
+ # --- Node Functions ---
113
async def parse_question(state: JARVISState) -> JARVISState:
    """Ask the LLM which tools the question needs and store them in the state.

    The selected tool names are written to ``state["tools_needed"]``. When no
    LLM is available, or its reply is not a JSON list of strings, the default
    ``["web_search"]`` is used. On an unexpected error the list is set to
    ``[]``. The state is logged in every case.

    Returns:
        The same ``state`` object with ``tools_needed`` updated.
    """
    try:
        question = state["question"]
        prompt = (
            f"Analyze this GAIA question: {question}\n"
            "Determine which tools are needed (web_search, multi_hop_search, "
            "file_parser, image_parser, calculator, document_retriever).\n"
            "Return a JSON list of tool names."
        )
        tools_needed = ["web_search"]
        if llm:
            response = await llm.ainvoke(prompt, config={"callbacks": [langfuse] if langfuse else []})
            # HuggingFacePipeline is a text-completion LLM, so ainvoke returns a
            # plain str; only chat models return a message carrying `.content`.
            raw_reply = getattr(response, "content", response)
            try:
                parsed = json.loads(raw_reply)
            except (json.JSONDecodeError, TypeError) as je:
                logger.warning(f"Invalid JSON in LLM response for task {state['task_id']}: {je}")
            else:
                if isinstance(parsed, list) and all(isinstance(name, str) for name in parsed):
                    tools_needed = parsed
                else:
                    # Valid JSON of the wrong shape (object, number, ...) would
                    # otherwise break the dispatcher's iteration.
                    logger.warning(
                        f"LLM response for task {state['task_id']} is not a list of tool names; using default tools"
                    )
        else:
            logger.warning("No LLM available, using default tools")
        state["tools_needed"] = tools_needed
        log_state(state["task_id"], state)
        return state
    except Exception as e:
        logger.error(f"Error parsing question for task {state['task_id']}: {e}")
        state["tools_needed"] = []
        log_state(state["task_id"], state)
        return state
137
+
138
async def tool_dispatcher(state: JARVISState) -> JARVISState:
    """Run every tool listed in ``state["tools_needed"]`` and collect results.

    Works on a copy of the state. File-based tools (file parser, image parser,
    document retriever) are skipped when the GAIA file API probe fails. A
    failure in one tool is logged and does not stop the others.

    Returns:
        The updated copy of the state, or the original ``state`` if the
        dispatcher itself fails.
    """
    try:
        tools_needed = state["tools_needed"]
        updated_state = state.copy()
        # copy() is shallow: give the copy its own list so that extend() below
        # cannot mutate the caller's state.
        updated_state["web_results"] = list(updated_state.get("web_results") or [])
        can_download_files = await test_gaia_api(updated_state["task_id"])
        web_search_done = False

        for tool in tools_needed:
            try:
                if tool in ("web_search", "multi_hop_search"):
                    # web_search_agent already performs every requested search
                    # variant in one call; invoking it once per tool name would
                    # duplicate the results.
                    if web_search_done:
                        continue
                    web_search_done = True
                    result = await web_search_agent(updated_state)
                    updated_state["web_results"].extend(result["web_results"])
                elif tool == "file_parser" and can_download_files:
                    result = await file_parser_agent(updated_state)
                    updated_state["file_results"] = result["file_results"]
                elif tool == "image_parser" and can_download_files:
                    result = await image_parser_agent(updated_state)
                    updated_state["image_results"] = result["image_results"]
                elif tool == "calculator":
                    result = await calculator_agent(updated_state)
                    updated_state["calculation_results"] = result["calculation_results"]
                elif tool == "document_retriever" and can_download_files:
                    result = await document_retriever_agent(updated_state)
                    updated_state["document_results"] = result["document_results"]
            except Exception as e:
                logger.warning(f"Error in tool {tool} for task {updated_state['task_id']}: {e}")

        log_state(updated_state["task_id"], updated_state)
        return updated_state
    except Exception as e:
        logger.error(f"Error in tool dispatcher for task {state['task_id']}: {e}")
        log_state(state["task_id"], state)
        return state
170
+
171
async def web_search_agent(state: JARVISState) -> JARVISState:
    """Run the requested web searches for the question.

    Returns a partial state ``{"web_results": [...]}`` with one entry per
    search variant present in ``state["tools_needed"]`` (plain search first,
    then the three-step multi-hop search). Any error yields an empty list.
    """
    # NOTE(review): the result of `.invoke(...)` is awaited; if these are
    # LangChain tools the async entry point is `.ainvoke(...)`. tools/search.py
    # is not visible here - confirm.
    try:
        collected = []
        selected = state["tools_needed"]
        if "web_search" in selected:
            collected.append(await search_tool.invoke({"query": state["question"]}))
        if "multi_hop_search" in selected:
            collected.append(
                await multi_hop_search_tool.invoke({"query": state["question"], "steps": 3})
            )
        return {"web_results": collected}
    except Exception as exc:
        logger.error(f"Error in web search for task {state['task_id']}: {exc}")
        return {"web_results": []}
184
+
185
async def file_parser_agent(state: JARVISState) -> JARVISState:
    """Parse the task's downloaded file when the file parser was requested.

    The file type is "csv" when the question mentions "data", otherwise "txt".

    Returns:
        ``{"file_results": <parsed text>}``, an empty string when the tool was
        not requested, or "File parsing failed" on error.
    """
    try:
        if "file_parser" not in state["tools_needed"]:
            return {"file_results": ""}
        file_type = "csv" if "data" in state["question"].lower() else "txt"
        # file_parser_tool is defined with LangChain's @tool decorator, so it
        # has no `.aparse`; it is called through `ainvoke` with its arguments
        # passed as a dict.
        result = await file_parser_tool.ainvoke(
            {"task_id": state["task_id"], "file_type": file_type}
        )
        return {"file_results": result}
    except Exception as e:
        logger.error(f"Error in file parser for task {state['task_id']}: {e}")
        return {"file_results": "File parsing failed"}
195
+
196
async def image_parser_agent(state: JARVISState) -> JARVISState:
    """Analyse the task's downloaded image when the image parser was requested.

    Uses the "match" task with query "fruits" when the question mentions
    fruits, otherwise the "describe" task with an empty query. Returns a
    partial state with ``image_results``.
    """
    # NOTE(review): the sibling tools in this package are @tool functions
    # without an `.aparse` method; tools/image_parser.py is not visible here -
    # confirm this call still matches its interface.
    try:
        if "image_parser" not in state["tools_needed"]:
            return {"image_results": ""}

        wants_match = "fruits" in state["question"].lower()
        task = "match" if wants_match else "describe"
        match_query = "fruits" if wants_match else ""

        file_path = f"temp_{state['task_id']}.jpg"
        if not os.path.exists(file_path):
            logger.warning(f"Image file not found for task {state['task_id']}")
            return {"image_results": "Image file not found"}

        parsed = await image_parser_tool.aparse(file_path, task=task, match_query=match_query)
        return {"image_results": parsed}
    except Exception as exc:
        logger.error(f"Error in image parser for task {state['task_id']}: {exc}")
        return {"image_results": "Image parsing failed"}
213
+
214
async def calculator_agent(state: JARVISState) -> JARVISState:
    """Extract a mathematical expression with the LLM and evaluate it.

    Without an LLM the expression defaults to "0".

    Returns:
        ``{"calculation_results": <result>}``, an empty string when the
        calculator was not requested, or "Calculation failed" on error.
    """
    try:
        if "calculator" not in state["tools_needed"]:
            return {"calculation_results": ""}
        prompt = f"Extract a mathematical expression from: {state['question']}\n{state['file_results']}"
        if llm:
            response = await llm.ainvoke(prompt, config={"callbacks": [langfuse] if langfuse else []})
            # HuggingFacePipeline returns a plain str; chat models return a
            # message with `.content`. Accept both.
            expression = str(getattr(response, "content", response)).strip()
        else:
            expression = "0"
        # calculator_tool is an @tool function, so it has no `.aparse`; it is
        # called through `ainvoke` with its argument passed as a dict.
        result = await calculator_tool.ainvoke({"expression": expression})
        return {"calculation_results": result}
    except Exception as e:
        logger.error(f"Error in calculator for task {state['task_id']}: {e}")
        return {"calculation_results": "Calculation failed"}
229
+
230
async def document_retriever_agent(state: JARVISState) -> JARVISState:
    """Retrieve the task's downloaded document when the retriever was requested.

    The file type is guessed from the question: "pdf" for report/document,
    "txt" for menu, otherwise "csv". If no file exists under the guessed
    extension, the first supported extension that does exist is used instead.

    Returns:
        ``{"document_results": <text>}``, an empty string when the tool was not
        requested, or "Document retrieval failed" on error.
    """
    try:
        if "document_retriever" not in state["tools_needed"]:
            return {"document_results": ""}

        question = state["question"].lower()
        file_type = "txt" if "menu" in question else "csv"
        if "report" in question or "document" in question:
            file_type = "pdf"

        # BasicAgent.process_question chooses the download extension with a
        # different heuristic (e.g. "menu" is saved as .pdf), so fall back to
        # whichever supported file is actually on disk.
        task_id = state["task_id"]
        if not os.path.exists(f"temp_{task_id}.{file_type}"):
            file_type = next(
                (ext for ext in ("txt", "csv", "pdf") if os.path.exists(f"temp_{task_id}.{ext}")),
                file_type,
            )

        # document_retriever_tool is an @tool function, so it has no `.aparse`;
        # it is called through `ainvoke` with its arguments passed as a dict.
        result = await document_retriever_tool.ainvoke(
            {"task_id": task_id, "query": state["question"], "file_type": file_type}
        )
        return {"document_results": result}
    except Exception as e:
        logger.error(f"Error in document retriever for task {state['task_id']}: {e}")
        return {"document_results": "Document retrieval failed"}
244
+
245
async def reasoning_agent(state: JARVISState) -> JARVISState:
    """Synthesize the final answer from the question and all tool results.

    The answer is written to ``state["answer"]``: the stripped LLM output,
    "Unknown" when no LLM is available, or "Error in reasoning" on failure.
    The state is logged in every case.

    Returns:
        The same ``state`` object with ``answer`` updated.
    """
    try:
        prompt = (
            f"Question: {state['question']}\n"
            f"Web Results: {state['web_results']}\n"
            f"File Results: {state['file_results']}\n"
            f"Image Results: {state['image_results']}\n"
            f"Calculation Results: {state['calculation_results']}\n"
            f"Document Results: {state['document_results']}\n"
            "Synthesize an exact-match answer for the GAIA benchmark.\n"
            "Output only the answer (e.g., '90', 'White;5876')."
        )
        if llm:
            response = await llm.ainvoke(
                [
                    SystemMessage(content="You are JARVIS, a precise assistant for the GAIA benchmark. Provide exact answers only."),
                    HumanMessage(content=prompt),
                ],
                config={"callbacks": [langfuse] if langfuse else []},
            )
            # HuggingFacePipeline returns a plain str, which has no `.content`;
            # chat models return a message object. Accept both.
            answer = str(getattr(response, "content", response)).strip()
        else:
            answer = "Unknown"
        state["answer"] = answer
        log_state(state["task_id"], state)
        return state
    except Exception as e:
        logger.error(f"Error in reasoning for task {state['task_id']}: {e}")
        state["answer"] = "Error in reasoning"
        log_state(state["task_id"], state)
        return state
274
+
275
def router(state: JARVISState) -> str:
    """Pick the node that follows question parsing.

    Returns "tool_dispatcher" when at least one tool was selected, otherwise
    "reasoning".
    """
    return "tool_dispatcher" if state["tools_needed"] else "reasoning"
279
+
280
+ # --- Define StateGraph ---
281
+ workflow = StateGraph(JARVISState)
282
+ workflow.add_node("parse", parse_question)
283
+ workflow.add_node("tool_dispatcher", tool_dispatcher)
284
+ workflow.add_node("reasoning", reasoning_agent)
285
+
286
+ workflow.set_entry_point("parse")
287
+ workflow.add_conditional_edges(
288
+ "parse",
289
+ router,
290
+ {
291
+ "tool_dispatcher": "tool_dispatcher",
292
+ "reasoning": "reasoning"
293
+ }
294
+ )
295
+ workflow.add_edge("tool_dispatcher", "reasoning")
296
+ workflow.add_edge("reasoning", END)
297
+
298
+ # Compile graph
299
+ graph = workflow.compile(checkpointer=memory if use_checkpointing else None)
300
+
301
+ # --- Basic Agent Definition ---
302
class BasicAgent:
    """Callable wrapper that answers one GAIA question through the graph."""

    def __init__(self):
        logger.info("BasicAgent initialized.")

    async def process_question(self, task_id: str, question: str) -> str:
        """Download the task file (if reachable), run the graph, return the answer.

        The temporary file is removed afterwards. Errors while running the
        graph are returned as an "Error: ..." string rather than raised.
        """
        lowered = question.lower()
        # Extension guess, in priority order: pdf > csv > jpg > txt.
        if any(keyword in lowered for keyword in ("menu", "report", "document")):
            file_type = "pdf"
        elif "data" in lowered:
            file_type = "csv"
        elif "image" in lowered:
            file_type = "jpg"
        else:
            file_type = "txt"

        file_path = f"temp_{task_id}.{file_type}"
        if await test_gaia_api(task_id):
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(f"{GAIA_FILE_URL}{task_id}") as resp:
                        if resp.status != 200:
                            logger.warning(f"Failed to download file for task {task_id}: HTTP {resp.status}")
                        else:
                            with open(file_path, "wb") as f:
                                f.write(await resp.read())
            except Exception as exc:
                logger.error(f"Error downloading file for task {task_id}: {exc}")

        initial_state = JARVISState(
            task_id=task_id,
            question=question,
            tools_needed=[],
            web_results=[],
            file_results="",
            image_results="",
            calculation_results="",
            document_results="",
            messages=[],
            answer="",
        )
        try:
            # The checkpointer keys its memory on the thread id.
            run_config = {"configurable": {"thread_id": task_id}} if use_checkpointing else {}
            outcome = await graph.ainvoke(initial_state, config=run_config)
            return outcome["answer"] or "No answer generated"
        except Exception as exc:
            logger.error(f"Error processing task {task_id}: {exc}")
            return f"Error: {str(exc)}"
        finally:
            if os.path.exists(file_path):
                try:
                    os.remove(file_path)
                except Exception as exc:
                    logger.error(f"Error removing file {file_path}: {exc}")

    async def async_call(self, question: str, task_id: str) -> str:
        """Async entry point; same as process_question with swapped arguments."""
        return await self.process_question(task_id, question)

    def __call__(self, question: str, task_id: str = None) -> str:
        """Synchronously answer ``question``, creating an event loop if needed."""
        logger.info(f"Agent received question (first 50 chars): {question[:50]}...")
        if task_id is None:
            logger.warning("task_id not provided, using placeholder")
            task_id = "placeholder_task_id"
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No event loop is set for this thread yet.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop.run_until_complete(self.async_call(question, task_id))
369
+
370
+ # --- Main Function ---
371
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with BasicAgent and submit the answers.

    Returns a ``(status_message, results)`` pair, where ``results`` is a
    DataFrame of per-question answers, or ``None`` when the run stopped before
    any question was processed.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        logger.error("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    logger.info(f"User logged in: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        agent = BasicAgent()
    except Exception as exc:
        logger.error(f"Error instantiating agent: {exc}")
        return f"Error initializing agent: {exc}", None

    logger.info(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            logger.error("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        logger.info(f"Fetched {len(questions_data)} questions.")
    except Exception as exc:
        logger.error(f"Error fetching questions: {exc}")
        return f"Error fetching questions: {exc}", None

    results_log = []
    answers_payload = []
    logger.info(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            logger.warning(f"Skipping item with missing task_id or question: {item}")
            continue

        row = {"Task ID": task_id, "Question": question_text}
        try:
            submitted_answer = agent(question_text, task_id)
        except Exception as exc:
            # Failed tasks appear in the table but are not submitted.
            logger.error(f"Error running agent on task {task_id}: {exc}")
            row["Submitted Answer"] = f"AGENT ERROR: {exc}"
        else:
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            row["Submitted Answer"] = submitted_answer
        results_log.append(row)

    if not answers_payload:
        logger.error("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        logger.info(f"Server response: {result_data}")
        status_lines = [
            "Submission Successful!",
            f"User: {result_data.get('username')}",
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)",
            f"Message: {result_data.get('message', 'No message received.')}",
        ]
        return "\n".join(status_lines), pd.DataFrame(results_log)
    except Exception as exc:
        logger.error(f"Submission failed: {exc}")
        return f"Submission Failed: {exc}", pd.DataFrame(results_log)
444
+
445
+ # --- Build Gradio Interface ---
446
+ with gr.Blocks() as demo:
447
+ gr.Markdown("# JARVIS Agent Evaluation Runner")
448
+ gr.Markdown(
449
+ """
450
+ **Instructions:**
451
+
452
+ 1. Log in to your Hugging Face account using the button below.
453
+ 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the JARVIS agent, and submit answers.
454
+
455
+ ---
456
+ **Disclaimers:**
457
+ The agent uses a local Hugging Face model (Mixtral-7B) and async tools for the GAIA benchmark.
458
+ """
459
+ )
460
+
461
+ gr.LoginButton()
462
+
463
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
464
+
465
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
466
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
467
+
468
+ run_button.click(
469
+ fn=run_and_submit_all,
470
+ outputs=[status_output, results_table]
471
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
  if __name__ == "__main__":
474
+ logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
475
+ space_id = os.getenv("SPACE_ID")
476
+ logger.info(f"SPACE_ID: {space_id}")
477
+ logger.info("Launching Gradio Interface...")
478
+ demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,97 +1,89 @@
1
- aiohappyeyeballs==2.6.1
2
- aiohttp==3.12.2
3
- aiosignal==1.3.2
4
  annotated-types==0.7.0
5
- anyio==4.9.0
6
- attrs==25.3.0
7
  backoff==2.2.1
8
- certifi==2025.4.26
9
- charset-normalizer==3.4.2
10
- click==8.2.1
11
  dataclasses-json==0.6.7
12
  distro==1.9.0
13
- duckduckgo_search==8.0.2
14
- filelock==3.18.0
15
- frozenlist==1.6.0
16
- fsspec==2025.5.1
17
- greenlet==3.2.2
18
- h11==0.16.0
19
- hf-xet==1.1.2
20
- httpcore==1.0.9
21
- httpx==0.28.1
22
  httpx-sse==0.4.0
23
- huggingface-hub==0.24.5
24
- idna==3.10
25
- Jinja2==3.1.6
26
- jiter==0.10.0
27
- joblib==1.5.1
28
  jsonpatch==1.33
29
  jsonpointer==3.0.0
30
- langchain==0.3.25
31
- langchain-community==0.3.24
32
- langchain-core==0.3.62
33
- langchain-openai==0.2.0
34
- langchain-text-splitters==0.3.8
35
- langfuse==2.44.0
36
- langgraph==0.4.7
37
- langgraph-checkpoint==2.0.26
38
- langgraph-prebuilt==0.2.1
39
- langgraph-sdk==0.1.70
40
- langsmith==0.1.147
41
- lxml==5.4.0
42
  markdown-it-py==3.0.0
43
- MarkupSafe==3.0.2
44
- marshmallow==3.26.1
45
  mdurl==0.1.2
46
  mpmath==1.3.0
47
- msgpack==1.1.0
48
- multidict==6.4.4
49
- mypy_extensions==1.1.0
50
- networkx==3.4.2
51
  numpy==1.26.4
52
- openai==1.40.0
53
- orjson==3.10.18
54
- ormsgpack==1.10.0
55
  packaging==23.2
56
- pandas==2.2.3
57
- pillow==11.0.0
58
  primp==0.15.0
59
- propcache==0.3.1
60
  pydantic==2.8.2
61
- pydantic-settings==2.9.1
62
  pydantic_core==2.20.1
63
- Pygments==2.19.1
64
  PyPDF2==3.0.1
65
  pytesseract==0.3.10
66
  python-dateutil==2.9.0.post0
67
  python-dotenv==1.0.1
68
- pytz==2025.2
69
- PyYAML==6.0.2
70
- regex==2024.11.6
71
  requests==2.32.3
72
  requests-toolbelt==1.0.0
73
- rich==14.0.0
74
- safetensors==0.5.3
75
- scikit-learn==1.6.1
76
- scipy==1.15.3
77
  sentence-transformers==3.0.1
78
- six==1.17.0
79
- smolagents==1.17.0
80
  sniffio==1.3.1
81
- SQLAlchemy==2.0.41
82
- sympy==1.14.0
83
  tenacity==8.5.0
84
- threadpoolctl==3.6.0
85
- tiktoken==0.9.0
86
  tokenizers==0.19.1
87
  torch==2.2.2
88
- tqdm==4.67.1
89
  transformers==4.42.4
90
  typing-inspect==0.9.0
91
- typing-inspection==0.4.1
92
- typing_extensions==4.13.2
93
- tzdata==2025.2
94
- urllib3==2.4.0
95
- wrapt==1.17.2
96
- xxhash==3.5.0
97
- yarl==1.20.0
 
1
+ aiohttp==3.8.6
2
+ aiosignal==1.3.1
 
3
  annotated-types==0.7.0
4
+ anyio==4.4.0
5
+ attrs==23.2.0
6
  backoff==2.2.1
7
+ certifi==2024.7.4
8
+ charset-normalizer==3.3.2
9
+ click==8.1.7
10
  dataclasses-json==0.6.7
11
  distro==1.9.0
12
+ duckduckgo_search==6.2.4
13
+ filelock==3.15.4
14
+ frozenlist==1.4.1
15
+ fsspec==2024.6.1
16
+ greenlet==3.0.3
17
+ h11==0.14.0
18
+ httpcore==1.0.5
19
+ httpx==0.27.0
 
20
  httpx-sse==0.4.0
21
+ huggingface-hub==0.23.4
22
+ idna==3.7
23
+ Jinja2==3.1.4
24
+ jiter==0.5.0
25
+ joblib==1.4.2
26
  jsonpatch==1.33
27
  jsonpointer==3.0.0
28
+ langchain==0.2.11
29
+ langchain-community==0.2.10
30
+ langchain-core==0.2.23
31
+ langchain-openai==0.1.17
32
+ langchain-text-splitters==0.2.2
33
+ langfuse==2.36.1
34
+ langgraph==0.1.15
35
+ langgraph-checkpoint==1.0.2
36
+ langsmith==0.1.93
37
+ lxml==5.2.2
 
 
38
  markdown-it-py==3.0.0
39
+ MarkupSafe==2.1.5
40
+ marshmallow==3.21.3
41
  mdurl==0.1.2
42
  mpmath==1.3.0
43
+ msgpack==1.0.8
44
+ multidict==6.0.5
45
+ mypy_extensions==1.0.0
46
+ networkx==3.3
47
  numpy==1.26.4
48
+ openai==1.35.13
49
+ orjson==3.10.6
 
50
  packaging==23.2
51
+ pandas==2.2.2
52
+ pillow==10.4.0
53
  primp==0.15.0
 
54
  pydantic==2.8.2
 
55
  pydantic_core==2.20.1
56
+ Pygments==2.18.0
57
  PyPDF2==3.0.1
58
  pytesseract==0.3.10
59
  python-dateutil==2.9.0.post0
60
  python-dotenv==1.0.1
61
+ pytz==2024.1
62
+ PyYAML==6.0.1
63
+ regex==2024.7.24
64
  requests==2.32.3
65
  requests-toolbelt==1.0.0
66
+ rich==13.7.1
67
+ safetensors==0.4.3
68
+ scikit-learn==1.5.1
69
+ scipy==1.14.0
70
  sentence-transformers==3.0.1
71
+ six==1.16.0
 
72
  sniffio==1.3.1
73
+ SQLAlchemy==2.0.31
74
+ sympy==1.13.1
75
  tenacity==8.5.0
76
+ threadpoolctl==3.5.0
77
+ tiktoken==0.7.0
78
  tokenizers==0.19.1
79
  torch==2.2.2
80
+ tqdm==4.66.4
81
  transformers==4.42.4
82
  typing-inspect==0.9.0
83
+ typing_extensions==4.12.2
84
+ tzdata==2024.1
85
+ urllib3==2.2.2
86
+ wrapt==1.16.0
87
+ xxhash==3.4.1
88
+ yarl==1.9.4
89
+ gradio[oauth]==4.44.1
tools/__init__.py CHANGED
@@ -2,4 +2,4 @@ from .search import search_tool, multi_hop_search_tool
2
  from .file_parser import file_parser_tool
3
  from .image_parser import image_parser_tool
4
  from .calculator import calculator_tool
5
- from .retriever import document_retriever_tool
 
2
  from .file_parser import file_parser_tool
3
  from .image_parser import image_parser_tool
4
  from .calculator import calculator_tool
5
+ from .document_retriever import document_retriever_tool
tools/calculator.py CHANGED
@@ -1,20 +1,15 @@
1
- import ast
2
- from typing import Dict
 
3
 
4
- class CalculatorTool:
5
- def __init__(self):
6
- self.name = "calculator"
7
- self.description = "Evaluates mathematical expressions."
8
- self.inputs = {
9
- "expression": {"type": "string", "description": "Mathematical expression to evaluate"}
10
- }
11
- self.output_type = str
12
 
13
- async def aparse(self, expression: str) -> str:
14
- try:
15
- result = eval(expression, {"__builtins__": {}}, {"abs": abs, "round": round})
16
- return str(result)
17
- except Exception as e:
18
- return f"Error calculating expression: {str(e)}"
19
-
20
- calculator_tool = CalculatorTool()
 
 
1
+ from langchain_core.tools import tool
2
+ from sympy import sympify
3
+ import logging
4
 
5
+ logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
6
 
7
@tool
async def calculator_tool(expression: str) -> str:
    """Evaluate a mathematical expression"""
    # The docstring above is the description the @tool decorator exposes at
    # runtime, so it is kept verbatim.
    # SECURITY NOTE: sympify() evaluates its input with eval(); the expression
    # passed here comes from LLM output, which is untrusted text.
    try:
        return str(sympify(expression))
    except Exception as exc:
        logger.error(f"Error evaluating expression '{expression}': {exc}")
        return f"Error: {str(exc)}"
tools/document_retriever.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ from langchain_community.document_loaders import TextLoader, CSVLoader, PyPDFLoader
3
+ import logging
4
+ import os
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
@tool
async def document_retriever_tool(task_id: str, query: str, file_type: str) -> str:
    """Retrieve content from a document"""
    # The docstring above is the description the @tool decorator exposes at
    # runtime, so it is kept verbatim.
    # Returns the page contents of temp_<task_id>.<file_type> joined by
    # newlines, or a short message when the file is missing, the type is
    # unsupported, or loading fails. `query` is accepted but not used to
    # filter the content.
    loader_factories = {
        # Explicit UTF-8 so decoding does not depend on the platform default,
        # matching how file_parser_tool opens text files.
        "txt": lambda path: TextLoader(path, encoding="utf-8"),
        "csv": lambda path: CSVLoader(path, encoding="utf-8"),
        "pdf": PyPDFLoader,
    }
    try:
        file_path = f"temp_{task_id}.{file_type}"
        if not os.path.exists(file_path):
            logger.warning(f"Document not found: {file_path}")
            return "Document not found"

        make_loader = loader_factories.get(file_type)
        if make_loader is None:
            return f"Unsupported file type: {file_type}"

        docs = make_loader(file_path).load()
        return "\n".join(doc.page_content for doc in docs)
    except Exception as e:
        logger.error(f"Error retrieving document for task {task_id}: {e}")
        return f"Error: {str(e)}"
tools/file_parser.py CHANGED
@@ -1,38 +1,33 @@
 
1
  import pandas as pd
2
- import requests
 
3
  import os
4
 
5
- class FileParserTool:
6
- def __init__(self):
7
- self.name = "file_parser"
8
- self.description = "Downloads and parses CSV or text files for GAIA tasks."
9
- self.inputs = {
10
- "task_id": {"type": "string", "description": "GAIA task ID"},
11
- "file_type": {"type": "string", "description": "File type (csv, txt, default: csv)"}
12
- }
13
- self.output_type = str
14
 
15
- async def aparse(self, task_id: str, file_type: str = "csv") -> str:
16
- try:
17
- url = f"https://api.gaia-benchmark.com/files/{task_id}"
18
- response = await requests.get(url)
19
- if response.status_code == 200:
20
- file_path = f"temp_{task_id}.{file_type}"
21
- with open(file_path, "wb") as f:
22
- f.write(response.content)
23
- if file_type == "csv":
24
- df = pd.read_csv(file_path)
25
- return df.to_string()
26
- elif file_type == "txt":
27
- with open(file_path, "r") as f:
28
- return f.read()
29
- else:
30
- return f"Unsupported file type: {file_type}"
31
- return f"Error downloading file for task ID {task_id}"
32
- except Exception as e:
33
- return f"Error: {str(e)}"
34
- finally:
35
- if os.path.exists(file_path):
36
- os.remove(file_path)
37
-
38
- file_parser_tool = FileParserTool()
 
 
1
+ from langchain_core.tools import tool
2
  import pandas as pd
3
+ import PyPDF2
4
+ import logging
5
  import os
6
 
7
+ logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
8
 
9
@tool
async def file_parser_tool(task_id: str, file_type: str) -> str:
    """Parse a file based on task_id and file_type.

    The file is read from the relative path ``temp_<task_id>.<file_type>``.

    Args:
        task_id: Identifier used to build the file name.
        file_type: File extension. ``csv``, ``txt`` and ``pdf`` are supported and
            matched case-insensitively.

    Returns:
        For CSV, the DataFrame rendered with ``to_string()``; for text, the file
        content decoded as UTF-8; for PDF, the extracted text of every page
        separated by newlines. Otherwise "File not found",
        "Unsupported file type: ..." or "Error: ...". Failures are reported
        through the return value and are never raised.
    """
    try:
        # The path keeps the caller's spelling of file_type so existing files
        # are still found on case-sensitive file systems.
        file_path = f"temp_{task_id}.{file_type}"
        if not os.path.exists(file_path):
            logger.warning(f"File not found: {file_path}")
            return "File not found"

        # Only the parser selection is case-insensitive ("CSV" == "csv").
        kind = file_type.lower()
        if kind == "csv":
            df = pd.read_csv(file_path)
            return df.to_string()
        if kind == "txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        if kind == "pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # Separate pages with a newline so the last word of one page is
                # not glued to the first word of the next. `or ""` keeps the
                # join safe should a page yield None instead of a string.
                return "\n".join(page.extract_text() or "" for page in reader.pages)
        return f"Unsupported file type: {file_type}"
    except Exception as e:
        # Tool boundary: report the failure to the caller rather than raising.
        logger.error(f"Error parsing file for task {task_id}: {e}")
        return f"Error: {str(e)}"
tools/image_parser.py CHANGED
@@ -1,66 +1,26 @@
1
- from langchain_openai import ChatOpenAI
2
- from sentence_transformers import SentenceTransformer, util
3
- import pytesseract
4
- from PIL import Image
5
- import base64
6
  import os
7
- from dotenv import load_dotenv
8
 
9
- # Load environment variables
10
- load_dotenv()
11
- # Debug: Verify OPENAI_API_KEY
12
- if not os.getenv("OPENAI_API_KEY"):
13
- print("Error: OPENAI_API_KEY not loaded in image_parser.py")
14
 
15
- class ImageParserTool:
16
- def __init__(self):
17
- self.name = "image_parser"
18
- self.description = "Analyzes images to extract text, identify objects, or match descriptions."
19
- self.inputs = {
20
- "image_path": {"type": "string", "description": "Path to image file"},
21
- "task": {"type": "string", "description": "Task type (ocr, describe, match)"},
22
- "match_query": {"type": "string", "description": "Query for semantic matching (optional)"}
23
- }
24
- self.output_type = str
25
- api_key = os.getenv("OPENAI_API_KEY")
26
- if not api_key:
27
- raise ValueError("OPENAI_API_KEY environment variable not set")
28
- self.vlm = ChatOpenAI(model="gpt-4o", api_key=api_key)
29
- self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
30
 
31
- async def aparse(self, image_path: str, task: str = "describe", match_query: str = "") -> str:
32
- try:
33
- # Read image
34
- with open(image_path, "rb") as f:
35
- image_data = base64.b64encode(f.read()).decode()
36
- img = Image.open(image_path)
37
-
38
- if task == "ocr":
39
- # Extract text with Tesseract
40
- text = pytesseract.image_to_string(img)
41
- return text if text.strip() else "No text found in image."
42
- elif task == "describe":
43
- # Describe image with VLM
44
- response = await self.vlm.ainvoke([
45
- {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_data}"},
46
- {"type": "text", "text": "Describe objects in the image in detail."}
47
- ])
48
- return response.content
49
- elif task == "match" and match_query:
50
- # Semantic matching with sentence-transformers
51
- description = await self.vlm.ainvoke([
52
- {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_data}"},
53
- {"type": "text", "text": "List objects in the image."}
54
- ])
55
- objects = description.content.split(", ")
56
- query_embedding = self.embedder.encode(match_query, convert_to_tensor=True)
57
- object_embeddings = self.embedder.encode(objects, convert_to_tensor=True)
58
- similarities = util.cos_sim(query_embedding, object_embeddings)[0]
59
- best_match = objects[similarities.argmax()]
60
- return f"Best match for '{match_query}': {best_match}"
61
- else:
62
- return "Invalid task or missing match_query for matching."
63
- except Exception as e:
64
- return f"Error analyzing image: {str(e)}"
65
-
66
- image_parser_tool = ImageParserTool()
 
1
+ from langchain_core.tools import tool
2
+ import easyocr
3
+ import logging
 
 
4
  import os
 
5
 
6
+ logger = logging.getLogger(__name__)
 
 
 
 
7
 
8
+ reader = easyocr.Reader(['en'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
# OCR helper exposed as a tool. It runs the module-level easyocr `reader` over
# the image at `file_path` and joins element 1 of every detection with single
# spaces. When `task` is "match" and `match_query` is non-empty, it returns
# "True"/"False" depending on whether the query occurs (case-insensitively) in
# the recognised text; for any other task it returns the recognised text.
# Failures are reported as "Image not found" or "Error: ..." and never raised.
# The docstring below is left unchanged because @tool uses it as the tool
# description.
@tool
async def image_parser_tool(file_path: str, task: str = "describe", match_query: str = "") -> str:
    """Parse text from an image"""
    try:
        image_missing = not os.path.exists(file_path)
        if image_missing:
            logger.warning(f"Image not found: {file_path}")
            return "Image not found"

        detections = reader.readtext(file_path)
        fragments = [detection[1] for detection in detections]
        text = " ".join(fragments)

        wants_match = task == "match" and match_query
        if not wants_match:
            return text

        found = match_query.lower() in text.lower()
        return str(found)
    except Exception as err:
        logger.error(f"Error parsing image {file_path}: {err}")
        return f"Error: {str(err)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/search.py CHANGED
@@ -1,68 +1,91 @@
1
- from langchain_openai import ChatOpenAI
2
  from langchain_core.tools import tool
3
- from duckduckgo_search import DDGS
 
 
 
 
4
  import os
5
- from dotenv import load_dotenv
6
 
7
- # Load environment variables
8
- load_dotenv()
9
- api_key = os.getenv("OPENAI_API_KEY")
10
- if not api_key:
11
- raise ValueError("OPENAI_API_KEY environment variable not set")
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  @tool
14
- async def web_search(query: str) -> str:
15
- """
16
- Performs a web search using DuckDuckGo and returns a string of results.
17
-
18
- Args:
19
- query (str): The search query string.
20
-
21
- Returns:
22
- str: A string containing the search results.
23
- """
24
  try:
25
- with DDGS() as ddgs:
26
- results = await ddgs.atext(keywords=query, max_results=5)
27
- return "\n".join([f"{r['title']}: {r['body']}" for r in results])
28
- except Exception as e:
29
- return f"Error performing web search: {str(e)}"
30
-
31
- search_tool = web_search
 
32
 
33
- class MultiHopSearchTool:
34
- def __init__(self):
35
- self.name = "multi_hop_search"
36
- self.description = "Performs iterative web searches to refine results for complex queries."
37
- self.inputs = {
38
- "query": {"type": "string", "description": "Initial search query"},
39
- "steps": {"type": "integer", "description": "Number of search iterations (default: 3)"}
40
- }
41
- self.output_type = str
42
- self.llm = ChatOpenAI(
43
- model="gpt-4o",
44
- api_key=api_key,
45
- temperature=0,
46
- http_client=None # Explicitly disable custom HTTP client to avoid proxies
47
- )
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- async def aparse(self, query: str, steps: int = 3) -> str:
50
- try:
51
- current_query = query
52
- results = []
53
- for _ in range(steps):
54
- search_result = await web_search.invoke({"query": current_query})
55
- results.append(search_result)
56
-
57
- # Refine query using LLM
58
- prompt = f"""Based on the query: {current_query}
59
- And the search results: {search_result}
60
- Generate a refined search query to get more precise results."""
61
- response = await self.llm.ainvoke(prompt)
62
- current_query = response.content
63
 
64
- return "\n\n".join(results)
65
- except Exception as e:
66
- return f"Error in multi-hop search: {str(e)}"
67
-
68
- multi_hop_search_tool = MultiHopSearchTool()
 
 
 
 
 
 
1
  from langchain_core.tools import tool
2
+ from langchain_huggingface import HuggingFacePipeline
3
+ from sentence_transformers import SentenceTransformer
4
+ import logging
5
+ from typing import List, Dict, Any
6
+ import requests
7
  import os
 
8
 
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # Initialize embedding model (free, open-source)
12
+ try:
13
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
14
+ except Exception as e:
15
+ logger.error(f"Failed to initialize embedding model: {e}")
16
+ embedder = None
17
+
18
+ # Global LLM instance
19
+ search_llm = None
20
+
21
def initialize_search_tools(llm: HuggingFacePipeline) -> None:
    """Register ``llm`` as the language model shared by this module's search tools.

    Args:
        llm: Model stored in the module-level ``search_llm`` variable, which the
            search tools check before running.
    """
    # Rebind the module-level name so later tool calls see the new model.
    global search_llm
    search_llm = llm
    logger.info("Search tools initialized with HuggingFace LLM")
26
 
27
  @tool
28
async def search_tool(query: str) -> List[Dict[str, Any]]:
    """Perform a web search using the query.

    The query is first rewritten by the configured LLM. If the
    ``SERPAPI_API_KEY`` environment variable is set, the rewritten query is sent
    to SerpAPI; when no key is set or the SerpAPI call fails, placeholder (mock)
    results are returned instead.

    Args:
        query: The search query text.

    Returns:
        A list of ``{"content": ..., "url": ...}`` dicts. Failures are reported
        as a single-item list and are never raised.
    """
    # Local imports: only needed to off-load the blocking HTTP request.
    import asyncio
    import functools

    try:
        if not search_llm:
            logger.warning("Search LLM not initialized")
            return [{"content": "Search unavailable", "url": ""}]

        # Refine query using LLM
        prompt = f"Refine this search query for better results: {query}"
        llm_reply = await search_llm.ainvoke(prompt)
        # Plain LLMs return a str from ainvoke() while chat models return a
        # message object with `.content`; accept both instead of failing with
        # AttributeError on the str case.
        reply_text = getattr(llm_reply, "content", llm_reply)
        # Fall back to the caller's query if the model produced nothing usable.
        refined_query = str(reply_text).strip() or query

        # Check for SerpAPI key (free tier available)
        serpapi_key = os.getenv("SERPAPI_API_KEY")
        if serpapi_key:
            try:
                params = {"q": refined_query, "api_key": serpapi_key}
                # requests is synchronous: run it in the default executor so the
                # event loop is not blocked, and bound the wait with a timeout.
                fetch = functools.partial(
                    requests.get,
                    "https://serpapi.com/search",
                    params=params,
                    timeout=10,
                )
                http_response = await asyncio.get_running_loop().run_in_executor(None, fetch)
                http_response.raise_for_status()
                results = http_response.json().get("organic_results", [])
                return [{"content": r.get("snippet", ""), "url": r.get("link", "")} for r in results]
            except Exception as e:
                logger.warning(f"SerpAPI failed: {e}, falling back to mock search")

        # Mock search if no API key or API fails
        if embedder:
            results = [
                {"content": f"Mock result for {refined_query}", "url": "https://example.com"},
                {"content": f"Another mock result for {refined_query}", "url": "https://example.org"},
            ]
        else:
            results = [{"content": "Embedding model unavailable", "url": ""}]

        logger.info(f"Search results for query '{refined_query}': {len(results)} items")
        return results
    except Exception as e:
        # Tool boundary: report the failure to the caller rather than raising.
        logger.error(f"Error in search_tool: {e}")
        return [{"content": f"Search failed: {str(e)}", "url": ""}]
67
 
68
@tool
async def multi_hop_search_tool(query: str, steps: int = 3) -> List[Dict[str, Any]]:
    """Perform a multi-hop search by iteratively refining the query.

    On each step the configured LLM is asked for a follow-up question based on
    the current query, that question is searched with ``search_tool``, and it
    becomes the query for the next step.

    Args:
        query: The initial search query.
        steps: Number of follow-up searches to run (default 3).

    Returns:
        The results of all steps concatenated into one list of
        ``{"content": ..., "url": ...}`` dicts. Failures are reported as a
        single-item list and are never raised.
    """
    try:
        if not search_llm:
            logger.warning("Search LLM not initialized")
            return [{"content": "Multi-hop search unavailable", "url": ""}]

        results = []
        current_query = query
        for step in range(steps):
            prompt = f"Based on the query '{current_query}', generate a follow-up question to deepen the search."
            llm_reply = await search_llm.ainvoke(prompt)
            # Plain LLMs return a str from ainvoke() while chat models return a
            # message object with `.content`; accept both.
            reply_text = getattr(llm_reply, "content", llm_reply)
            # Keep searching with the current query if the model returned nothing.
            next_query = str(reply_text).strip() or current_query

            # search_tool is defined as a coroutine, so it must be called through
            # the async entry point; the synchronous invoke() cannot be awaited.
            step_results = await search_tool.ainvoke({"query": next_query})
            results.extend(step_results)
            current_query = next_query
            logger.info(f"Multi-hop step {step + 1}: {next_query}")

        return results
    except Exception as e:
        # Tool boundary: report the failure to the caller rather than raising.
        logger.error(f"Error in multi_hop_search_tool: {e}")
        return [{"content": f"Multi-hop search failed: {str(e)}", "url": ""}]