Spaces:

onisj
/

jarvis_gaia_agent

Starting

App Files Files Community

onisj committed on May 28

Commit

1bbca12

1 Parent(s): 75210f1

Add .gitignore and clean tracked files

Browse files

Files changed (12) hide show

.gitignore +74 -0
app.py +96 -0
dockerfile +24 -0
graph.py +143 -0
requirements.txt +97 -0
state.py +14 -0
tools/__init__.py +5 -0
tools/calculator.py +20 -0
tools/file_parser.py +38 -0
tools/image_parser.py +66 -0
tools/retriever.py +80 -0
tools/search.py +68 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,74 @@

+# Python virtual environments
+venv/
+venv311/
+*.venv/
+# Python cache files
+__pycache__/
+*.py[cod]
+*$py.class
+# Environment variables
+.env
+*.env
+# IDE and editor files
+.vscode/
+.idea/
+*.sublime-project
+*.sublime-workspace
+# macOS system files
+.DS_Store
+.AppleDouble
+.LSOverride
+# Jupyter Notebook checkpoints
+.ipynb_checkpoints/
+# Python package installation
+*.egg
+*.egg-info/
+dist/
+build/
+eggs/
+*.whl
+# Testing and coverage
+.coverage
+coverage.xml
+*.cover
+*.py,cover
+.tox/
+.pytest_cache/
+# Logs and temporary files
+*.log
+*.log.*
+*.tmp
+temp/
+# Dependency directories
+pip-wheel-metadata/
+.pip_cache/
+.wheels/
+# Byte-compiled / optimized / DLL files
+*.so
+*.pyd
+*.dll
+# Hugging Face Space specific
+*.ipynb
+*.parquet
+*.feather
+*.pickle
+*.pkl
+*.h5
+*.joblib
+# Miscellaneous
+*.swp
+*~
+*.bak
+*.old

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import aiohttp
+import asyncio
+from graph import graph
+from state import JARVISState
+from pydantic import BaseModel
+from typing import List
+import json
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Debug: Verify environment variables
+print(f"OPENAI_API_KEY loaded: {'set' if os.getenv('OPENAI_API_KEY') else 'not set'}")
+print(f"LANGFUSE_PUBLIC_KEY loaded: {'set' if os.getenv('LANGFUSE_PUBLIC_KEY') else 'not set'}")
+# Verify critical environment variables
+required_env_vars = ["OPENAI_API_KEY", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
+for var in required_env_vars:
+    if not os.getenv(var):
+        raise ValueError(f"Environment variable {var} is not set")
+# Pydantic Models for Submission
+# One answered task inside a submission payload.
+class Answer(BaseModel):
+    # Task identifier, copied from the question record's "task_id" field.
+    task_id: str
+    # Final answer text, taken from the "answer" key of the graph result.
+    submitted_answer: str
+# Request body that submit_answers() posts to the /submit endpoint.
+class Submission(BaseModel):
+    # Account name the answers are submitted under.
+    username: str
+    # URL of the agent's source code (main() passes the Space's tree URL).
+    agent_code: str
+    # One entry per processed question.
+    answers: List[Answer]
+async def fetch_questions() -> List[dict]:
+    async with aiohttp.ClientSession() as session:
+        async with session.get("https://api.gaia-benchmark.com/questions") as resp:
+            return await resp.json()
+async def download_file(task_id: str, file_path: str) -> bool:
+    async with aiohttp.ClientSession() as session:
+        async with session.get(f"https://api.gaia-benchmark.com/files/{task_id}") as resp:
+            if resp.status == 200:
+                with open(file_path, "wb") as f:
+                    f.write(await resp.read())
+                return True
+            return False
+async def process_question(question: dict) -> Answer:
+    # Determine file type based on question context
+    file_type = "jpg" if "image" in question["question"].lower() else "txt"
+    if "menu" in question["question"].lower() or "report" in question["question"].lower() or "document" in question["question"].lower():
+        file_type = "pdf"  # Prioritize PDF for reports/documents
+    elif "data" in question["question"].lower():
+        file_type = "csv"
+    file_path = f"temp_{question['task_id']}.{file_type}"
+    await download_file(question["task_id"], file_path)
+    state = JARVISState(
+        task_id=question["task_id"],
+        question=question["question"],
+        tools_needed=[],
+        web_results=[],
+        file_results="",
+        image_results="",
+        calculation_results="",
+        document_results="",
+        messages=[],
+        answer=""
+    )
+    # Use unique thread_id for memory
+    result = await graph.ainvoke(state, config={"thread_id": question["task_id"]})
+    return Answer(task_id=question["task_id"], submitted_answer=result["answer"])
+async def submit_answers(answers: List[Answer], username: str, agent_code: str):
+    submission = Submission(
+        username=username,
+        agent_code=agent_code,
+        answers=answers
+    )
+    async with aiohttp.ClientSession() as session:
+        async with session.post("https://api.gaia-benchmark.com/submit", json=submission.dict()) as resp:
+            return await resp.json()
+async def main():
+    username = "onisj"  # Your Hugging Face username
+    agent_code = "https://huggingface.co/spaces/onisj/jarvis_gaia_agent/tree/main"
+    questions = await fetch_questions()
+    answers = []
+    for question in questions[:20]:  # Process 20 questions
+        answer = await process_question(question)
+        answers.append(answer)
+    result = await submit_answers(answers, username, agent_code)
+    print("Submission result:", json.dumps(result, indent=2))
+if __name__ == "__main__":
+    asyncio.run(main())

dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install system dependencies (OpenGL/GLib runtime libraries and Tesseract OCR).
+# libgl1 replaces libgl1-mesa-glx, which is no longer packaged in the Debian
+# releases that current python:3.11-slim images are built on.
+RUN apt-get update && apt-get install -y \
+    libgl1 \
+    libglib2.0-0 \
+    tesseract-ocr \
+    libtesseract-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies before copying the sources, so this layer is
+# reused from the build cache when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy project files
+COPY app.py .
+COPY graph.py .
+COPY state.py .
+COPY tools/ tools/
+# Run the application
+CMD ["python", "app.py"]

graph.py ADDED Viewed

	@@ -0,0 +1,143 @@

+from langgraph.graph import StateGraph, END
+from langgraph.checkpoint.memory import MemorySaver
+from state import JARVISState
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import SystemMessage, HumanMessage
+from tools import search_tool, multi_hop_search_tool, file_parser_tool, image_parser_tool, calculator_tool, document_retriever_tool
+from langfuse.callback import LangfuseCallbackHandler
+import json
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Debug: Verify environment variables
+print(f"OPENAI_API_KEY loaded in graph.py: {'set' if os.getenv('OPENAI_API_KEY') else 'not set'}")
+print(f"LANGFUSE_PUBLIC_KEY loaded in graph.py: {'set' if os.getenv('LANGFUSE_PUBLIC_KEY') else 'not set'}")
+# Initialize LLM and Langfuse
+api_key = os.getenv("OPENAI_API_KEY")
+if not api_key:
+    raise ValueError("OPENAI_API_KEY environment variable not set")
+llm = ChatOpenAI(model="gpt-4o", api_key=api_key)
+langfuse = LangfuseCallbackHandler(
+    public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
+    secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
+    host=os.getenv("LANGFUSE_HOST")
+)
+memory = MemorySaver()
+# Question Parser Node
+async def parse_question(state: JARVISState) -> JARVISState:
+    question = state["question"]
+    prompt = f"""Analyze this GAIA question: {question}
+    Determine which tools are needed (web_search, multi_hop_search, file_parser, image_parser, calculator, document_retriever).
+    Return a JSON list of tool names."""
+    response = await llm.ainvoke(prompt, config={"callbacks": [langfuse]})
+    tools_needed = json.loads(response.content)
+    return {"messages": state["messages"] + [response], "tools_needed": tools_needed}
+# Web Search Agent Node
+async def web_search_agent(state: JARVISState) -> JARVISState:
+    results = []
+    if "web_search" in state["tools_needed"]:
+        result = await search_tool.arun(state["question"])
+        results.append(result)
+    if "multi_hop_search" in state["tools_needed"]:
+        result = await multi_hop_search_tool.aparse(state["question"], steps=3)
+        results.append(result)
+    return {"web_results": results}
+# File Parser Agent Node
+async def file_parser_agent(state: JARVISState) -> JARVISState:
+    if "file_parser" in state["tools_needed"]:
+        result = await file_parser_tool.aparse(state["task_id"])
+        return {"file_results": result}
+    return {"file_results": ""}
+# Image Parser Agent Node
+async def image_parser_agent(state: JARVISState) -> JARVISState:
+    if "image_parser" in state["tools_needed"]:
+        task = "match" if "fruits" in state["question"].lower() else "describe"
+        match_query = "fruits" if task == "match" else ""
+        result = await image_parser_tool.aparse(
+            f"temp_{state['task_id']}.jpg", task=task, match_query=match_query
+        )
+        return {"image_results": result}
+    return {"image_results": ""}
+# Calculator Agent Node
+async def calculator_agent(state: JARVISState) -> JARVISState:
+    if "calculator" in state["tools_needed"]:
+        prompt = f"Extract a mathematical expression from: {state['question']}\n{state['file_results']}"
+        response = await llm.ainvoke(prompt, config={"callbacks": [langfuse]})
+        expression = response.content
+        result = await calculator_tool.aparse(expression)
+        return {"calculation_results": result}
+    return {"calculation_results": ""}
+# Document Retriever Agent Node
+async def document_retriever_agent(state: JARVISState) -> JARVISState:
+    if "document_retriever" in state["tools_needed"]:
+        file_type = "txt" if "menu" in state["question"].lower() else "csv"
+        if "report" in state["question"].lower() or "document" in state["question"].lower():
+            file_type = "pdf"
+        result = await document_retriever_tool.aparse(
+            state["task_id"], state["question"], file_type=file_type
+        )
+        return {"document_results": result}
+    return {"document_results": ""}
+# Reasoning Agent Node
+async def reasoning_agent(state: JARVISState) -> JARVISState:
+    prompt = f"""Question: {state['question']}
+    Web Results: {state['web_results']}
+    File Results: {state['file_results']}
+    Image Results: {state['image_results']}
+    Calculation Results: {state['calculation_results']}
+    Document Results: {state['document_results']}
+    Synthesize an exact-match answer for the GAIA benchmark.
+    Output only the answer (e.g., '90', 'White;5876')."""
+    response = await llm.ainvoke(
+        [
+            SystemMessage(content="You are JARVIS, a precise assistant for the GAIA benchmark. Provide exact answers only."),
+            HumanMessage(content=prompt)
+        ],
+        config={"callbacks": [langfuse]}
+    )
+    return {"answer": response.content, "messages": state["messages"] + [response]}
+# Conditional Edge Router
+def router(state: JARVISState) -> str:
+    if state["tools_needed"]:
+        return "tools"
+    return "reasoning"
+# Build Graph
+workflow = StateGraph(JARVISState)
+workflow.add_node("parse", parse_question)
+workflow.add_node("web_search", web_search_agent)
+workflow.add_node("file_parser", file_parser_agent)
+workflow.add_node("image_parser", image_parser_agent)
+workflow.add_node("calculator", calculator_agent)
+workflow.add_node("document_retriever", document_retriever_agent)
+workflow.add_node("reasoning", reasoning_agent)
+workflow.set_entry_point("parse")
+workflow.add_conditional_edges(
+    "parse",
+    router,
+    {
+        "tools": ["web_search", "file_parser", "image_parser", "calculator", "document_retriever"],
+        "reasoning": "reasoning"
+    }
+)
+workflow.add_edge("web_search", "reasoning")
+workflow.add_edge("file_parser", "reasoning")
+workflow.add_edge("image_parser", "reasoning")
+workflow.add_edge("calculator", "reasoning")
+workflow.add_edge("document_retriever", "reasoning")
+workflow.add_edge("reasoning", END)
+graph = workflow.compile(checkpointer=memory)

requirements.txt ADDED Viewed

	@@ -0,0 +1,97 @@

+aiohappyeyeballs==2.6.1
+aiohttp==3.12.2
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+backoff==2.2.1
+certifi==2025.4.26
+charset-normalizer==3.4.2
+click==8.2.1
+dataclasses-json==0.6.7
+distro==1.9.0
+duckduckgo_search==8.0.2
+filelock==3.18.0
+frozenlist==1.6.0
+fsspec==2025.5.1
+greenlet==3.2.2
+h11==0.16.0
+hf-xet==1.1.2
+httpcore==1.0.9
+httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.24.5
+idna==3.10
+Jinja2==3.1.6
+jiter==0.10.0
+joblib==1.5.1
+jsonpatch==1.33
+jsonpointer==3.0.0
+langchain==0.3.25
+langchain-community==0.3.24
+langchain-core==0.3.62
+langchain-openai==0.2.0
+langchain-text-splitters==0.3.8
+langfuse==2.44.0
+langgraph==0.4.7
+langgraph-checkpoint==2.0.26
+langgraph-prebuilt==0.2.1
+langgraph-sdk==0.1.70
+langsmith==0.1.147
+lxml==5.4.0
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+marshmallow==3.26.1
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.1.0
+multidict==6.4.4
+mypy_extensions==1.1.0
+networkx==3.4.2
+numpy==1.26.4
+openai==1.40.0
+orjson==3.10.18
+ormsgpack==1.10.0
+packaging==23.2
+pandas==2.2.3
+pillow==11.0.0
+primp==0.15.0
+propcache==0.3.1
+pydantic==2.8.2
+pydantic-settings==2.9.1
+pydantic_core==2.20.1
+Pygments==2.19.1
+PyPDF2==3.0.1
+pytesseract==0.3.10
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytz==2025.2
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+rich==14.0.0
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.15.3
+sentence-transformers==3.0.1
+six==1.17.0
+smolagents==1.17.0
+sniffio==1.3.1
+SQLAlchemy==2.0.41
+sympy==1.14.0
+tenacity==8.5.0
+threadpoolctl==3.6.0
+tiktoken==0.9.0
+tokenizers==0.19.1
+torch==2.2.2
+tqdm==4.67.1
+transformers==4.42.4
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+typing_extensions==4.13.2
+tzdata==2025.2
+urllib3==2.4.0
+wrapt==1.17.2
+xxhash==3.5.0
+yarl==1.20.0

state.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from typing import TypedDict, List
+from langchain_core.messages import AnyMessage
+# Shared state passed between the graph nodes; each node returns a partial
+# update containing only the keys it writes.
+class JARVISState(TypedDict):
+    # Task identifier; also used to build temp file names (temp_<task_id>.<ext>).
+    task_id: str
+    # The question text.
+    question: str
+    # Tool names chosen by parse_question.
+    tools_needed: List[str]
+    # Outputs collected by web_search_agent.
+    web_results: List[str]
+    # Output of file_parser_agent ("" when the tool was not requested).
+    file_results: str
+    # Output of image_parser_agent ("" when the tool was not requested).
+    image_results: str
+    # Output of calculator_agent ("" when the tool was not requested).
+    calculation_results: str
+    # Output of document_retriever_agent ("" when the tool was not requested).
+    document_results: str
+    # LLM responses appended by parse_question and reasoning_agent.
+    messages: List[AnyMessage]
+    # Final answer written by reasoning_agent.
+    answer: str

tools/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from .search import search_tool, multi_hop_search_tool
+from .file_parser import file_parser_tool
+from .image_parser import image_parser_tool
+from .calculator import calculator_tool
+from .retriever import document_retriever_tool

tools/calculator.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import ast
+from typing import Dict
+class CalculatorTool:
+    def __init__(self):
+        self.name = "calculator"
+        self.description = "Evaluates mathematical expressions."
+        self.inputs = {
+            "expression": {"type": "string", "description": "Mathematical expression to evaluate"}
+        }
+        self.output_type = str
+    async def aparse(self, expression: str) -> str:
+        try:
+            result = eval(expression, {"__builtins__": {}}, {"abs": abs, "round": round})
+            return str(result)
+        except Exception as e:
+            return f"Error calculating expression: {str(e)}"
+calculator_tool = CalculatorTool()

tools/file_parser.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import pandas as pd
+import requests
+import os
+class FileParserTool:
+    def __init__(self):
+        self.name = "file_parser"
+        self.description = "Downloads and parses CSV or text files for GAIA tasks."
+        self.inputs = {
+            "task_id": {"type": "string", "description": "GAIA task ID"},
+            "file_type": {"type": "string", "description": "File type (csv, txt, default: csv)"}
+        }
+        self.output_type = str
+    async def aparse(self, task_id: str, file_type: str = "csv") -> str:
+        try:
+            url = f"https://api.gaia-benchmark.com/files/{task_id}"
+            response = await requests.get(url)
+            if response.status_code == 200:
+                file_path = f"temp_{task_id}.{file_type}"
+                with open(file_path, "wb") as f:
+                    f.write(response.content)
+                if file_type == "csv":
+                    df = pd.read_csv(file_path)
+                    return df.to_string()
+                elif file_type == "txt":
+                    with open(file_path, "r") as f:
+                        return f.read()
+                else:
+                    return f"Unsupported file type: {file_type}"
+            return f"Error downloading file for task ID {task_id}"
+        except Exception as e:
+            return f"Error: {str(e)}"
+        finally:
+            if os.path.exists(file_path):
+                os.remove(file_path)
+file_parser_tool = FileParserTool()

tools/image_parser.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from langchain_openai import ChatOpenAI
+from sentence_transformers import SentenceTransformer, util
+import pytesseract
+from PIL import Image
+import base64
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Debug: Verify OPENAI_API_KEY
+if not os.getenv("OPENAI_API_KEY"):
+    print("Error: OPENAI_API_KEY not loaded in image_parser.py")
+class ImageParserTool:
+    def __init__(self):
+        self.name = "image_parser"
+        self.description = "Analyzes images to extract text, identify objects, or match descriptions."
+        self.inputs = {
+            "image_path": {"type": "string", "description": "Path to image file"},
+            "task": {"type": "string", "description": "Task type (ocr, describe, match)"},
+            "match_query": {"type": "string", "description": "Query for semantic matching (optional)"}
+        }
+        self.output_type = str
+        api_key = os.getenv("OPENAI_API_KEY")
+        if not api_key:
+            raise ValueError("OPENAI_API_KEY environment variable not set")
+        self.vlm = ChatOpenAI(model="gpt-4o", api_key=api_key)
+        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
+    async def aparse(self, image_path: str, task: str = "describe", match_query: str = "") -> str:
+        try:
+            # Read image
+            with open(image_path, "rb") as f:
+                image_data = base64.b64encode(f.read()).decode()
+            img = Image.open(image_path)
+            if task == "ocr":
+                # Extract text with Tesseract
+                text = pytesseract.image_to_string(img)
+                return text if text.strip() else "No text found in image."
+            elif task == "describe":
+                # Describe image with VLM
+                response = await self.vlm.ainvoke([
+                    {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_data}"},
+                    {"type": "text", "text": "Describe objects in the image in detail."}
+                ])
+                return response.content
+            elif task == "match" and match_query:
+                # Semantic matching with sentence-transformers
+                description = await self.vlm.ainvoke([
+                    {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_data}"},
+                    {"type": "text", "text": "List objects in the image."}
+                ])
+                objects = description.content.split(", ")
+                query_embedding = self.embedder.encode(match_query, convert_to_tensor=True)
+                object_embeddings = self.embedder.encode(objects, convert_to_tensor=True)
+                similarities = util.cos_sim(query_embedding, object_embeddings)[0]
+                best_match = objects[similarities.argmax()]
+                return f"Best match for '{match_query}': {best_match}"
+            else:
+                return "Invalid task or missing match_query for matching."
+        except Exception as e:
+            return f"Error analyzing image: {str(e)}"
+image_parser_tool = ImageParserTool()

tools/retriever.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+import numpy as np
+import pandas as pd
+import PyPDF2
+import os
+from typing import List, Dict
+class DocumentRetrieverTool:
+    def __init__(self):
+        self.name = "document_retriever"
+        self.description = "Retrieves relevant text from GAIA text-heavy files (CSV, TXT, PDF) using semantic search."
+        self.inputs = {
+            "task_id": {"type": "string", "description": "GAIA task ID for the file"},
+            "query": {"type": "string", "description": "Question or query to search for"},
+            "file_type": {"type": "string", "description": "File type (csv, txt, pdf, default: txt)"}
+        }
+        self.output_type = str
+        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=500,
+            chunk_overlap=50,
+            length_function=len
+        )
+        self.chunks: List[str] = []
+        self.embeddings: np.ndarray = None
+    async def aparse(self, task_id: str, query: str, file_type: str = "txt") -> str:
+        """
+        Loads a GAIA file, splits it into chunks, embeds them, and retrieves relevant text for the query.
+        Supports CSV, TXT, and PDF files.
+        """
+        try:
+            file_path = f"temp_{task_id}.{file_type}"
+            if not os.path.exists(file_path):
+                return f"File not found for task ID {task_id}"
+            # Load and preprocess file
+            text = ""
+            if file_type == "csv":
+                df = pd.read_csv(file_path)
+                text = df.to_string()
+            elif file_type == "txt":
+                with open(file_path, "r", encoding="utf-8") as f:
+                    text = f.read()
+            elif file_type == "pdf":
+                with open(file_path, "rb") as f:
+                    pdf = PyPDF2.PdfReader(f)
+                    text = "".join(page.extract_text() or "" for page in pdf.pages)
+            else:
+                return f"Unsupported file type: {file_type}"
+            # Check if text was extracted
+            if not text.strip():
+                return "No extractable text found in file."
+            # Split text into chunks
+            self.chunks = self.text_splitter.split_text(text)
+            if not self.chunks:
+                return "No content found in file."
+            # Embed chunks and query
+            self.embeddings = self.embedder.encode(self.chunks, convert_to_tensor=True)
+            query_embedding = self.embedder.encode(query, convert_to_tensor=True)
+            # Compute cosine similarities
+            from sentence_transformers import util
+            similarities = util.cos_sim(query_embedding, self.embeddings)[0]
+            # Get top 3 most relevant chunks
+            top_k = min(3, len(self.chunks))
+            top_indices = similarities.argsort(descending=True)[:top_k]
+            relevant_chunks = [self.chunks[idx] for idx in top_indices]
+            # Combine results
+            return "\n\n".join(relevant_chunks)
+        except Exception as e:
+            return f"Error retrieving documents: {str(e)}"
+document_retriever_tool = DocumentRetrieverTool()

tools/search.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from langchain_openai import ChatOpenAI
+from langchain_core.tools import tool
+from duckduckgo_search import DDGS
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+if not api_key:
+    raise ValueError("OPENAI_API_KEY environment variable not set")
+@tool
+async def web_search(query: str) -> str:
+    """
+    Performs a web search using DuckDuckGo and returns a string of results.
+    Args:
+        query (str): The search query string.
+    Returns:
+        str: A string containing the search results.
+    """
+    try:
+        with DDGS() as ddgs:
+            results = await ddgs.atext(keywords=query, max_results=5)
+            return "\n".join([f"{r['title']}: {r['body']}" for r in results])
+    except Exception as e:
+        return f"Error performing web search: {str(e)}"
+search_tool = web_search
+class MultiHopSearchTool:
+    def __init__(self):
+        self.name = "multi_hop_search"
+        self.description = "Performs iterative web searches to refine results for complex queries."
+        self.inputs = {
+            "query": {"type": "string", "description": "Initial search query"},
+            "steps": {"type": "integer", "description": "Number of search iterations (default: 3)"}
+        }
+        self.output_type = str
+        self.llm = ChatOpenAI(
+            model="gpt-4o",
+            api_key=api_key,
+            temperature=0,
+            http_client=None  # Explicitly disable custom HTTP client to avoid proxies
+        )
+    async def aparse(self, query: str, steps: int = 3) -> str:
+        try:
+            current_query = query
+            results = []
+            for _ in range(steps):
+                search_result = await web_search.invoke({"query": current_query})
+                results.append(search_result)
+                # Refine query using LLM
+                prompt = f"""Based on the query: {current_query}
+                And the search results: {search_result}
+                Generate a refined search query to get more precise results."""
+                response = await self.llm.ainvoke(prompt)
+                current_query = response.content
+            return "\n\n".join(results)
+        except Exception as e:
+            return f"Error in multi-hop search: {str(e)}"
+multi_hop_search_tool = MultiHopSearchTool()