Spaces:

onisj
/

jarvis_gaia_agent

Starting

App Files Files Community

onisj committed 15 days ago

Commit

488dc3e

1 Parent(s): 76b50c7

Use free tools only, remove OpenAI dependency

Browse files

Files changed (8) hide show

app.py +462 -80
requirements.txt +61 -69
tools/__init__.py +1 -1
tools/calculator.py +13 -18
tools/document_retriever.py +30 -0
tools/file_parser.py +29 -34
tools/image_parser.py +22 -62
tools/search.py +82 -59

app.py CHANGED Viewed

@@ -1,96 +1,478 @@
 import aiohttp
 import asyncio
-from graph import graph
-from state import JARVISState
-from pydantic import BaseModel
-from typing import List
 import json
-import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
-# Debug: Verify environment variables
-print(f"OPENAI_API_KEY loaded: {'set' if os.getenv('OPENAI_API_KEY') else 'not set'}")
-print(f"LANGFUSE_PUBLIC_KEY loaded: {'set' if os.getenv('LANGFUSE_PUBLIC_KEY') else 'not set'}")
-# Verify critical environment variables
-required_env_vars = ["OPENAI_API_KEY", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
 for var in required_env_vars:
     if not os.getenv(var):
         raise ValueError(f"Environment variable {var} is not set")
-# Pydantic Models for Submission
-class Answer(BaseModel):
-    task_id: str
-    submitted_answer: str
-class Submission(BaseModel):
-    username: str
-    agent_code: str
-    answers: List[Answer]
-async def fetch_questions() -> List[dict]:
-    async with aiohttp.ClientSession() as session:
-        async with session.get("https://api.gaia-benchmark.com/questions") as resp:
-            return await resp.json()
-async def download_file(task_id: str, file_path: str) -> bool:
-    async with aiohttp.ClientSession() as session:
-        async with session.get(f"https://api.gaia-benchmark.com/files/{task_id}") as resp:
-            if resp.status == 200:
-                with open(file_path, "wb") as f:
-                    f.write(await resp.read())
-                return True
-            return False
-async def process_question(question: dict) -> Answer:
-    # Determine file type based on question context
-    file_type = "jpg" if "image" in question["question"].lower() else "txt"
-    if "menu" in question["question"].lower() or "report" in question["question"].lower() or "document" in question["question"].lower():
-        file_type = "pdf"  # Prioritize PDF for reports/documents
-    elif "data" in question["question"].lower():
-        file_type = "csv"
-    file_path = f"temp_{question['task_id']}.{file_type}"
-    await download_file(question["task_id"], file_path)
-    state = JARVISState(
-        task_id=question["task_id"],
-        question=question["question"],
-        tools_needed=[],
-        web_results=[],
-        file_results="",
-        image_results="",
-        calculation_results="",
-        document_results="",
-        messages=[],
-        answer=""
     )
-    # Use unique thread_id for memory
-    result = await graph.ainvoke(state, config={"thread_id": question["task_id"]})
-    return Answer(task_id=question["task_id"], submitted_answer=result["answer"])
-async def submit_answers(answers: List[Answer], username: str, agent_code: str):
-    submission = Submission(
-        username=username,
-        agent_code=agent_code,
-        answers=answers
     )
-    async with aiohttp.ClientSession() as session:
-        async with session.post("https://api.gaia-benchmark.com/submit", json=submission.dict()) as resp:
-            return await resp.json()
-async def main():
-    username = "onisj"  # Your Hugging Face username
-    agent_code = "https://huggingface.co/spaces/onisj/jarvis_gaia_agent/tree/main"
-    questions = await fetch_questions()
-    answers = []
-    for question in questions[:20]:  # Process 20 questions
-        answer = await process_question(question)
-        answers.append(answer)
-    result = await submit_answers(answers, username, agent_code)
-    print("Submission result:", json.dumps(result, indent=2))
 if __name__ == "__main__":
-    asyncio.run(main())

+import os
+import gradio as gr
+import requests
 import aiohttp
 import asyncio
 import json
+import nest_asyncio
+from langgraph.graph import StateGraph, END
+from langgraph.checkpoint.memory import MemorySaver
+from langchain_huggingface import HuggingFacePipeline
+from transformers import pipeline
+from langchain_core.messages import SystemMessage, HumanMessage
+from tools import search_tool, multi_hop_search_tool, file_parser_tool, image_parser_tool, calculator_tool, document_retriever_tool
+from tools.search import initialize_search_tools
+from state import JARVISState
+import pandas as pd
 from dotenv import load_dotenv
+import logging
+from langfuse.callback import CallbackHandler
+# Set up logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
+# Apply nest_asyncio
+# (BasicAgent.__call__ drives coroutines with loop.run_until_complete; the
+# patch is presumably there so that works when a loop is already running —
+# confirm against the hosting environment.)
+nest_asyncio.apply()
 # Load environment variables
 load_dotenv()
+# Verify environment variables
+# Import fails fast with ValueError if any of these is missing or empty.
+required_env_vars = ["SPACE_ID", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
 for var in required_env_vars:
     if not os.getenv(var):
         raise ValueError(f"Environment variable {var} is not set")
+logger.info(f"Environment variables loaded: SPACE_ID={os.getenv('SPACE_ID')[:10]}..., LANGFUSE_HOST={os.getenv('LANGFUSE_HOST', 'https://cloud.langfuse.com')}")
+# Initialize Hugging Face model
+# NOTE(review): confirm the repo id "mistralai/Mixtral-7B-Instruct-v0.1" exists
+# on the Hub (the names look like a mix of "Mistral-7B" and "Mixtral-8x7B").
+# If loading raises, the error is logged and llm is set to None, in which case
+# the node functions below fall back to their no-LLM defaults.
+try:
+    hf_pipeline = pipeline(
+        "text-generation",
+        model="mistralai/Mixtral-7B-Instruct-v0.1",
+        device_map="auto",
+        max_new_tokens=512,
+        do_sample=True,
+        temperature=0.7
     )
+    llm = HuggingFacePipeline(pipeline=hf_pipeline)
+    logger.info("HuggingFace model initialized: mistralai/Mixtral-7B-Instruct-v0.1")
+except Exception as e:
+    logger.error(f"Failed to initialize HuggingFace model: {e}")
+    llm = None
+# Initialize search tools with LLM
+# (llm may be None here if model loading failed above.)
+try:
+    initialize_search_tools(llm)
+    logger.info("Search tools initialized")
+except Exception as e:
+    logger.error(f"Failed to initialize search tools: {e}")
+# Initialize Langfuse
+# Tracing is optional: on failure langfuse is None and callers pass no callbacks.
+try:
+    langfuse = CallbackHandler(
+        public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
+        secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
+        host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
+    )
+    logger.info("Langfuse initialized successfully")
+except Exception as e:
+    logger.warning(f"Failed to initialize Langfuse: {e}")
+    langfuse = None
+# Initialize MemorySaver
+# use_checkpointing gates both the checkpointer passed to workflow.compile()
+# and the thread_id config built in BasicAgent.process_question.
+memory = MemorySaver()
+use_checkpointing = True
+# --- Constants ---
+# DEFAULT_API_URL serves /questions and /submit; GAIA_FILE_URL is prefixed to a
+# task_id to probe for and download that task's attachment.
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space/api"
+GAIA_FILE_URL = "https://api.gaia-benchmark.com/files/"
+# --- Helper Functions ---
+def log_state(task_id: str, state: JARVISState):
+    """Append a snapshot of selected state fields to state_log.json.
+
+    Copies the question, tool list, per-tool results and answer out of
+    ``state`` and writes them, together with ``task_id``, as one JSON
+    object followed by a newline. Any exception (missing key, value that
+    is not JSON-serialisable, I/O error) is logged via ``logger`` and
+    swallowed, so logging never interrupts the caller.
+    """
+    try:
+        log_entry = {
+            "task_id": task_id,
+            "question": state["question"],
+            "tools_needed": state["tools_needed"],
+            "web_results": state["web_results"],
+            "file_results": state["file_results"],
+            "image_results": state["image_results"],
+            "calculation_results": state["calculation_results"],
+            "document_results": state["document_results"],
+            "answer": state["answer"]
+        }
+        # Append mode plus indent=2: the file becomes a sequence of multi-line
+        # JSON objects, not a single JSON document and not one-object-per-line.
+        with open("state_log.json", "a") as f:
+            json.dump(log_entry, f, indent=2)
+            f.write("\n")
+    except Exception as e:
+        logger.error(f"Error logging state for task {task_id}: {e}")
+async def test_gaia_api(task_id: str) -> bool:
+    """Probe the GAIA file endpoint for ``task_id`` with a HEAD request.
+
+    Returns True when the request completes with status 200, 403 or 404,
+    i.e. when the server answered at all; True therefore does not mean the
+    file exists or is downloadable. Any exception (including hitting the
+    ``timeout=5`` limit) is logged as a warning and reported as False.
+    """
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.head(f"{GAIA_FILE_URL}{task_id}", timeout=5) as resp:
+                return resp.status in [200, 403, 404]
+    except Exception as e:
+        logger.warning(f"GAIA API test failed: {e}")
+        return False
+# --- Node Functions ---
+async def parse_question(state: JARVISState) -> JARVISState:
+    """Ask the LLM which tools the question needs and store them in state.
+
+    Builds a prompt from ``state["question"]`` and, when ``llm`` is set,
+    parses the reply as JSON into ``state["tools_needed"]``. Falls back to
+    ``["web_search"]`` when the reply is not valid JSON or no LLM is
+    available, and to ``[]`` on any other exception. The state is written
+    to the log via ``log_state`` before it is returned on every path.
+    """
+    try:
+        question = state["question"]
+        prompt = f"""Analyze this GAIA question: {question}
+        Determine which tools are needed (web_search, multi_hop_search, file_parser, image_parser, calculator, document_retriever).
+        Return a JSON list of tool names."""
+        if llm:
+            response = await llm.ainvoke(prompt, config={"callbacks": [langfuse] if langfuse else []})
+            try:
+                # NOTE(review): `.content` assumes a message-like reply. `llm` is a
+                # HuggingFacePipeline, which presumably returns a plain str — confirm.
+                # An AttributeError here is not a JSONDecodeError, so it would reach
+                # the outer handler and leave tools_needed empty.
+                # The parsed value is stored as-is; nothing checks that it is a list
+                # of known tool names.
+                tools_needed = json.loads(response.content)
+            except json.JSONDecodeError as je:
+                logger.warning(f"Invalid JSON in LLM response for task {state['task_id']}: {je}")
+                tools_needed = ["web_search"]
+        else:
+            logger.warning("No LLM available, using default tools")
+            tools_needed = ["web_search"]
+        state["tools_needed"] = tools_needed
+        log_state(state["task_id"], state)
+        return state
+    except Exception as e:
+        logger.error(f"Error parsing question for task {state['task_id']}: {e}")
+        # An empty tool list makes router() skip straight to the reasoning node.
+        state["tools_needed"] = []
+        log_state(state["task_id"], state)
+        return state
+async def tool_dispatcher(state: JARVISState) -> JARVISState:
+    """Run each tool named in ``state["tools_needed"]`` and collect results.
+
+    Works on ``state.copy()``. The file-based tools (file_parser,
+    image_parser, document_retriever) only run when ``test_gaia_api``
+    reports the GAIA file endpoint as reachable; otherwise they are skipped
+    without a log message. A failure inside one tool is logged as a warning
+    and does not stop the remaining tools. Returns the updated copy, or the
+    original ``state`` if the dispatcher itself raises.
+    """
+    try:
+        tools_needed = state["tools_needed"]
+        # NOTE(review): if copy() is shallow (as it is for dict), the
+        # web_results list extended below is the same object as in `state`
+        # — confirm that sharing is intended.
+        updated_state = state.copy()
+        can_download_files = await test_gaia_api(updated_state["task_id"])
+        for tool in tools_needed:
+            try:
+                # Both search names route to web_search_agent, which itself checks
+                # tools_needed for each name; when both are listed it is called
+                # twice and its results are appended twice.
+                if tool == "web_search" or tool == "multi_hop_search":
+                    result = await web_search_agent(updated_state)
+                    updated_state["web_results"].extend(result["web_results"])
+                elif tool == "file_parser" and can_download_files:
+                    result = await file_parser_agent(updated_state)
+                    updated_state["file_results"] = result["file_results"]
+                elif tool == "image_parser" and can_download_files:
+                    result = await image_parser_agent(updated_state)
+                    updated_state["image_results"] = result["image_results"]
+                elif tool == "calculator":
+                    result = await calculator_agent(updated_state)
+                    updated_state["calculation_results"] = result["calculation_results"]
+                elif tool == "document_retriever" and can_download_files:
+                    result = await document_retriever_agent(updated_state)
+                    updated_state["document_results"] = result["document_results"]
+                # Unknown tool names fall through every branch and are ignored.
+            except Exception as e:
+                logger.warning(f"Error in tool {tool} for task {updated_state['task_id']}: {e}")
+        log_state(updated_state["task_id"], updated_state)
+        return updated_state
+    except Exception as e:
+        logger.error(f"Error in tool dispatcher for task {state['task_id']}: {e}")
+        log_state(state["task_id"], state)
+        return state
+async def web_search_agent(state: JARVISState) -> JARVISState:
+    """Run the search tools listed in ``state["tools_needed"]`` on the question.
+
+    Despite the annotation, the return value is a plain dict with the single
+    key ``"web_results"``: one entry per search tool that ran, or an empty
+    list if anything raised (the error is logged).
+    """
+    try:
+        results = []
+        # NOTE(review): both calls below await the return value of `.invoke(...)`.
+        # If `invoke` is the synchronous entry point of these tools, awaiting its
+        # result would raise and this function would always return an empty
+        # list — confirm whether `.ainvoke(...)` was intended.
+        if "web_search" in state["tools_needed"]:
+            result = await search_tool.invoke({"query": state["question"]})
+            results.append(result)
+        if "multi_hop_search" in state["tools_needed"]:
+            result = await multi_hop_search_tool.invoke({"query": state["question"], "steps": 3})
+            results.append(result)
+        return {"web_results": results}
+    except Exception as e:
+        logger.error(f"Error in web search for task {state['task_id']}: {e}")
+        return {"web_results": []}
+async def file_parser_agent(state: JARVISState) -> JARVISState:
+    """Parse the task's attached file when ``file_parser`` was requested.
+
+    Returns a plain dict ``{"file_results": ...}``: the parser output, an
+    empty string when the tool was not requested, or the literal
+    "File parsing failed" if anything raised (the error is logged).
+    """
+    try:
+        if "file_parser" in state["tools_needed"]:
+            # Extension heuristic: "data" in the question means csv, anything else txt.
+            file_type = "csv" if "data" in state["question"].lower() else "txt"
+            # NOTE(review): tools/file_parser.py in this commit replaces the class
+            # that had an `aparse` method with an @tool-decorated coroutine —
+            # confirm the resulting tool object still exposes `aparse`.
+            result = await file_parser_tool.aparse(state["task_id"], file_type=file_type)
+            return {"file_results": result}
+        return {"file_results": ""}
+    except Exception as e:
+        logger.error(f"Error in file parser for task {state['task_id']}: {e}")
+        return {"file_results": "File parsing failed"}
+async def image_parser_agent(state: JARVISState) -> JARVISState:
+    """Run the image parser on ``temp_<task_id>.jpg`` when requested.
+
+    Returns a plain dict ``{"image_results": ...}``: the parser output, an
+    empty string when ``image_parser`` was not requested, "Image file not
+    found" when the expected file is missing, or "Image parsing failed" if
+    anything raised (the error is logged).
+    """
+    try:
+        if "image_parser" in state["tools_needed"]:
+            # Task selection is keyed on the single word "fruits": such questions
+            # use the "match" task with that word as the query, all others "describe".
+            task = "match" if "fruits" in state["question"].lower() else "describe"
+            match_query = "fruits" if task == "match" else ""
+            file_path = f"temp_{state['task_id']}.jpg"
+            if not os.path.exists(file_path):
+                logger.warning(f"Image file not found for task {state['task_id']}")
+                return {"image_results": "Image file not found"}
+            # NOTE(review): tools/image_parser.py is not visible here — confirm the
+            # tool object exposes `aparse` with this signature.
+            result = await image_parser_tool.aparse(
+                file_path, task=task, match_query=match_query
+            )
+            return {"image_results": result}
+        return {"image_results": ""}
+    except Exception as e:
+        logger.error(f"Error in image parser for task {state['task_id']}: {e}")
+        return {"image_results": "Image parsing failed"}
+async def calculator_agent(state: JARVISState) -> JARVISState:
+    """Extract a math expression with the LLM and evaluate it when requested.
+
+    The prompt combines the question with ``state["file_results"]``. Without
+    an LLM the expression defaults to "0". Returns a plain dict
+    ``{"calculation_results": ...}``: the calculator output, an empty string
+    when ``calculator`` was not requested, or "Calculation failed" if
+    anything raised (the error is logged).
+    """
+    try:
+        if "calculator" in state["tools_needed"]:
+            prompt = f"Extract a mathematical expression from: {state['question']}\n{state['file_results']}"
+            if llm:
+                response = await llm.ainvoke(prompt, config={"callbacks": [langfuse] if langfuse else []})
+                # NOTE(review): `.content` assumes a message-like reply; `llm` is a
+                # HuggingFacePipeline, which presumably returns a plain str — confirm.
+                expression = response.content
+            else:
+                expression = "0"
+            # NOTE(review): tools/calculator.py in this commit replaces the class that
+            # had `aparse` with an @tool-decorated coroutine — confirm `aparse` exists.
+            result = await calculator_tool.aparse(expression)
+            return {"calculation_results": result}
+        return {"calculation_results": ""}
+    except Exception as e:
+        logger.error(f"Error in calculator for task {state['task_id']}: {e}")
+        return {"calculation_results": "Calculation failed"}
+async def document_retriever_agent(state: JARVISState) -> JARVISState:
+    """Retrieve document content for the task when requested.
+
+    Returns a plain dict ``{"document_results": ...}``: the retriever
+    output, an empty string when ``document_retriever`` was not requested,
+    or "Document retrieval failed" if anything raised (the error is logged).
+    """
+    try:
+        if "document_retriever" in state["tools_needed"]:
+            # Extension heuristic: "menu" means txt, otherwise csv; "report" or
+            # "document" in the question then overrides either choice with pdf.
+            # NOTE(review): BasicAgent.process_question saves "menu" questions as
+            # .pdf, while this maps "menu" alone to .txt — confirm the two
+            # heuristics are meant to differ.
+            file_type = "txt" if "menu" in state["question"].lower() else "csv"
+            if "report" in state["question"].lower() or "document" in state["question"].lower():
+                file_type = "pdf"
+            # NOTE(review): tools/document_retriever.py in this commit defines the
+            # tool as an @tool-decorated coroutine — confirm it exposes `aparse`.
+            result = await document_retriever_tool.aparse(
+                state["task_id"], state["question"], file_type=file_type
+            )
+            return {"document_results": result}
+        return {"document_results": ""}
+    except Exception as e:
+        logger.error(f"Error in document retriever for task {state['task_id']}: {e}")
+        return {"document_results": "Document retrieval failed"}
+async def reasoning_agent(state: JARVISState) -> JARVISState:
+    """Synthesise the final answer from the question and all tool results.
+
+    Formats every result field of ``state`` into one prompt and sends it to
+    the LLM with a system message. Stores the stripped reply in
+    ``state["answer"]``; the answer is "Unknown" when no LLM is available
+    and "Error in reasoning" if anything raised. The state is written to the
+    log via ``log_state`` before it is returned on every path.
+    """
+    try:
+        prompt = f"""Question: {state['question']}
+        Web Results: {state['web_results']}
+        File Results: {state['file_results']}
+        Image Results: {state['image_results']}
+        Calculation Results: {state['calculation_results']}
+        Document Results: {state['document_results']}
+        Synthesize an exact-match answer for the GAIA benchmark.
+        Output only the answer (e.g., '90', 'White;5876')."""
+        if llm:
+            response = await llm.ainvoke(
+                [
+                    SystemMessage(content="You are JARVIS, a precise assistant for the GAIA benchmark. Provide exact answers only."),
+                    HumanMessage(content=prompt)
+                ],
+                config={"callbacks": [langfuse] if langfuse else []}
+            )
+            # NOTE(review): `.content` assumes a message-like reply; `llm` is a
+            # HuggingFacePipeline, which presumably returns a plain str — confirm.
+            # If it does, every answer would become "Error in reasoning".
+            answer = response.content.strip()
+        else:
+            answer = "Unknown"
+        state["answer"] = answer
+        log_state(state["task_id"], state)
+        return state
+    except Exception as e:
+        logger.error(f"Error in reasoning for task {state['task_id']}: {e}")
+        state["answer"] = "Error in reasoning"
+        log_state(state["task_id"], state)
+        return state
+def router(state: JARVISState) -> str:
+    """Pick the node that follows "parse".
+
+    Returns "tool_dispatcher" when ``state["tools_needed"]`` is truthy
+    (non-empty), otherwise "reasoning".
+    """
+    if state["tools_needed"]:
+        return "tool_dispatcher"
+    return "reasoning"
+# --- Define StateGraph ---
+# Flow: parse -> (tool_dispatcher ->) reasoning -> END.
+# The tool_dispatcher step is taken only when router() sees a non-empty
+# tools_needed list.
+workflow = StateGraph(JARVISState)
+workflow.add_node("parse", parse_question)
+workflow.add_node("tool_dispatcher", tool_dispatcher)
+workflow.add_node("reasoning", reasoning_agent)
+workflow.set_entry_point("parse")
+# The mapping keys are router()'s return values; the values are node names.
+workflow.add_conditional_edges(
+    "parse",
+    router,
+    {
+        "tool_dispatcher": "tool_dispatcher",
+        "reasoning": "reasoning"
+    }
+)
+workflow.add_edge("tool_dispatcher", "reasoning")
+workflow.add_edge("reasoning", END)
+# Compile graph
+# With use_checkpointing enabled the MemorySaver is attached, and callers must
+# supply a thread_id (see BasicAgent.process_question).
+graph = workflow.compile(checkpointer=memory if use_checkpointing else None)
+# --- Basic Agent Definition ---
+class BasicAgent:
+    """Callable wrapper that runs one question through the compiled graph.
+
+    Holds no instance state; it relies on the module-level ``graph``,
+    ``use_checkpointing``, ``GAIA_FILE_URL`` and ``logger``.
+    """
+    def __init__(self):
+        logger.info("BasicAgent initialized.")
+    async def process_question(self, task_id: str, question: str) -> str:
+        """Download the task file (best effort), run the graph, return the answer.
+
+        Returns the graph's ``answer`` field, "No answer generated" when that
+        field is empty, or ``"Error: <message>"`` if the graph raised. The
+        temporary file is removed afterwards if it exists.
+        """
+        # Guess the attachment's extension from keywords in the question:
+        # "menu"/"report"/"document" -> pdf, else "data" -> csv, else
+        # "image" -> jpg, else txt.
+        # NOTE(review): the *_agent node functions derive the extension with
+        # different keyword rules — confirm they look for the same file name.
+        file_type = "jpg" if "image" in question.lower() else "txt"
+        if "menu" in question.lower() or "report" in question.lower() or "document" in question.lower():
+            file_type = "pdf"
+        elif "data" in question.lower():
+            file_type = "csv"
+        file_path = f"temp_{task_id}.{file_type}"
+        # Download failures are logged only; the graph still runs without a file.
+        if await test_gaia_api(task_id):
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(f"{GAIA_FILE_URL}{task_id}") as resp:
+                        if resp.status == 200:
+                            with open(file_path, "wb") as f:
+                                f.write(await resp.read())
+                        else:
+                            logger.warning(f"Failed to download file for task {task_id}: HTTP {resp.status}")
+            except Exception as e:
+                logger.error(f"Error downloading file for task {task_id}: {e}")
+        state = JARVISState(
+            task_id=task_id,
+            question=question,
+            tools_needed=[],
+            web_results=[],
+            file_results="",
+            image_results="",
+            calculation_results="",
+            document_results="",
+            messages=[],
+            answer=""
+        )
+        try:
+            # The task_id doubles as the checkpointer thread id.
+            config = {"configurable": {"thread_id": task_id}} if use_checkpointing else {}
+            result = await graph.ainvoke(state, config=config)
+            return result["answer"] or "No answer generated"
+        except Exception as e:
+            logger.error(f"Error processing task {task_id}: {e}")
+            return f"Error: {str(e)}"
+        finally:
+            # Always remove the downloaded file, whatever the outcome.
+            if os.path.exists(file_path):
+                try:
+                    os.remove(file_path)
+                except Exception as e:
+                    logger.error(f"Error removing file {file_path}: {e}")
+    async def async_call(self, question: str, task_id: str) -> str:
+        """Async entry point; forwards to process_question with arguments swapped."""
+        return await self.process_question(task_id, question)
+    def __call__(self, question: str, task_id: str = None) -> str:
+        """Synchronously answer ``question`` by driving ``async_call`` on an event loop.
+
+        Uses "placeholder_task_id" when ``task_id`` is None. A new event loop
+        is created and installed if ``asyncio.get_event_loop()`` raises
+        RuntimeError.
+        """
+        logger.info(f"Agent received question (first 50 chars): {question[:50]}...")
+        if task_id is None:
+            logger.warning("task_id not provided, using placeholder")
+            task_id = "placeholder_task_id"
+        # The outer try/finally performs no cleanup (its finally body is `pass`).
+        try:
+            try:
+                loop = asyncio.get_event_loop()
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+            return loop.run_until_complete(self.async_call(question, task_id))
+        finally:
+            pass
+# --- Main Function ---
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Fetch all questions, answer each with BasicAgent, and submit the answers.
+
+    Returns a ``(status_message, results)`` pair, where ``results`` is a
+    pandas DataFrame of per-question outcomes, or None when the run stops
+    before any question is attempted (not logged in, agent construction
+    failed, or fetching questions failed). Errors are reported through the
+    status message rather than raised.
+    """
+    space_id = os.getenv("SPACE_ID")
+    if not profile:
+        logger.error("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    username = f"{profile.username}"
+    logger.info(f"User logged in: {username}")
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # Link to this Space's source tree, sent along with the submission.
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        logger.error(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    logger.info(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            logger.error("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        logger.info(f"Fetched {len(questions_data)} questions.")
+    except Exception as e:
+        logger.error(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    # results_log feeds the UI table; answers_payload is what gets submitted.
+    results_log = []
+    answers_payload = []
+    logger.info(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            logger.warning(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            submitted_answer = agent(question_text, task_id)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+        except Exception as e:
+            # A failed question is shown in the table but not submitted.
+            logger.error(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+    if not answers_payload:
+        logger.error("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)
+        response.raise_for_status()
+        result_data = response.json()
+        logger.info(f"Server response: {result_data}")
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except Exception as e:
+        logger.error(f"Submission failed: {e}")
+        results_df = pd.DataFrame(results_log)
+        return f"Submission Failed: {e}", results_df
+# --- Build Gradio Interface ---
+# One login button, one run button, a status box and a results table.
+with gr.Blocks() as demo:
+    gr.Markdown("# JARVIS Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1. Log in to your Hugging Face account using the button below.
+        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the JARVIS agent, and submit answers.
+        ---
+        **Disclaimers:**
+        The agent uses a local Hugging Face model (Mixtral-7B) and async tools for the GAIA benchmark.
+        """
+    )
+    gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # No `inputs` are wired; the `profile` argument of run_and_submit_all is
+    # presumably injected by Gradio from its gr.OAuthProfile annotation — confirm.
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
+    space_id = os.getenv("SPACE_ID")
+    logger.info(f"SPACE_ID: {space_id}")
+    logger.info("Launching Gradio Interface...")
+    demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,97 +1,89 @@
-aiohappyeyeballs==2.6.1
-aiohttp==3.12.2
-aiosignal==1.3.2
 annotated-types==0.7.0
-anyio==4.9.0
-attrs==25.3.0
 backoff==2.2.1
-certifi==2025.4.26
-charset-normalizer==3.4.2
-click==8.2.1
 dataclasses-json==0.6.7
 distro==1.9.0
-duckduckgo_search==8.0.2
-filelock==3.18.0
-frozenlist==1.6.0
-fsspec==2025.5.1
-greenlet==3.2.2
-h11==0.16.0
-hf-xet==1.1.2
-httpcore==1.0.9
-httpx==0.28.1
 httpx-sse==0.4.0
-huggingface-hub==0.24.5
-idna==3.10
-Jinja2==3.1.6
-jiter==0.10.0
-joblib==1.5.1
 jsonpatch==1.33
 jsonpointer==3.0.0
-langchain==0.3.25
-langchain-community==0.3.24
-langchain-core==0.3.62
-langchain-openai==0.2.0
-langchain-text-splitters==0.3.8
-langfuse==2.44.0
-langgraph==0.4.7
-langgraph-checkpoint==2.0.26
-langgraph-prebuilt==0.2.1
-langgraph-sdk==0.1.70
-langsmith==0.1.147
-lxml==5.4.0
 markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-marshmallow==3.26.1
 mdurl==0.1.2
 mpmath==1.3.0
-msgpack==1.1.0
-multidict==6.4.4
-mypy_extensions==1.1.0
-networkx==3.4.2
 numpy==1.26.4
-openai==1.40.0
-orjson==3.10.18
-ormsgpack==1.10.0
 packaging==23.2
-pandas==2.2.3
-pillow==11.0.0
 primp==0.15.0
-propcache==0.3.1
 pydantic==2.8.2
-pydantic-settings==2.9.1
 pydantic_core==2.20.1
-Pygments==2.19.1
 PyPDF2==3.0.1
 pytesseract==0.3.10
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
 requests==2.32.3
 requests-toolbelt==1.0.0
-rich==14.0.0
-safetensors==0.5.3
-scikit-learn==1.6.1
-scipy==1.15.3
 sentence-transformers==3.0.1
-six==1.17.0
-smolagents==1.17.0
 sniffio==1.3.1
-SQLAlchemy==2.0.41
-sympy==1.14.0
 tenacity==8.5.0
-threadpoolctl==3.6.0
-tiktoken==0.9.0
 tokenizers==0.19.1
 torch==2.2.2
-tqdm==4.67.1
 transformers==4.42.4
 typing-inspect==0.9.0
-typing-inspection==0.4.1
-typing_extensions==4.13.2
-tzdata==2025.2
-urllib3==2.4.0
-wrapt==1.17.2
-xxhash==3.5.0
-yarl==1.20.0

+aiohttp==3.8.6
+aiosignal==1.3.1
 annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
 backoff==2.2.1
+certifi==2024.7.4
+charset-normalizer==3.3.2
+click==8.1.7
 dataclasses-json==0.6.7
 distro==1.9.0
+duckduckgo_search==6.2.4
+filelock==3.15.4
+frozenlist==1.4.1
+fsspec==2024.6.1
+greenlet==3.0.3
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
 httpx-sse==0.4.0
+huggingface-hub==0.23.4
+idna==3.7
+Jinja2==3.1.4
+jiter==0.5.0
+joblib==1.4.2
 jsonpatch==1.33
 jsonpointer==3.0.0
+langchain==0.2.11
+langchain-community==0.2.10
+langchain-core==0.2.23
+langchain-openai==0.1.17
+langchain-text-splitters==0.2.2
+langfuse==2.36.1
+langgraph==0.1.15
+langgraph-checkpoint==1.0.2
+langsmith==0.1.93
+lxml==5.2.2
 markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.3
 mdurl==0.1.2
 mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+mypy_extensions==1.0.0
+networkx==3.3
 numpy==1.26.4
+openai==1.35.13
+orjson==3.10.6
 packaging==23.2
+pandas==2.2.2
+pillow==10.4.0
 primp==0.15.0
 pydantic==2.8.2
 pydantic_core==2.20.1
+Pygments==2.18.0
 PyPDF2==3.0.1
 pytesseract==0.3.10
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
+pytz==2024.1
+PyYAML==6.0.1
+regex==2024.7.24
 requests==2.32.3
 requests-toolbelt==1.0.0
+rich==13.7.1
+safetensors==0.4.3
+scikit-learn==1.5.1
+scipy==1.14.0
 sentence-transformers==3.0.1
+six==1.16.0
 sniffio==1.3.1
+SQLAlchemy==2.0.31
+sympy==1.13.1
 tenacity==8.5.0
+threadpoolctl==3.5.0
+tiktoken==0.7.0
 tokenizers==0.19.1
 torch==2.2.2
+tqdm==4.66.4
 transformers==4.42.4
 typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+wrapt==1.16.0
+xxhash==3.4.1
+yarl==1.9.4
+gradio[oauth]==4.44.1

tools/__init__.py CHANGED Viewed

@@ -2,4 +2,4 @@ from .search import search_tool, multi_hop_search_tool
 from .file_parser import file_parser_tool
 from .image_parser import image_parser_tool
 from .calculator import calculator_tool
-from .retriever import document_retriever_tool

 from .file_parser import file_parser_tool
 from .image_parser import image_parser_tool
 from .calculator import calculator_tool
+from .document_retriever import document_retriever_tool

tools/calculator.py CHANGED Viewed

@@ -1,20 +1,15 @@
-import ast
-from typing import Dict
-class CalculatorTool:
-    def __init__(self):
-        self.name = "calculator"
-        self.description = "Evaluates mathematical expressions."
-        self.inputs = {
-            "expression": {"type": "string", "description": "Mathematical expression to evaluate"}
-        }
-        self.output_type = str
-    async def aparse(self, expression: str) -> str:
-        try:
-            result = eval(expression, {"__builtins__": {}}, {"abs": abs, "round": round})
-            return str(result)
-        except Exception as e:
-            return f"Error calculating expression: {str(e)}"
-calculator_tool = CalculatorTool()

+from langchain_core.tools import tool
+from sympy import sympify
+import logging
+logger = logging.getLogger(__name__)
+@tool
+async def calculator_tool(expression: str) -> str:
+    """Evaluate a mathematical expression"""
+    # Returns str() of the sympified expression, or "Error: <message>" if
+    # sympify raises; the failure is also logged.
+    # NOTE(review): sympify on a string presumably evaluates it with eval-like
+    # machinery, and app.py feeds it LLM output — confirm this is acceptable for
+    # untrusted text.
+    try:
+        result = sympify(expression)
+        return str(result)
+    except Exception as e:
+        logger.error(f"Error evaluating expression '{expression}': {e}")
+        return f"Error: {str(e)}"

tools/document_retriever.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from langchain_core.tools import tool
+from langchain_community.document_loaders import TextLoader, CSVLoader, PyPDFLoader
+import logging
+import os
+logger = logging.getLogger(__name__)
+@tool
+async def document_retriever_tool(task_id: str, query: str, file_type: str) -> str:
+    """Retrieve content from a document"""
+    # Loads temp_<task_id>.<file_type> from the working directory and returns
+    # the page_content of every loaded document joined by newlines.
+    # Returns "Document not found" when the file is missing, "Unsupported file
+    # type: ..." for anything other than txt/csv/pdf, and "Error: <message>" if
+    # loading raises.
+    # `query` is accepted but not used in the body: the whole document is
+    # returned, with no filtering by query.
+    try:
+        file_path = f"temp_{task_id}.{file_type}"
+        if not os.path.exists(file_path):
+            logger.warning(f"Document not found: {file_path}")
+            return "Document not found"
+        # Pick the loader that matches the extension.
+        if file_type == "txt":
+            loader = TextLoader(file_path)
+        elif file_type == "csv":
+            loader = CSVLoader(file_path)
+        elif file_type == "pdf":
+            loader = PyPDFLoader(file_path)
+        else:
+            return f"Unsupported file type: {file_type}"
+        docs = loader.load()
+        return "\n".join(doc.page_content for doc in docs)
+    except Exception as e:
+        logger.error(f"Error retrieving document for task {task_id}: {e}")
+        return f"Error: {str(e)}"

tools/file_parser.py CHANGED Viewed

@@ -1,38 +1,33 @@
 import pandas as pd
-import requests
 import os
-class FileParserTool:
-    def __init__(self):
-        self.name = "file_parser"
-        self.description = "Downloads and parses CSV or text files for GAIA tasks."
-        self.inputs = {
-            "task_id": {"type": "string", "description": "GAIA task ID"},
-            "file_type": {"type": "string", "description": "File type (csv, txt, default: csv)"}
-        }
-        self.output_type = str
-    async def aparse(self, task_id: str, file_type: str = "csv") -> str:
-        try:
-            url = f"https://api.gaia-benchmark.com/files/{task_id}"
-            response = await requests.get(url)
-            if response.status_code == 200:
-                file_path = f"temp_{task_id}.{file_type}"
-                with open(file_path, "wb") as f:
-                    f.write(response.content)
-                if file_type == "csv":
-                    df = pd.read_csv(file_path)
-                    return df.to_string()
-                elif file_type == "txt":
-                    with open(file_path, "r") as f:
-                        return f.read()
-                else:
-                    return f"Unsupported file type: {file_type}"
-            return f"Error downloading file for task ID {task_id}"
-        except Exception as e:
-            return f"Error: {str(e)}"
-        finally:
-            if os.path.exists(file_path):
-                os.remove(file_path)
-file_parser_tool = FileParserTool()

+from langchain_core.tools import tool
 import pandas as pd
+import PyPDF2
+import logging
 import os
+logger = logging.getLogger(__name__)
+@tool
+async def file_parser_tool(task_id: str, file_type: str) -> str:
+    """Parse a file based on task_id and file_type.
+
+    Reads ``temp_{task_id}.{file_type}`` from the working directory.
+
+    Args:
+        task_id: Task identifier used to build the file name.
+        file_type: One of ``csv``, ``txt`` or ``pdf``.
+
+    Returns:
+        The CSV rendered via ``DataFrame.to_string()``, the raw text of a
+        ``txt`` file, or the text extracted from a PDF with pages separated
+        by newlines. On failure a message string is returned instead of
+        raising.
+    """
+    # Reject unknown types before touching the filesystem so the caller is
+    # told about the unsupported type even when no such file exists.
+    if file_type not in ("csv", "txt", "pdf"):
+        return f"Unsupported file type: {file_type}"
+    try:
+        file_path = f"temp_{task_id}.{file_type}"
+        if not os.path.exists(file_path):
+            logger.warning(f"File not found: {file_path}")
+            return "File not found"
+        if file_type == "csv":
+            df = pd.read_csv(file_path)
+            return df.to_string()
+        if file_type == "txt":
+            with open(file_path, "r", encoding="utf-8") as f:
+                return f.read()
+        # Only "pdf" remains after the allowlist check above.
+        with open(file_path, "rb") as f:
+            reader = PyPDF2.PdfReader(f)
+            # Separate pages with a newline so the last word of one page does
+            # not fuse with the first word of the next. `or ""` guards
+            # against a page yielding no text (presumably None or "" for
+            # image-only pages, depending on the PyPDF2 version).
+            return "\n".join(page.extract_text() or "" for page in reader.pages)
+    except Exception as e:
+        logger.error(f"Error parsing file for task {task_id}: {e}")
+        return f"Error: {str(e)}"

tools/image_parser.py CHANGED Viewed

@@ -1,66 +1,26 @@
-from langchain_openai import ChatOpenAI
-from sentence_transformers import SentenceTransformer, util
-import pytesseract
-from PIL import Image
-import base64
 import os
-from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
-# Debug: Verify OPENAI_API_KEY
-if not os.getenv("OPENAI_API_KEY"):
-    print("Error: OPENAI_API_KEY not loaded in image_parser.py")
-class ImageParserTool:
-    def __init__(self):
-        self.name = "image_parser"
-        self.description = "Analyzes images to extract text, identify objects, or match descriptions."
-        self.inputs = {
-            "image_path": {"type": "string", "description": "Path to image file"},
-            "task": {"type": "string", "description": "Task type (ocr, describe, match)"},
-            "match_query": {"type": "string", "description": "Query for semantic matching (optional)"}
-        }
-        self.output_type = str
-        api_key = os.getenv("OPENAI_API_KEY")
-        if not api_key:
-            raise ValueError("OPENAI_API_KEY environment variable not set")
-        self.vlm = ChatOpenAI(model="gpt-4o", api_key=api_key)
-        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
-    async def aparse(self, image_path: str, task: str = "describe", match_query: str = "") -> str:
-        try:
-            # Read image
-            with open(image_path, "rb") as f:
-                image_data = base64.b64encode(f.read()).decode()
-            img = Image.open(image_path)
-            if task == "ocr":
-                # Extract text with Tesseract
-                text = pytesseract.image_to_string(img)
-                return text if text.strip() else "No text found in image."
-            elif task == "describe":
-                # Describe image with VLM
-                response = await self.vlm.ainvoke([
-                    {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_data}"},
-                    {"type": "text", "text": "Describe objects in the image in detail."}
-                ])
-                return response.content
-            elif task == "match" and match_query:
-                # Semantic matching with sentence-transformers
-                description = await self.vlm.ainvoke([
-                    {"type": "image_url", "image_url": f"data:image/jpeg;base64,{image_data}"},
-                    {"type": "text", "text": "List objects in the image."}
-                ])
-                objects = description.content.split(", ")
-                query_embedding = self.embedder.encode(match_query, convert_to_tensor=True)
-                object_embeddings = self.embedder.encode(objects, convert_to_tensor=True)
-                similarities = util.cos_sim(query_embedding, object_embeddings)[0]
-                best_match = objects[similarities.argmax()]
-                return f"Best match for '{match_query}': {best_match}"
-            else:
-                return "Invalid task or missing match_query for matching."
-        except Exception as e:
-            return f"Error analyzing image: {str(e)}"
-image_parser_tool = ImageParserTool()

+from langchain_core.tools import tool
+import easyocr
+import logging
 import os
+logger = logging.getLogger(__name__)
+# Single module-level EasyOCR reader for English, created at import time and
+# reused by every image_parser_tool call.
+reader = easyocr.Reader(['en'])
+@tool
+async def image_parser_tool(file_path: str, task: str = "describe", match_query: str = "") -> str:
+    """Parse text from an image"""
+    # The whole body runs inside one try so that any failure is returned to
+    # the caller as an "Error: ..." string instead of propagating.
+    try:
+        if os.path.exists(file_path):
+            detections = reader.readtext(file_path)
+            # Element [1] of each detection is joined into one
+            # space-separated string.
+            recognized = " ".join(item[1] for item in detections)
+            wants_match = task == "match" and bool(match_query)
+            if not wants_match:
+                return recognized
+            # Match mode: case-insensitive substring test, returned as the
+            # text "True" or "False".
+            return str(match_query.lower() in recognized.lower())
+        logger.warning(f"Image not found: {file_path}")
+        return "Image not found"
+    except Exception as err:
+        logger.error(f"Error parsing image {file_path}: {err}")
+        return f"Error: {str(err)}"

tools/search.py CHANGED Viewed

@@ -1,68 +1,91 @@
-from langchain_openai import ChatOpenAI
 from langchain_core.tools import tool
-from duckduckgo_search import DDGS
 import os
-from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
-api_key = os.getenv("OPENAI_API_KEY")
-if not api_key:
-    raise ValueError("OPENAI_API_KEY environment variable not set")
 @tool
-async def web_search(query: str) -> str:
-    """
-    Performs a web search using DuckDuckGo and returns a string of results.
-    Args:
-        query (str): The search query string.
-    Returns:
-        str: A string containing the search results.
-    """
     try:
-        with DDGS() as ddgs:
-            results = await ddgs.atext(keywords=query, max_results=5)
-            return "\n".join([f"{r['title']}: {r['body']}" for r in results])
-    except Exception as e:
-        return f"Error performing web search: {str(e)}"
-search_tool = web_search
-class MultiHopSearchTool:
-    def __init__(self):
-        self.name = "multi_hop_search"
-        self.description = "Performs iterative web searches to refine results for complex queries."
-        self.inputs = {
-            "query": {"type": "string", "description": "Initial search query"},
-            "steps": {"type": "integer", "description": "Number of search iterations (default: 3)"}
-        }
-        self.output_type = str
-        self.llm = ChatOpenAI(
-            model="gpt-4o",
-            api_key=api_key,
-            temperature=0,
-            http_client=None  # Explicitly disable custom HTTP client to avoid proxies
-        )
-    async def aparse(self, query: str, steps: int = 3) -> str:
-        try:
-            current_query = query
-            results = []
-            for _ in range(steps):
-                search_result = await web_search.invoke({"query": current_query})
-                results.append(search_result)
-                # Refine query using LLM
-                prompt = f"""Based on the query: {current_query}
-                And the search results: {search_result}
-                Generate a refined search query to get more precise results."""
-                response = await self.llm.ainvoke(prompt)
-                current_query = response.content
-            return "\n\n".join(results)
-        except Exception as e:
-            return f"Error in multi-hop search: {str(e)}"
-multi_hop_search_tool = MultiHopSearchTool()

 from langchain_core.tools import tool
+from langchain_huggingface import HuggingFacePipeline
+from sentence_transformers import SentenceTransformer
+import logging
+from typing import List, Dict, Any
+import requests
 import os
+logger = logging.getLogger(__name__)
+# Initialize embedding model (free, open-source)
+# Loading is attempted once at import time; on failure the error is logged
+# and `embedder` is left as None so the tools below can test for it.
+try:
+    embedder = SentenceTransformer("all-MiniLM-L6-v2")
+except Exception as e:
+    logger.error(f"Failed to initialize embedding model: {e}")
+    embedder = None
+# Global LLM instance; stays None until initialize_search_tools() assigns it.
+search_llm = None
+def initialize_search_tools(llm: HuggingFacePipeline) -> None:
+    """Store ``llm`` in the module-level ``search_llm`` used by the search tools.
+
+    Until this is called, ``search_llm`` is None and both search tools return
+    their "unavailable" placeholder result.
+
+    Args:
+        llm: LLM instance that the search tools call through ``ainvoke``.
+    """
+    # Rebind the module-level name rather than creating a local.
+    global search_llm
+    search_llm = llm
+    logger.info("Search tools initialized with HuggingFace LLM")
 @tool
+async def search_tool(query: str) -> List[Dict[str, Any]]:
+    """Perform a web search using the query.
+
+    The query is first refined by the configured LLM, then sent to SerpAPI
+    when ``SERPAPI_API_KEY`` is set. Without a key, or if the request fails,
+    placeholder (mock) results are returned instead of real ones.
+
+    Args:
+        query: Search query text.
+
+    Returns:
+        A list of ``{"content": ..., "url": ...}`` dicts. Errors are reported
+        as a single-item list rather than raised.
+    """
     try:
+        if not search_llm:
+            logger.warning("Search LLM not initialized")
+            return [{"content": "Search unavailable", "url": ""}]
+        # Refine query using LLM
+        prompt = f"Refine this search query for better results: {query}"
+        llm_output = await search_llm.ainvoke(prompt)
+        # Chat models return a message object with `.content`; plain LLMs
+        # such as HuggingFacePipeline return a bare string. Accept both
+        # instead of failing with AttributeError on the string case.
+        refined_query = str(getattr(llm_output, "content", llm_output)).strip()
+        # An empty refinement would search for nothing; keep the original.
+        if not refined_query:
+            refined_query = query
+        # Check for SerpAPI key (free tier available)
+        serpapi_key = os.getenv("SERPAPI_API_KEY")
+        if serpapi_key:
+            try:
+                params = {"q": refined_query, "api_key": serpapi_key}
+                # NOTE: requests is synchronous and blocks the event loop for
+                # the duration of the call; the timeout bounds that stall and
+                # keeps a hung connection from blocking forever.
+                http_response = requests.get(
+                    "https://serpapi.com/search", params=params, timeout=10
+                )
+                http_response.raise_for_status()
+                results = http_response.json().get("organic_results", [])
+                return [{"content": r.get("snippet", ""), "url": r.get("link", "")} for r in results]
+            except Exception as e:
+                logger.warning(f"SerpAPI failed: {e}, falling back to mock search")
+        # Mock search if no API key or API fails
+        # NOTE: these are placeholders, not real search hits. The embedding
+        # that used to be computed here was never used, so only the
+        # availability check on `embedder` remains.
+        if embedder:
+            results = [
+                {"content": f"Mock result for {refined_query}", "url": "https://example.com"},
+                {"content": f"Another mock result for {refined_query}", "url": "https://example.org"}
+            ]
+        else:
+            results = [{"content": "Embedding model unavailable", "url": ""}]
+        logger.info(f"Search results for query '{refined_query}': {len(results)} items")
+        return results
+    except Exception as e:
+        logger.error(f"Error in search_tool: {e}")
+        return [{"content": f"Search failed: {str(e)}", "url": ""}]
+@tool
+async def multi_hop_search_tool(query: str, steps: int = 3) -> List[Dict[str, Any]]:
+    """Perform a multi-hop search by iteratively refining the query.
+
+    On each step the LLM turns the current query into a follow-up question,
+    that question is searched with ``search_tool``, and it becomes the
+    current query for the next step.
+
+    Args:
+        query: Starting query.
+        steps: Number of follow-up searches to run (default 3).
+
+    Returns:
+        The concatenated result dicts of every step. Errors are reported as a
+        single-item list rather than raised.
+    """
+    try:
+        if not search_llm:
+            logger.warning("Search LLM not initialized")
+            return [{"content": "Multi-hop search unavailable", "url": ""}]
+        results = []
+        current_query = query
+        for step in range(steps):
+            prompt = f"Based on the query '{current_query}', generate a follow-up question to deepen the search."
+            llm_output = await search_llm.ainvoke(prompt)
+            # Chat models return a message object with `.content`; plain LLMs
+            # such as HuggingFacePipeline return a bare string. Accept both.
+            next_query = str(getattr(llm_output, "content", llm_output)).strip()
+            # If the LLM produced nothing, search the current query again
+            # rather than issuing an empty search.
+            if not next_query:
+                next_query = current_query
+            # search_tool wraps a coroutine, so it must be called through the
+            # async entry point; the sync invoke() cannot be awaited.
+            step_results = await search_tool.ainvoke({"query": next_query})
+            results.extend(step_results)
+            current_query = next_query
+            logger.info(f"Multi-hop step {step + 1}: {next_query}")
+        return results
+    except Exception as e:
+        logger.error(f"Error in multi_hop_search_tool: {e}")
+        return [{"content": f"Multi-hop search failed: {str(e)}", "url": ""}]