Commit 07ad0d5
Parent(s): f5bafc2

changes for local model

Files changed:
- app.py (+72, -26)
- config.py (+12, -0)
- requirements.txt (+7, -1)
- test_agent.py (+5, -4)
- utils/local_model.py (+177, -0)
app.py
CHANGED

@@ -1,44 +1,41 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
-import
-import json
-import io
-import base64
-from typing import Dict, List, Union, Optional
-import re
-import sys
-from bs4 import BeautifulSoup
-from duckduckgo_search import DDGS
-import pytube
-from dateutil import parser
-try:
-    from youtube_transcript_api import YouTubeTranscriptApi
-except ImportError:
-    print("YouTube Transcript API not installed. Video transcription may be limited.")
-
-from smolagents import Tool, CodeAgent, InferenceClientModel
+from smolagents import Tool, CodeAgent, Model
 
 # Import internal modules
 from config import (
-    DEFAULT_API_URL
-    MAX_RETRIES, RETRY_DELAY
+    DEFAULT_API_URL
 )
 from tools.tool_manager import ToolManager
+from utils.local_model import LocalTransformersModel
 
 class GaiaToolCallingAgent:
     """Tool-calling agent specifically designed for the GAIA system."""
 
-    def __init__(self):
+    def __init__(self, local_model=None):
         print("GaiaToolCallingAgent initialized.")
         self.tool_manager = ToolManager()
         self.name = "tool_agent"  # Add required name attribute for smolagents integration
         self.description = "A specialized agent that uses various tools to answer questions"  # Required by smolagents
 
+        # Use local model if provided, or create a simpler one
+        self.local_model = local_model
+        if not self.local_model:
+            try:
+                from utils.local_model import LocalTransformersModel
+                self.local_model = LocalTransformersModel(
+                    model_name="TinyLlama/TinyLlama-1.1B-Chat-v0.6",
+                    max_tokens=512
+                )
+            except Exception as e:
+                print(f"Couldn't initialize local model in tool agent: {e}")
+                self.local_model = None
+
     def run(self, query: str) -> str:
         """Process a query and return a response using available tools."""
+        print(f"Processing query: {query}")
         tools = self.tool_manager.get_tools()
 
         # For each tool, try to get relevant information

@@ -47,6 +44,7 @@ class GaiaToolCallingAgent:
         for tool in tools:
             try:
                 if self._should_use_tool(tool, query):
+                    print(f"Using tool: {tool.name}")
                     result = tool.forward(query)
                     if result:
                         context_info.append(f"{tool.name} Results:\n{result}")

@@ -56,7 +54,29 @@
         # Combine all context information
         full_context = "\n\n".join(context_info) if context_info else ""
 
-
+        # If we have context and a local model, generate a proper response
+        if full_context and self.local_model:
+            try:
+                prompt = f"""
+                Based on the following information, please provide a comprehensive answer to the question: "{query}"
+
+                CONTEXT INFORMATION:
+                {full_context}
+
+                Answer:
+                """
+
+                response = self.local_model.generate(prompt)
+                return response
+            except Exception as e:
+                print(f"Error generating response with local model: {e}")
+                # Fall back to returning just the context
+                return full_context
+        else:
+            # No context or no model, return whatever we have
+            if not full_context:
+                return "I couldn't find any relevant information to answer your question."
+            return full_context
 
     def __call__(self, query: str) -> str:
         """Make the agent callable so it can be used directly by CodeAgent."""

@@ -76,22 +96,47 @@ class GaiaToolCallingAgent:
             "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"]
         }
 
+        # Use all tools if patterns dict doesn't have the tool name
+        if tool.name not in patterns:
+            return True
+
         return any(pattern in query_lower for pattern in patterns.get(tool.name, []))
 
 def create_manager_agent() -> CodeAgent:
     """Create and configure the main GAIA agent."""
 
-
-
+    try:
+        # Import config for local model
+        from config import LOCAL_MODEL_CONFIG
+
+        # Use local model to avoid credit limits
+        model = LocalTransformersModel(
+            model_name=LOCAL_MODEL_CONFIG["model_name"],
+            device=LOCAL_MODEL_CONFIG["device"],
+            max_tokens=LOCAL_MODEL_CONFIG["max_tokens"],
+            temperature=LOCAL_MODEL_CONFIG["temperature"]
+        )
+        print(f"Using local model: {LOCAL_MODEL_CONFIG['model_name']}")
+    except Exception as e:
+        print(f"Error setting up local model: {e}")
+        # Use a simplified configuration as fallback
+        model = LocalTransformersModel(
+            model_name="TinyLlama/TinyLlama-1.1B-Chat-v0.6",
+            device="cpu"
+        )
+        print("Using fallback model configuration")
+
+    # Initialize the managed tool-calling agent, sharing the model
+    tool_agent = GaiaToolCallingAgent(local_model=model)
 
     # Create the manager agent
     manager_agent = CodeAgent(
-        model=
+        model=model,
         tools=[],  # No direct tools for manager
        managed_agents=[tool_agent],
         additional_authorized_imports=[
             "json",
-            "pandas",
+            "pandas",
             "numpy",
             "re",
             "requests",

@@ -102,6 +147,7 @@ def create_manager_agent() -> CodeAgent:
         max_steps=10
     )
 
+    print("Manager agent created with local model")
    return manager_agent
 
 def create_agent():
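The rewired create_manager_agent() now builds a LocalTransformersModel, hands the same model to both the managed tool agent and the CodeAgent, and no longer goes through the Inference API. A minimal smoke test of that path might look like the following; this is a sketch rather than code from the commit, and it assumes the functions above are importable from app and that transformers can download the TinyLlama weights on first run:

# Sketch only: exercise the local-model wiring added to app.py.
from app import create_manager_agent

manager = create_manager_agent()   # loads the local model and attaches the managed tool_agent
answer = manager.run("What is the GAIA benchmark?")   # CodeAgent.run() drives the managed agent
print(answer)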
config.py
CHANGED

@@ -7,6 +7,15 @@ LLAMA_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
 
+# --- Model Configuration ---
+USE_LOCAL_MODEL = True  # Set to False to use remote API model instead
+LOCAL_MODEL_CONFIG = {
+    "model_name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",  # A small but capable model
+    "device": "auto",  # Will use GPU if available
+    "max_tokens": 1024,
+    "temperature": 0.5
+}
+
 # --- Request Configuration ---
 MAX_RETRIES = 3
 RETRY_DELAY = 2  # seconds

@@ -65,3 +74,6 @@ ANSWER_PREFIXES_TO_REMOVE = [
 
 LLM_RESPONSE_MARKERS = ["<answer>", "<response>", "Answer:", "Response:", "Assistant:"]
 LLM_END_MARKERS = ["</answer>", "</response>", "Human:", "User:"]
+
+# Ensure knowledge base is loaded correctly
+GAIA_KNOWLEDGE = load_knowledge_base()
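USE_LOCAL_MODEL is introduced here as a switch, but the hunks shown in this commit only read LOCAL_MODEL_CONFIG. A hedged sketch of how the flag could gate model selection is below; the remote branch mirrors the InferenceClientModel import that app.py dropped, and none of this is code from the repository:

# Illustration only: gate the model choice on the new config flag.
from config import USE_LOCAL_MODEL, LOCAL_MODEL_CONFIG

if USE_LOCAL_MODEL:
    from utils.local_model import LocalTransformersModel
    model = LocalTransformersModel(**LOCAL_MODEL_CONFIG)  # keys match the constructor arguments
else:
    from smolagents import InferenceClientModel
    model = InferenceClientModel()  # remote API model, as used before this commit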
requirements.txt
CHANGED

@@ -1,3 +1,4 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
 gradio
 requests
 pandas

@@ -10,4 +11,9 @@ duckduckgo-search
 rank_bm25
 pytube
 python-dateutil
-youtube-transcript-api
+youtube-transcript-api
+torch
+transformers
+torch==2.1.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
+torchvision==0.14.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
+torchaudio==0.10.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
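Note that the added lines request torch twice (once unpinned, once as torch==2.1.1+cpu), append a per-line -f option that pip's requirements format normally rejects, and pin torchvision/torchaudio versions that belong to older torch releases. A consistent CPU-only set could look like the sketch below; the exact version pairings are my assumption, not part of the commit:

--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.1.1
torchvision==0.16.1
torchaudio==2.1.1
transformers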
test_agent.py
CHANGED

@@ -1,8 +1,9 @@
 import os
-from app import
+from app import create_agent
 
 # Initialize the agent
-agent
+print("Creating agent for testing...")
+agent = create_agent()
 
 # Test cases from the logs that were failing
 test_questions = [

@@ -17,7 +18,7 @@ test_questions = [
 for question in test_questions:
     print(f"\nTesting question: {question}")
     try:
-
-        print(f"Agent answer: {
+        response = agent.run(f"Answer this question concisely: {question}")
+        print(f"Agent answer: {response}")
     except Exception as e:
         print(f"Error: {e}")
utils/local_model.py
ADDED

@@ -0,0 +1,177 @@
"""
Custom model implementation using Hugging Face Transformers.

This provides a local model implementation compatible with smolagents framework.
"""

import logging
from typing import Dict, List, Optional, Any
from smolagents.models import Model
from transformers import AutoTokenizer, pipeline

logger = logging.getLogger(__name__)

class LocalTransformersModel(Model):
    """Model using local Hugging Face Transformers models that doesn't require API calls."""

    def __init__(
        self,
        model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        device: str = "auto",
        max_tokens: int = 512,
        temperature: float = 0.7
    ):
        """
        Initialize a local transformer model.

        Args:
            model_name: HuggingFace model identifier
            device: "cpu", "cuda", "auto"
            max_tokens: Maximum new tokens to generate
            temperature: Sampling temperature
        """
        super().__init__()

        try:
            print(f"Loading model {model_name}...")

            self.model_name = model_name
            self.device = device
            self.max_tokens = max_tokens
            self.temperature = temperature

            # Determine if we can use GPU
            if device == "auto":
                import torch
                self.device = "cuda" if torch.cuda.is_available() else "cpu"

            # Load tokenizer and pipeline
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

            # Create text generation pipeline
            self.generator = pipeline(
                "text-generation",
                model=model_name,
                tokenizer=self.tokenizer,
                device=self.device,
                torch_dtype="auto"
            )

            print(f"Model loaded on {self.device}")

        except Exception as e:
            logger.error(f"Error loading model {model_name}: {e}")
            print(f"Error loading model: {e}")
            raise

    def generate(self, prompt: str, **kwargs) -> str:
        """
        Generate text completion for the given prompt.

        Args:
            prompt: Input text

        Returns:
            Generated text completion
        """
        try:
            print(f"Generating with prompt: {prompt[:50]}...")

            # Actual generation
            response = self.generator(
                prompt,
                max_new_tokens=self.max_tokens,
                temperature=self.temperature,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

            # Extract generated text
            generated_text = response[0]['generated_text']

            # Remove the prompt from the beginning
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):]

            return generated_text.strip()

        except Exception as e:
            logger.error(f"Error generating text: {e}")
            print(f"Error generating text: {e}")
            return f"Error: {str(e)}"

    def generate_with_tools(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate a response with tool-calling capabilities.
        This method implements the smolagents BaseModel interface for tool-calling.

        Args:
            messages: List of message objects with role and content
            tools: List of tool definitions

        Returns:
            Response with message and optional tool calls
        """
        try:
            # Format messages into a prompt
            prompt = self._format_messages_to_prompt(messages, tools)

            # Generate response
            completion = self.generate(prompt)

            # For now, just return the text without tool parsing
            # In a future enhancement, we could add tool parsing here
            return {
                "message": {
                    "role": "assistant",
                    "content": completion
                }
            }
        except Exception as e:
            logger.error(f"Error generating with tools: {e}")
            print(f"Error generating with tools: {e}")
            return {
                "message": {
                    "role": "assistant",
                    "content": f"Error: {str(e)}"
                }
            }

    def _format_messages_to_prompt(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Format chat messages into a text prompt for the model."""
        formatted_prompt = ""

        # Include tool descriptions if available
        if tools and len(tools) > 0:
            tool_descriptions = "\n".join([
                f"Tool {i+1}: {tool['name']} - {tool['description']}"
                for i, tool in enumerate(tools)
            ])
            formatted_prompt += f"Available tools:\n{tool_descriptions}\n\n"

        # Add conversation history
        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")

            if role == "system":
                formatted_prompt += f"System: {content}\n\n"
            elif role == "user":
                formatted_prompt += f"User: {content}\n\n"
            elif role == "assistant":
                formatted_prompt += f"Assistant: {content}\n\n"

        # Add final prompt for assistant
        formatted_prompt += "Assistant: "

        return formatted_prompt
        # return f"Error generating response: {str(e)}"
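For reference, a standalone usage sketch of the class added above (not part of the commit; the model name is the file's own default, and the weights are downloaded on first use):

# Sketch: drive LocalTransformersModel directly, outside the agent stack.
from utils.local_model import LocalTransformersModel

model = LocalTransformersModel(
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # default declared in the file
    device="cpu",
    max_tokens=128,
)

# Plain completion
print(model.generate("User: What does the GAIA agent do?\n\nAssistant: "))

# Chat-style call; tool definitions are only folded into the prompt text, not parsed back out
result = model.generate_with_tools(
    messages=[{"role": "user", "content": "List the available tools."}],
    tools=[{"name": "web_search", "description": "Search the web"}],
)
print(result["message"]["content"])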