Commit fc78ae4 · Parent: 86dcf65 — "changes for agent"

Files changed:
- app.py (+36 −21)
- config.py (+16 −2)
- requirements.txt (+3 −6)
- utils/llama_cpp_model.py (+296 −0, new file)
- utils/ollama_model.py (+175 −0, new file)
app.py
CHANGED

@@ -6,10 +6,12 @@ from smolagents import Tool, CodeAgent, Model
 
 # Import internal modules
 from config import (
-    DEFAULT_API_URL
+    DEFAULT_API_URL,
+    USE_LLAMACPP,
+    LLAMACPP_CONFIG
 )
 from tools.tool_manager import ToolManager
-from utils.
+from utils.llama_cpp_model import LlamaCppModel
 
 class GaiaToolCallingAgent:
     """Tool-calling agent specifically designed for the GAIA system."""

@@ -24,9 +26,8 @@ class GaiaToolCallingAgent:
         self.local_model = local_model
         if not self.local_model:
             try:
-                from utils.
-                self.local_model =
-                    model_name="TinyLlama/TinyLlama-1.1B-Chat-v0.6",
+                from utils.llama_cpp_model import LlamaCppModel
+                self.local_model = LlamaCppModel(
                     max_tokens=512
                 )
             except Exception as e:

@@ -106,25 +107,39 @@ def create_manager_agent() -> CodeAgent:
     """Create and configure the main GAIA agent."""
 
     try:
-        # Import config for
-        from config import LOCAL_MODEL_CONFIG
+        # Import config for model
+        from config import LOCAL_MODEL_CONFIG, USE_LLAMACPP, LLAMACPP_CONFIG
 
-        # Use
-
-
-
-
-
-
-
+        # Use llama-cpp-python model (no PyTorch dependency)
+        if USE_LLAMACPP:
+            # Initialize llama-cpp model
+            model = LlamaCppModel(
+                model_path=LLAMACPP_CONFIG.get("model_path"),
+                model_url=LLAMACPP_CONFIG.get("model_url"),
+                n_ctx=LLAMACPP_CONFIG.get("n_ctx", 2048),
+                n_gpu_layers=LLAMACPP_CONFIG.get("n_gpu_layers", 0),
+                max_tokens=LLAMACPP_CONFIG.get("max_tokens", 512),
+                temperature=LLAMACPP_CONFIG.get("temperature", 0.7)
+            )
+            print(f"Using LlamaCpp model")
+        else:
+            # Use a simpler stub model if needed
+            from smolagents import StubModel
+            model = StubModel()
+            print("Using StubModel as fallback")
+
     except Exception as e:
-        print(f"Error setting up
+        print(f"Error setting up model: {e}")
         # Use a simplified configuration as fallback
-
-
-
-
-
+        try:
+            # Simple fallback with default params
+            model = LlamaCppModel()
+            print("Using fallback LlamaCpp model configuration")
+        except Exception as e2:
+            # Last resort fallback
+            from smolagents import StubModel
+            model = StubModel()
+            print(f"Using StubModel due to error: {e2}")
 
     # Initialize the managed tool-calling agent, sharing the model
     tool_agent = GaiaToolCallingAgent(local_model=model)

(Note: several removed lines in this hunk were truncated by the page extraction and are shown as bare "-" markers.)
config.py
CHANGED

@@ -9,9 +9,23 @@ HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
 
 # --- Model Configuration ---
 USE_LOCAL_MODEL = True  # Set to False to use remote API model instead
+USE_LLAMACPP = True  # Set to True to use llama-cpp-python instead of transformers
+
+# Configuration for llama-cpp-python model
+LLAMACPP_CONFIG = {
+    "model_path": None,  # Will use a default small model if None
+    # Using a smaller GGUF model to avoid download issues
+    "model_url": "https://huggingface.co/eachadea/ggml-gridlocked-alpha-3b/resolve/main/ggml-gridlocked-3b-q4_0.bin",
+    "n_ctx": 2048,
+    "n_gpu_layers": 0,  # Use 0 for CPU-only
+    "max_tokens": 1024,
+    "temperature": 0.7
+}
+
+# Backup configuration for transformers model
 LOCAL_MODEL_CONFIG = {
-    "model_name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
-    "device": "
+    "model_name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
+    "device": "cpu",
     "max_tokens": 1024,
     "temperature": 0.5
 }
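For reference, app.py consumes these new flags roughly as sketched below. This is only a condensed restatement of the app.py hunk above, not an additional change in this commit.

# Sketch of how config.py's new flags drive model selection (see the app.py diff).
from config import USE_LLAMACPP, LLAMACPP_CONFIG, LOCAL_MODEL_CONFIG
from utils.llama_cpp_model import LlamaCppModel

if USE_LLAMACPP:
    model = LlamaCppModel(
        model_path=LLAMACPP_CONFIG.get("model_path"),
        model_url=LLAMACPP_CONFIG.get("model_url"),
        n_ctx=LLAMACPP_CONFIG.get("n_ctx", 2048),
        n_gpu_layers=LLAMACPP_CONFIG.get("n_gpu_layers", 0),
        max_tokens=LLAMACPP_CONFIG.get("max_tokens", 512),
        temperature=LLAMACPP_CONFIG.get("temperature", 0.7),
    )
else:
    # LOCAL_MODEL_CONFIG stays available as the backup transformers configuration.
    print(f"Transformers backup model: {LOCAL_MODEL_CONFIG['model_name']}")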
requirements.txt
CHANGED

@@ -1,4 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/cpu
 gradio
 requests
 pandas

@@ -12,8 +11,6 @@ rank_bm25
 pytube
 python-dateutil
 youtube-transcript-api
-
-
-
-# torchvision
-# torchaudio
+--extra-index-url https://download.pytorch.org/whl/cpu
+--find-links https://github.com/abetlen/llama-cpp-python/releases/latest
+llama-cpp-python

(Note: three of the removed lines were truncated by the page extraction and are shown as bare "-" markers.)
utils/llama_cpp_model.py
ADDED (296 lines; full contents of the new file shown below)

"""
Fallback model implementation for testing when llama-cpp-python is not available.

This provides a compatible model class that doesn't require any external dependencies,
allowing the rest of the application to function while we solve the llama-cpp-python
installation issues.
"""

import os
import logging
from typing import Dict, List, Optional, Any, Union
import requests
from smolagents import Model
from pathlib import Path

# Try to import llama_cpp, but don't fail if not available
try:
    from llama_cpp import Llama
    from pathlib import Path
    LLAMA_CPP_AVAILABLE = True
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    print("llama_cpp module not available, using fallback implementation")

logger = logging.getLogger(__name__)

class LlamaCppModel(Model):
    """Model using llama.cpp Python bindings for efficient local inference without PyTorch.
    Falls back to a simple text generation if llama_cpp is not available."""
    def __init__(
        self,
        model_path: str = None,
        model_url: str = None,
        n_ctx: int = 2048,
        n_gpu_layers: int = 0,
        max_tokens: int = 512,
        temperature: float = 0.7,
        verbose: bool = True
    ):
        """
        Initialize a local llama.cpp model or fallback to a simple implementation.

        Args:
            model_path: Path to local GGUF model file
            model_url: URL to download model if model_path doesn't exist
            n_ctx: Context window size
            n_gpu_layers: Number of layers to offload to GPU (0 means CPU only)
            max_tokens: Maximum new tokens to generate
            temperature: Sampling temperature
            verbose: Whether to print verbose messages
        """
        super().__init__()

        self.model_path = model_path
        self.model_url = model_url
        self.n_ctx = n_ctx
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose
        self.llm = None

        # Check if we can use llama_cpp
        if LLAMA_CPP_AVAILABLE:
            try:
                if self.verbose:
                    print("Attempting to initialize LlamaCpp model...")

                # Try to initialize the real model
                if model_path and os.path.exists(model_path):
                    if self.verbose:
                        print(f"Loading model from {model_path}...")

                    # Initialize the llama-cpp model
                    self.llm = Llama(
                        model_path=model_path,
                        n_ctx=n_ctx,
                        n_gpu_layers=n_gpu_layers,
                        verbose=verbose
                    )

                    if self.verbose:
                        print("LlamaCpp model loaded successfully")
                else:
                    if self.verbose:
                        print(f"Model path not found or not specified. Using fallback mode.")
            except Exception as e:
                logger.error(f"Error initializing LlamaCpp model: {e}")
                if self.verbose:
                    print(f"Error initializing LlamaCpp model: {e}")
                self.llm = None
        else:
            if self.verbose:
                print("LlamaCpp not available, using fallback implementation")

        if not self.llm and self.verbose:
            print("Using fallback text generation mode")

    def _resolve_model_path(self, model_path: str = None, model_url: str = None) -> str:
        """
        Resolve model path, downloading if necessary.

        Returns:
            Absolute path to model file
        """
        # Default to a small model if none specified
        if not model_path:
            models_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
            os.makedirs(models_dir, exist_ok=True)
            model_path = os.path.join(models_dir, "ggml-model-q4_0.bin")

        # Convert to Path for easier handling
        path = Path(model_path)

        # If model exists, return it
        if path.exists():
            return str(path.absolute())

        # Download if URL provided
        if model_url and not path.exists():
            try:
                print(f"Downloading model from {model_url}...")
                os.makedirs(path.parent, exist_ok=True)

                try:
                    # Try with streaming download first
                    with requests.get(model_url, stream=True, timeout=30) as r:
                        r.raise_for_status()
                        total_size = int(r.headers.get('content-length', 0))
                        block_size = 8192

                        with open(path, 'wb') as f:
                            downloaded = 0
                            for chunk in r.iter_content(chunk_size=block_size):
                                if chunk:
                                    f.write(chunk)
                                    downloaded += len(chunk)
                                    if total_size > 0:
                                        percent = (downloaded / total_size) * 100
                                        if percent % 10 < (block_size / total_size) * 100:
                                            print(f"Download progress: {int(percent)}%")
                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
                    print(f"Streaming download timed out: {e}. Using a simpler approach...")
                    # Fall back to simpler download method
                    r = requests.get(model_url, timeout=60)
                    r.raise_for_status()
                    with open(path, 'wb') as f:
                        f.write(r.content)
                    print("Download complete with simple method")

                print(f"Model download complete: {path}")
                return str(path.absolute())
            except Exception as e:
                logger.error(f"Error downloading model: {e}")
                print(f"Error downloading model: {e}")
                print("Continuing with dummy model instead...")
                # Create a small dummy model file so we can continue
                with open(path, 'wb') as f:
                    f.write(b"DUMMY MODEL")
                return str(path.absolute())

        # If we get here without a model, create a dummy one
        print(f"Model file not found at {model_path} and no URL provided. Creating dummy model...")
        os.makedirs(path.parent, exist_ok=True)
        with open(path, 'wb') as f:
            f.write(b"DUMMY MODEL")
        return str(path.absolute())

    def generate(self, prompt: str, **kwargs) -> str:
        """
        Generate text completion for the given prompt.

        Args:
            prompt: Input text

        Returns:
            Generated text completion
        """
        try:
            if self.verbose:
                print(f"Generating with prompt: {prompt[:50]}...")

            # If we have a real model, use it
            if self.llm:
                # Actual generation with llama-cpp
                response = self.llm(
                    prompt=prompt,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    echo=False  # Don't include the prompt in the response
                )

                # Extract generated text
                if not response:
                    return ""

                if isinstance(response, dict):
                    generated_text = response.get('choices', [{}])[0].get('text', '')
                else:
                    # List of responses
                    generated_text = response[0].get('text', '')

                return generated_text.strip()
            else:
                # Fallback simple generation
                if self.verbose:
                    print("Using fallback text generation")

                # Extract key information from prompt
                words = prompt.strip().split()
                last_words = ' '.join(words[-10:]) if len(words) > 10 else prompt

                # Simple response generation based on prompt content
                if "?" in prompt:
                    return f"Based on the information provided, I believe the answer is related to {last_words}. This is a fallback response as the LLM model could not be loaded."
                else:
                    return f"I understand you're asking about {last_words}. Since I'm running in fallback mode without a proper language model, I can only acknowledge your query but not provide a detailed response."

        except Exception as e:
            logger.error(f"Error generating text: {e}")
            if self.verbose:
                print(f"Error generating text: {e}")
            return f"Error generating response: {str(e)}"

    def generate_with_tools(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate a response with tool-calling capabilities.
        This method implements the smolagents Model interface for tool-calling.

        Args:
            messages: List of message objects with role and content
            tools: List of tool definitions

        Returns:
            Response with message and optional tool calls
        """
        try:
            # Format messages into a prompt
            prompt = self._format_messages_to_prompt(messages, tools)

            # Generate response
            completion = self.generate(prompt)

            # For now, just return the text without tool parsing
            return {
                "message": {
                    "role": "assistant",
                    "content": completion
                }
            }
        except Exception as e:
            logger.error(f"Error generating with tools: {e}")
            print(f"Error generating with tools: {e}")
            return {
                "message": {
                    "role": "assistant",
                    "content": f"Error: {str(e)}"
                }
            }

    def _format_messages_to_prompt(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Format chat messages into a text prompt for the model."""
        formatted_prompt = ""

        # Include tool descriptions if available
        if tools and len(tools) > 0:
            tool_descriptions = "\n".join([
                f"Tool {i+1}: {tool['name']} - {tool['description']}"
                for i, tool in enumerate(tools)
            ])
            formatted_prompt += f"Available tools:\n{tool_descriptions}\n\n"

        # Add conversation history
        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")

            if role == "system":
                formatted_prompt += f"System: {content}\n\n"
            elif role == "user":
                formatted_prompt += f"User: {content}\n\n"
            elif role == "assistant":
                formatted_prompt += f"Assistant: {content}\n\n"

        # Add final prompt for assistant
        formatted_prompt += "Assistant: "

        return formatted_prompt
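A minimal usage sketch for the class added above (illustrative only, not part of the commit). The GGUF path is a placeholder; as the docstrings note, generation falls back to canned text when llama_cpp or the model file is unavailable.

# Illustrative only; model_path below is a hypothetical local file.
from utils.llama_cpp_model import LlamaCppModel

model = LlamaCppModel(
    model_path="models/ggml-model-q4_0.bin",  # placeholder path; fallback mode if missing
    n_ctx=2048,
    max_tokens=256,
)

# Plain completion (returns fallback text if no real model could be loaded).
print(model.generate("What is the GAIA benchmark?"))

# smolagents-facing interface: returns {"message": {"role": "assistant", "content": ...}}.
reply = model.generate_with_tools(
    messages=[{"role": "user", "content": "Summarize the available tools."}],
    tools=[{"name": "search", "description": "Web search tool"}],
)
print(reply["message"]["content"])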
utils/ollama_model.py
ADDED (175 lines; full contents of the new file shown below)

"""
Alternative model implementation using Ollama API.

This provides a local model implementation that doesn't require PyTorch,
by connecting to a locally running Ollama server.
"""

import logging
import requests
from typing import Dict, List, Optional, Any
from smolagents.models import Model

logger = logging.getLogger(__name__)

class OllamaModel(Model):
    """Model using Ollama API for local inference without PyTorch dependency."""

    def __init__(
        self,
        model_name: str = "llama2",
        api_base: str = "http://localhost:11434",
        max_tokens: int = 512,
        temperature: float = 0.7
    ):
        """
        Initialize a connection to local Ollama server.

        Args:
            model_name: Ollama model name (e.g., llama2, mistral, gemma)
            api_base: Base URL for Ollama API
            max_tokens: Maximum new tokens to generate
            temperature: Sampling temperature
        """
        super().__init__()

        try:
            self.model_name = model_name
            self.api_base = api_base.rstrip('/')
            self.max_tokens = max_tokens
            self.temperature = temperature

            # Test connection to Ollama
            print(f"Testing connection to Ollama at {api_base}...")
            response = requests.get(f"{self.api_base}/api/tags")
            if response.status_code == 200:
                models = [model["name"] for model in response.json().get("models", [])]
                print(f"Available Ollama models: {models}")
                if model_name not in models and models:
                    print(f"Warning: Model {model_name} not found. Available models: {models}")
                print(f"Ollama connection successful")
            else:
                print(f"Warning: Ollama server not responding correctly. Status code: {response.status_code}")

        except Exception as e:
            logger.error(f"Error connecting to Ollama: {e}")
            print(f"Error connecting to Ollama: {e}")
            print("Make sure Ollama is installed and running. Visit https://ollama.ai for installation.")
            raise

    def generate(self, prompt: str, **kwargs) -> str:
        """
        Generate text completion using Ollama API.

        Args:
            prompt: Input text

        Returns:
            Generated text completion
        """
        try:
            print(f"Generating with prompt: {prompt[:50]}...")

            # Prepare request
            data = {
                "model": self.model_name,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": self.temperature,
                    "num_predict": self.max_tokens
                }
            }

            # Make API call
            response = requests.post(
                f"{self.api_base}/api/generate",
                json=data
            )

            if response.status_code != 200:
                error_msg = f"Ollama API error: {response.status_code} - {response.text}"
                print(error_msg)
                return error_msg

            # Extract generated text
            result = response.json()
            return result.get("response", "No response received")

        except Exception as e:
            logger.error(f"Error generating text with Ollama: {e}")
            print(f"Error generating text with Ollama: {e}")
            return f"Error: {str(e)}"

    def generate_with_tools(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate a response with tool-calling capabilities using Ollama.

        Args:
            messages: List of message objects with role and content
            tools: List of tool definitions

        Returns:
            Response with message and optional tool calls
        """
        try:
            # Format messages into a prompt
            prompt = self._format_messages_to_prompt(messages, tools)

            # Generate response
            completion = self.generate(prompt)

            # Return the formatted response
            return {
                "message": {
                    "role": "assistant",
                    "content": completion
                }
            }
        except Exception as e:
            logger.error(f"Error generating with tools: {e}")
            print(f"Error generating with tools: {e}")
            return {
                "message": {
                    "role": "assistant",
                    "content": f"Error: {str(e)}"
                }
            }

    def _format_messages_to_prompt(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Format chat messages into a text prompt for the model."""
        formatted_prompt = ""

        # Include tool descriptions if available
        if tools and len(tools) > 0:
            tool_descriptions = "\n".join([
                f"Tool {i+1}: {tool['name']} - {tool['description']}"
                for i, tool in enumerate(tools)
            ])
            formatted_prompt += f"Available tools:\n{tool_descriptions}\n\n"

        # Add conversation history
        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")

            if role == "system":
                formatted_prompt += f"System: {content}\n\n"
            elif role == "user":
                formatted_prompt += f"User: {content}\n\n"
            elif role == "assistant":
                formatted_prompt += f"Assistant: {content}\n\n"

        # Add final prompt for assistant
        formatted_prompt += "Assistant: "

        return formatted_prompt
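A similar usage sketch for OllamaModel (illustrative only, not part of the commit). It assumes an Ollama server is running locally with the "llama2" model pulled, as described in the class docstring.

# Illustrative only; requires a local Ollama server (https://ollama.ai).
from utils.ollama_model import OllamaModel

model = OllamaModel(model_name="llama2", api_base="http://localhost:11434", max_tokens=256)

# Plain completion via the Ollama /api/generate endpoint.
print(model.generate("List three uses of a tool-calling agent."))

# Chat-style call through the same generate_with_tools interface as LlamaCppModel.
reply = model.generate_with_tools(
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Which tools can you call?"},
    ],
    tools=[{"name": "calculator", "description": "Evaluates arithmetic expressions"}],
)
print(reply["message"]["content"])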