Commit 66d6d1f
Parent: fc78ae4
Commit message: "agent local model cpp"

Files changed:
- utils/llama_cpp_model.py +62 -263
- utils/local_model.py +3 -2
utils/llama_cpp_model.py
CHANGED
@@ -1,296 +1,95 @@

Old version (lines marked "-" were removed; unmarked lines are unchanged context; "…" and "# …" mark content missing from the original listing):

-"""
-Fallback model implementation for testing when llama-cpp-python is not available.
-
-This provides a compatible model class that doesn't require any external dependencies,
-allowing the rest of the application to function while we solve the llama-cpp-python
-installation issues.
-"""
-
import os
import logging
-from typing import Dict, List, Optional, Any
-import requests
-from smolagents import Model
from pathlib import Path

-# Try to import llama_cpp, but don't fail if not available
try:
    from llama_cpp import Llama
-    from pathlib import Path
    LLAMA_CPP_AVAILABLE = True
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    print("llama_cpp module not available, using fallback implementation")

-logger = logging.getLogger(…

class LlamaCppModel(Model):
-    """…
-    Falls back to a simple text generation if llama_cpp is not available."""
-    def __init__(
-        self,
-        model_path: str = None,
-        model_url: str = None,
-        n_ctx: int = 2048,
-        n_gpu_layers: int = 0,
-        max_tokens: int = 512,
-        temperature: float = 0.7,
-        verbose: bool = True
-    ):
-        """
-        Initialize a local llama.cpp model or fallback to a simple implementation.
-
-        Args:
-            model_path: Path to local GGUF model file
-            model_url: URL to download model if model_path doesn't exist
-            n_ctx: Context window size
-            n_gpu_layers: Number of layers to offload to GPU (0 means CPU only)
-            max_tokens: Maximum new tokens to generate
-            temperature: Sampling temperature
-            verbose: Whether to print verbose messages
-        """
        super().__init__()
-
        self.model_path = model_path
-        self.model_url = model_url
        self.n_ctx = n_ctx
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose
        self.llm = None
-        # …
-                    verbose=verbose
-                )
-
-                if self.verbose:
-                    print("LlamaCpp model loaded successfully")
-            else:
-                if self.verbose:
-                    print(f"Model path not found or not specified. Using fallback mode.")
-        except Exception as e:
-            logger.error(f"Error initializing LlamaCpp model: {e}")
-            if self.verbose:
-                print(f"Error initializing LlamaCpp model: {e}")
-            self.llm = None
-        else:
-            if self.verbose:
-                print("LlamaCpp not available, using fallback implementation")
-
-        if not self.llm and self.verbose:
-            print("Using fallback text generation mode")
-
-    def _resolve_model_path(self, model_path: str = None, model_url: str = None) -> str:
-        """
-        Resolve model path, downloading if necessary.
-
-        Returns:
-            Absolute path to model file
-        """
-        # Default to a small model if none specified
-        if not model_path:
-            models_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
-            os.makedirs(models_dir, exist_ok=True)
-            model_path = os.path.join(models_dir, "ggml-model-q4_0.bin")
-
-        # Convert to Path for easier handling
-        path = Path(model_path)
-
-        # If model exists, return it
-        if path.exists():
-            return str(path.absolute())
-
-        # Download if URL provided
-        if model_url and not path.exists():
-            try:
-                print(f"Downloading model from {model_url}...")
-                os.makedirs(path.parent, exist_ok=True)
-
-                try:
-                    # Try with streaming download first
-                    with requests.get(model_url, stream=True, timeout=30) as r:
-                        r.raise_for_status()
-                        total_size = int(r.headers.get('content-length', 0))
-                        block_size = 8192
-
-                        with open(path, 'wb') as f:
-                            downloaded = 0
-                            for chunk in r.iter_content(chunk_size=block_size):
-                                if chunk:
-                                    f.write(chunk)
-                                    downloaded += len(chunk)
-                                    if total_size > 0:
-                                        percent = (downloaded / total_size) * 100
-                                        if percent % 10 < (block_size / total_size) * 100:
-                                            print(f"Download progress: {int(percent)}%")
-                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
-                    print(f"Streaming download timed out: {e}. Using a simpler approach...")
-                    # Fall back to simpler download method
-                    r = requests.get(model_url, timeout=60)
-                    r.raise_for_status()
-                    with open(path, 'wb') as f:
-                        f.write(r.content)
-                    print("Download complete with simple method")
-
-                print(f"Model download complete: {path}")
-                return str(path.absolute())
-            except Exception as e:
-                logger.error(f"Error downloading model: {e}")
-                print(f"Error downloading model: {e}")
-                print("Continuing with dummy model instead...")
-                # Create a small dummy model file so we can continue
-                with open(path, 'wb') as f:
-                    f.write(b"DUMMY MODEL")
-                return str(path.absolute())
-
-        # If we get here without a model, create a dummy one
-        print(f"Model file not found at {model_path} and no URL provided. Creating dummy model...")
-        os.makedirs(path.parent, exist_ok=True)
-        with open(path, 'wb') as f:
-            f.write(b"DUMMY MODEL")
-        return str(path.absolute())
-
    def generate(self, prompt: str, **kwargs) -> str:
-        """
-        Generate text completion for the given prompt.
-
-        Args:
-            prompt: Input text
-
-        Returns:
-            Generated text completion
-        """
        try:
-            # …
-            if …
-                # …
-                    max_tokens=self.max_tokens,
-                    temperature=self.temperature,
-                    echo=False  # Don't include the prompt in the response
-                )
-
-                # Extract generated text
-                if not response:
-                    return ""
-
-                if isinstance(response, dict):
-                    generated_text = response.get('choices', [{}])[0].get('text', '')
-                else:
-                    # List of responses
-                    generated_text = response[0].get('text', '')
-
-                return generated_text.strip()
            else:
-                # …
-                words = prompt.strip().split()
-                last_words = ' '.join(words[-10:]) if len(words) > 10 else prompt
-
-                # Simple response generation based on prompt content
-                if "?" in prompt:
-                    return f"Based on the information provided, I believe the answer is related to {last_words}. This is a fallback response as the LLM model could not be loaded."
-                else:
-                    return f"I understand you're asking about {last_words}. Since I'm running in fallback mode without a proper language model, I can only acknowledge your query but not provide a detailed response."
-
        except Exception as e:
-            logger.…
-
-    def generate_with_tools(
-        self,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """
-        Generate a response with tool-calling capabilities.
-        This method implements the smolagents Model interface for tool-calling.
-
-        Args:
-            messages: List of message objects with role and content
-            tools: List of tool definitions
-
-        Returns:
-            Response with message and optional tool calls
-        """
        try:
-            # Format messages into a prompt
            prompt = self._format_messages_to_prompt(messages, tools)
-
-            # Generate response
            completion = self.generate(prompt)
-
-            # For now, just return the text without tool parsing
-            return {
-                "message": {
-                    "role": "assistant",
-                    "content": completion
-                }
-            }
        except Exception as e:
-            logger.…
-            # …
-                    "role": "assistant",
-                    "content": f"Error: {str(e)}"
-                }
-            }
-
-    def _format_messages_to_prompt(
-        self,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None
-    ) -> str:
-        """Format chat messages into a text prompt for the model."""
        formatted_prompt = ""
-
-        # …
-            tool_descriptions = "\n".join([
-                f"Tool {i+1}: {tool['name']} - {tool['description']}"
-                for i, tool in enumerate(tools)
-            ])
-            formatted_prompt += f"Available tools:\n{tool_descriptions}\n\n"
-
-        # Add conversation history
        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")
-            # …
-            elif role == "user":
-                formatted_prompt += f"User: {content}\n\n"
-            elif role == "assistant":
-                formatted_prompt += f"Assistant: {content}\n\n"
-
-        # Add final prompt for assistant
        formatted_prompt += "Assistant: "
-
-        return formatted_prompt
New version (lines marked "+" were added; unmarked lines are unchanged context):

import os
import logging
+from typing import Dict, List, Optional, Any
from pathlib import Path
+from smolagents import Model

try:
    from llama_cpp import Llama
    LLAMA_CPP_AVAILABLE = True
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    print("llama_cpp module not available, using fallback implementation")

+logger = logging.getLogger("LlamaCppModel")
+logger.setLevel(logging.DEBUG)
+ch = logging.StreamHandler()
+ch.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+logger.addHandler(ch)

class LlamaCppModel(Model):
+    def __init__(self, model_path: str, n_ctx: int = 2048, n_gpu_layers: int = 0, max_tokens: int = 512, temperature: float = 0.7, verbose: bool = True):
        super().__init__()
        self.model_path = model_path
        self.n_ctx = n_ctx
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose
        self.llm = None
+
+        if not LLAMA_CPP_AVAILABLE:
+            logger.error("llama_cpp is not installed. Please install with 'pip install llama-cpp-python'")
+            raise ImportError("llama_cpp is required but not installed.")
+
+        if not os.path.exists(model_path):
+            logger.error(f"Model file not found at: {model_path}")
+            raise FileNotFoundError(f"Model file not found at: {model_path}")
+
+        try:
+            logger.info(f"Loading Llama model from: {model_path}")
+            self.llm = Llama(model_path=model_path, n_ctx=n_ctx, n_gpu_layers=n_gpu_layers, verbose=verbose)
+            logger.info("Llama model loaded successfully.")
+        except Exception as e:
+            logger.exception(f"Failed to initialize Llama model: {e}")
+            raise
+
    def generate(self, prompt: str, **kwargs) -> str:
        try:
+            logger.debug(f"Generating with prompt: {prompt[:100]}...")
+            response = self.llm(prompt=prompt, max_tokens=self.max_tokens, temperature=self.temperature, echo=False)
+            logger.debug(f"Raw response: {response}")
+
+            if isinstance(response, dict) and 'choices' in response:
+                text = response['choices'][0]['text'].strip()
+            elif isinstance(response, list):
+                text = response[0].get('text', '').strip()
            else:
+                logger.warning("Unexpected response format from Llama.")
+                text = str(response)
+
+            logger.debug(f"Generated text: {text}")
+            return text
        except Exception as e:
+            logger.exception(f"Error generating text: {e}")
+            return f"Error generating response: {e}"
+
+    def generate_with_tools(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, **kwargs) -> Dict[str, Any]:
        try:
            prompt = self._format_messages_to_prompt(messages, tools)
+            logger.debug(f"Formatted prompt: {prompt}")
            completion = self.generate(prompt)
+            return {"message": {"role": "assistant", "content": completion}}
        except Exception as e:
+            logger.exception(f"Error generating with tools: {e}")
+            return {"message": {"role": "assistant", "content": f"Error: {e}"}}
+
+    def _format_messages_to_prompt(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None) -> str:
        formatted_prompt = ""
+        if tools:
+            tool_desc = "\n".join([f"Tool {i+1}: {t['name']} - {t['description']}" for i, t in enumerate(tools)])
+            formatted_prompt += f"Available tools:\n{tool_desc}\n\n"
        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")
+            if isinstance(content, list):
+                content = " ".join([c.get("text", str(c)) if isinstance(c, dict) else str(c) for c in content])
+            formatted_prompt += f"{role.capitalize()}: {content}\n\n"
        formatted_prompt += "Assistant: "
+        logger.debug(f"Constructed prompt: {formatted_prompt}")
+        return formatted_prompt
+
+# Example usage (for testing):
+# model = LlamaCppModel(model_path="/path/to/your/llama-model.gguf")
+# print(model.generate("Hello, how are you?"))
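For context, a short usage sketch of the rewritten class, expanding the commented-out example above. It assumes llama-cpp-python and smolagents are installed, the script is run from the repository root, and a GGUF file exists at the placeholder path; the path, prompts, and messages below are illustrative and not part of the commit.

from utils.llama_cpp_model import LlamaCppModel

# Placeholder path: point this at any local GGUF model file.
model = LlamaCppModel(model_path="models/llama-model.gguf", n_ctx=2048, max_tokens=256)

# Plain text completion via generate().
print(model.generate("Q: What does llama.cpp do?\nA:"))

# Chat-style call: messages are flattened into a single prompt by
# _format_messages_to_prompt() and then passed through generate().
messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Name one advantage of running a GGUF model locally."},
]
result = model.generate_with_tools(messages)
print(result["message"]["content"])

Unlike the removed fallback version, the constructor now raises ImportError or FileNotFoundError instead of silently switching to canned responses, so misconfiguration surfaces at startup rather than at generation time.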
utils/local_model.py
CHANGED
@@ -96,8 +96,9 @@ class LocalTransformersModel(Model):
            return generated_text.strip()

        except Exception as e:
-            # …
-            # …
+            error_msg = f"Error generating text (Local model): {e}"
+            logger.error(error_msg)
+            print(error_msg)
            return f"Error: {str(e)}"

    def generate_with_tools(