davidgturner committed
Commit 66d6d1f · 1 Parent(s): fc78ae4

- agent local model cpp

Files changed (2)
  1. utils/llama_cpp_model.py +62 -263
  2. utils/local_model.py +3 -2
utils/llama_cpp_model.py CHANGED
@@ -1,296 +1,95 @@
-"""
-Fallback model implementation for testing when llama-cpp-python is not available.
-
-This provides a compatible model class that doesn't require any external dependencies,
-allowing the rest of the application to function while we solve the llama-cpp-python
-installation issues.
-"""
-
 import os
 import logging
-from typing import Dict, List, Optional, Any, Union
-import requests
-from smolagents import Model
+from typing import Dict, List, Optional, Any
 from pathlib import Path
+from smolagents import Model

 try:
     from llama_cpp import Llama
-    from pathlib import Path
     LLAMA_CPP_AVAILABLE = True
 except ImportError:
     LLAMA_CPP_AVAILABLE = False
     print("llama_cpp module not available, using fallback implementation")

-logger = logging.getLogger(__name__)
+logger = logging.getLogger("LlamaCppModel")
+logger.setLevel(logging.DEBUG)
+ch = logging.StreamHandler()
+ch.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+logger.addHandler(ch)

 class LlamaCppModel(Model):
-    """Model using llama.cpp Python bindings for efficient local inference without PyTorch.
-    Falls back to a simple text generation if llama_cpp is not available."""
-    def __init__(
-        self,
-        model_path: str = None,
-        model_url: str = None,
-        n_ctx: int = 2048,
-        n_gpu_layers: int = 0,
-        max_tokens: int = 512,
-        temperature: float = 0.7,
-        verbose: bool = True
-    ):
-        """
-        Initialize a local llama.cpp model or fallback to a simple implementation.
-
-        Args:
-            model_path: Path to local GGUF model file
-            model_url: URL to download model if model_path doesn't exist
-            n_ctx: Context window size
-            n_gpu_layers: Number of layers to offload to GPU (0 means CPU only)
-            max_tokens: Maximum new tokens to generate
-            temperature: Sampling temperature
-            verbose: Whether to print verbose messages
-        """
+    def __init__(self, model_path: str, n_ctx: int = 2048, n_gpu_layers: int = 0, max_tokens: int = 512, temperature: float = 0.7, verbose: bool = True):
         super().__init__()
-
         self.model_path = model_path
-        self.model_url = model_url
         self.n_ctx = n_ctx
         self.max_tokens = max_tokens
         self.temperature = temperature
         self.verbose = verbose
         self.llm = None
-
-        # Check if we can use llama_cpp
-        if LLAMA_CPP_AVAILABLE:
-            try:
-                if self.verbose:
-                    print("Attempting to initialize LlamaCpp model...")
-
-                # Try to initialize the real model
-                if model_path and os.path.exists(model_path):
-                    if self.verbose:
-                        print(f"Loading model from {model_path}...")
-
-                    # Initialize the llama-cpp model
-                    self.llm = Llama(
-                        model_path=model_path,
-                        n_ctx=n_ctx,
-                        n_gpu_layers=n_gpu_layers,
-                        verbose=verbose
-                    )
-
-                    if self.verbose:
-                        print("LlamaCpp model loaded successfully")
-                else:
-                    if self.verbose:
-                        print(f"Model path not found or not specified. Using fallback mode.")
-            except Exception as e:
-                logger.error(f"Error initializing LlamaCpp model: {e}")
-                if self.verbose:
-                    print(f"Error initializing LlamaCpp model: {e}")
-                self.llm = None
-        else:
-            if self.verbose:
-                print("LlamaCpp not available, using fallback implementation")
-
-        if not self.llm and self.verbose:
-            print("Using fallback text generation mode")
-
-    def _resolve_model_path(self, model_path: str = None, model_url: str = None) -> str:
-        """
-        Resolve model path, downloading if necessary.
-
-        Returns:
-            Absolute path to model file
-        """
-        # Default to a small model if none specified
-        if not model_path:
-            models_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
-            os.makedirs(models_dir, exist_ok=True)
-            model_path = os.path.join(models_dir, "ggml-model-q4_0.bin")
-
-        # Convert to Path for easier handling
-        path = Path(model_path)
-
-        # If model exists, return it
-        if path.exists():
-            return str(path.absolute())
-
-        # Download if URL provided
-        if model_url and not path.exists():
-            try:
-                print(f"Downloading model from {model_url}...")
-                os.makedirs(path.parent, exist_ok=True)
-
-                try:
-                    # Try with streaming download first
-                    with requests.get(model_url, stream=True, timeout=30) as r:
-                        r.raise_for_status()
-                        total_size = int(r.headers.get('content-length', 0))
-                        block_size = 8192
-
-                        with open(path, 'wb') as f:
-                            downloaded = 0
-                            for chunk in r.iter_content(chunk_size=block_size):
-                                if chunk:
-                                    f.write(chunk)
-                                    downloaded += len(chunk)
-                                    if total_size > 0:
-                                        percent = (downloaded / total_size) * 100
-                                        if percent % 10 < (block_size / total_size) * 100:
-                                            print(f"Download progress: {int(percent)}%")
-                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
-                    print(f"Streaming download timed out: {e}. Using a simpler approach...")
-                    # Fall back to simpler download method
-                    r = requests.get(model_url, timeout=60)
-                    r.raise_for_status()
-                    with open(path, 'wb') as f:
-                        f.write(r.content)
-                    print("Download complete with simple method")
-
-                print(f"Model download complete: {path}")
-                return str(path.absolute())
-            except Exception as e:
-                logger.error(f"Error downloading model: {e}")
-                print(f"Error downloading model: {e}")
-                print("Continuing with dummy model instead...")
-                # Create a small dummy model file so we can continue
-                with open(path, 'wb') as f:
-                    f.write(b"DUMMY MODEL")
-                return str(path.absolute())
-
-        # If we get here without a model, create a dummy one
-        print(f"Model file not found at {model_path} and no URL provided. Creating dummy model...")
-        os.makedirs(path.parent, exist_ok=True)
-        with open(path, 'wb') as f:
-            f.write(b"DUMMY MODEL")
-        return str(path.absolute())
-
+
+        if not LLAMA_CPP_AVAILABLE:
+            logger.error("llama_cpp is not installed. Please install with 'pip install llama-cpp-python'")
+            raise ImportError("llama_cpp is required but not installed.")
+
+        if not os.path.exists(model_path):
+            logger.error(f"Model file not found at: {model_path}")
+            raise FileNotFoundError(f"Model file not found at: {model_path}")
+
+        try:
+            logger.info(f"Loading Llama model from: {model_path}")
+            self.llm = Llama(model_path=model_path, n_ctx=n_ctx, n_gpu_layers=n_gpu_layers, verbose=verbose)
+            logger.info("Llama model loaded successfully.")
+        except Exception as e:
+            logger.exception(f"Failed to initialize Llama model: {e}")
+            raise
+
     def generate(self, prompt: str, **kwargs) -> str:
-        """
-        Generate text completion for the given prompt.
-
-        Args:
-            prompt: Input text
-
-        Returns:
-            Generated text completion
-        """
         try:
-            if self.verbose:
-                print(f"Generating with prompt: {prompt[:50]}...")
-
-            # If we have a real model, use it
-            if self.llm:
-                # Actual generation with llama-cpp
-                response = self.llm(
-                    prompt=prompt,
-                    max_tokens=self.max_tokens,
-                    temperature=self.temperature,
-                    echo=False  # Don't include the prompt in the response
-                )
-
-                # Extract generated text
-                if not response:
-                    return ""
-
-                if isinstance(response, dict):
-                    generated_text = response.get('choices', [{}])[0].get('text', '')
-                else:
-                    # List of responses
-                    generated_text = response[0].get('text', '')
-
-                return generated_text.strip()
+            logger.debug(f"Generating with prompt: {prompt[:100]}...")
+            response = self.llm(prompt=prompt, max_tokens=self.max_tokens, temperature=self.temperature, echo=False)
+            logger.debug(f"Raw response: {response}")
+
+            if isinstance(response, dict) and 'choices' in response:
+                text = response['choices'][0]['text'].strip()
+            elif isinstance(response, list):
+                text = response[0].get('text', '').strip()
             else:
-                # Fallback simple generation
-                if self.verbose:
-                    print("Using fallback text generation")
-
-                # Extract key information from prompt
-                words = prompt.strip().split()
-                last_words = ' '.join(words[-10:]) if len(words) > 10 else prompt
-
-                # Simple response generation based on prompt content
-                if "?" in prompt:
-                    return f"Based on the information provided, I believe the answer is related to {last_words}. This is a fallback response as the LLM model could not be loaded."
-                else:
-                    return f"I understand you're asking about {last_words}. Since I'm running in fallback mode without a proper language model, I can only acknowledge your query but not provide a detailed response."
-
+                logger.warning("Unexpected response format from Llama.")
+                text = str(response)
+
+            logger.debug(f"Generated text: {text}")
+            return text
         except Exception as e:
-            logger.error(f"Error generating text: {e}")
-            if self.verbose:
-                print(f"Error generating text: {e}")
-            return f"Error generating response: {str(e)}"
-
-    def generate_with_tools(
-        self,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """
-        Generate a response with tool-calling capabilities.
-        This method implements the smolagents Model interface for tool-calling.
-
-        Args:
-            messages: List of message objects with role and content
-            tools: List of tool definitions
-
-        Returns:
-            Response with message and optional tool calls
-        """
+            logger.exception(f"Error generating text: {e}")
+            return f"Error generating response: {e}"
+
+    def generate_with_tools(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, **kwargs) -> Dict[str, Any]:
         try:
-            # Format messages into a prompt
             prompt = self._format_messages_to_prompt(messages, tools)
-
-            # Generate response
+            logger.debug(f"Formatted prompt: {prompt}")
             completion = self.generate(prompt)
-
-            # For now, just return the text without tool parsing
-            return {
-                "message": {
-                    "role": "assistant",
-                    "content": completion
-                }
-            }
+            return {"message": {"role": "assistant", "content": completion}}
         except Exception as e:
-            logger.error(f"Error generating with tools: {e}")
-            print(f"Error generating with tools: {e}")
-            return {
-                "message": {
-                    "role": "assistant",
-                    "content": f"Error: {str(e)}"
-                }
-            }
-
-    def _format_messages_to_prompt(
-        self,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None
-    ) -> str:
-        """Format chat messages into a text prompt for the model."""
+            logger.exception(f"Error generating with tools: {e}")
+            return {"message": {"role": "assistant", "content": f"Error: {e}"}}
+
+    def _format_messages_to_prompt(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None) -> str:
         formatted_prompt = ""
-
-        # Include tool descriptions if available
-        if tools and len(tools) > 0:
-            tool_descriptions = "\n".join([
-                f"Tool {i+1}: {tool['name']} - {tool['description']}"
-                for i, tool in enumerate(tools)
-            ])
-            formatted_prompt += f"Available tools:\n{tool_descriptions}\n\n"
-
-        # Add conversation history
+        if tools:
+            tool_desc = "\n".join([f"Tool {i+1}: {t['name']} - {t['description']}" for i, t in enumerate(tools)])
+            formatted_prompt += f"Available tools:\n{tool_desc}\n\n"
         for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")
-
-            if role == "system":
-                formatted_prompt += f"System: {content}\n\n"
-            elif role == "user":
-                formatted_prompt += f"User: {content}\n\n"
-            elif role == "assistant":
-                formatted_prompt += f"Assistant: {content}\n\n"
-
-        # Add final prompt for assistant
+            if isinstance(content, list):
+                content = " ".join([c.get("text", str(c)) if isinstance(c, dict) else str(c) for c in content])
+            formatted_prompt += f"{role.capitalize()}: {content}\n\n"
         formatted_prompt += "Assistant: "
-
-        return formatted_prompt
+        logger.debug(f"Constructed prompt: {formatted_prompt}")
+        return formatted_prompt
+
+# Example usage (for testing):
+# model = LlamaCppModel(model_path="/path/to/your/llama-model.gguf")
+# print(model.generate("Hello, how are you?"))
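For reference, a minimal sketch of how the rewritten class is meant to be driven after this change. The model file, the `utils.llama_cpp_model` import path, and the `web_search` tool entry are illustrative placeholders, not part of this commit; only the constructor, generate, and generate_with_tools signatures come from the diff above.

from utils.llama_cpp_model import LlamaCppModel  # assumes the utils package is importable

# Hypothetical GGUF path; __init__ now raises FileNotFoundError if the file is missing
# and ImportError if llama-cpp-python is not installed, instead of falling back.
model = LlamaCppModel(model_path="models/your-model.gguf", n_gpu_layers=0, verbose=False)

# Plain text completion.
print(model.generate("Explain what a context window is in one sentence."))

# Tool-aware call: tools are plain dicts; _format_messages_to_prompt only reads
# their 'name' and 'description' keys when building the prompt.
tools = [{"name": "web_search", "description": "Search the web for a query."}]
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is llama.cpp?"},
]
result = model.generate_with_tools(messages, tools=tools)
print(result["message"]["content"])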
utils/local_model.py CHANGED
@@ -96,8 +96,9 @@ class LocalTransformersModel(Model):
             return generated_text.strip()

         except Exception as e:
-            logger.error(f"Error generating text: {e}")
-            print(f"Error generating text: {e}")
+            error_msg = f"Error generating text (Local model): {e}"
+            logger.error(error_msg)
+            print(error_msg)
             return f"Error: {str(e)}"

     def generate_with_tools(