helloparthshah committed
Commit 1476939 · 1 Parent(s): 0e323dc

Giving agent access to the model costs

src/manager/agent_manager.py CHANGED

@@ -141,8 +141,8 @@ class AgentManager():
 
     def validate_budget(self, amount: float) -> None:
         if not self.budget_manager.can_spend(amount):
-            raise ValueError(f"Do not have enough budget to create the tool. "
-                             +f"Creating the tool costs {amount} but only {self.budget_manager.get_current_remaining_budget()} is remaining")
+            raise ValueError(f"Do not have enough budget to create/use the agent. "
+                             +f"Creating/Using the agent costs {amount} but only {self.budget_manager.get_current_remaining_budget()} is remaining")
 
     def create_agent_class(self, agent_name: str, base_model: str, system_prompt: str, description: str = "", create_cost: float = 0, invoke_cost: float = 0,
                            **additional_params) -> Agent:
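
The guard in validate_budget simply refuses the operation when the requested amount exceeds what the budget manager still has available, and the updated message now talks about agents rather than tools. Below is a minimal self-contained sketch of the same check; SimpleBudget is a hypothetical stand-in for the repo's BudgetManager (which exposes can_spend() and get_current_remaining_budget()).

# Minimal sketch of the budget guard. SimpleBudget is hypothetical, standing in
# for the project's BudgetManager singleton.
class SimpleBudget:
    def __init__(self, total: float = 100):
        self.total = total
        self.spent = 0.0

    def get_current_remaining_budget(self) -> float:
        return self.total - self.spent

    def can_spend(self, amount: float) -> bool:
        return amount <= self.get_current_remaining_budget()


def validate_budget(budget: SimpleBudget, amount: float) -> None:
    # Mirrors AgentManager.validate_budget: raise before any cost is incurred.
    if not budget.can_spend(amount):
        raise ValueError(
            f"Do not have enough budget to create/use the agent. "
            f"Creating/Using the agent costs {amount} but only "
            f"{budget.get_current_remaining_budget()} is remaining"
        )


budget = SimpleBudget(total=100)
validate_budget(budget, 20)     # fine: 20 <= 100
# validate_budget(budget, 120)  # would raise ValueError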
src/manager/budget_manager.py CHANGED

@@ -1,6 +1,7 @@
 from src.manager.utils.singleton import singleton
 import torch
 import psutil
+
 @singleton
 class BudgetManager():
     TOTAL_BUDGET = 100
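
For context, BudgetManager is wrapped in the project's @singleton decorator, so every caller (including AgentManager above) shares one budget. The decorator itself is not part of this commit; the sketch below is a typical class-decorator implementation, an assumption rather than the project's code, shown only to illustrate why the state is shared process-wide.

# Hypothetical singleton decorator (the repo's src.manager.utils.singleton is
# not shown in this commit); it caches one instance per decorated class.
def singleton(cls):
    instances = {}

    def get_instance(*args, **kwargs):
        # Create the wrapped class at most once and reuse the same object.
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance


@singleton
class BudgetManager:
    TOTAL_BUDGET = 100


assert BudgetManager() is BudgetManager()  # same shared instance everywhere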
src/manager/manager.py CHANGED

@@ -163,6 +163,7 @@ class GeminiManager:
         top_k = min(k, len(memories))
         # Semantic Retrieval with GPU
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        print(f"Using device: {device}")
         model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
         doc_embeddings = model.encode(memories, convert_to_tensor=True, device=device)
         query_embedding = model.encode(query, convert_to_tensor=True, device=device)
@@ -195,13 +196,13 @@
         logger.debug(f"Chat history: {chat_history}")
         try:
             response = suppress_output(self.generate_response)(chat_history)
+            print(f"Response: {response}")
         except Exception as e:
-            logger.debug(f"Error generating response: {e}")
             messages.append({
                 "role": "assistant",
                 "content": f"Error generating response: {e}"
             })
-            logger.error(f"Error generating response: {e}")
+            logger.error(f"Error generating response", e)
             yield messages
             return
         logger.debug(f"Response: {response}")
src/tools/default_tools/agent_cost_manager.py ADDED

@@ -0,0 +1,78 @@
+__all__ = ['AgentCostManager']
+
+
+class AgentCostManager():
+    dependencies = []
+
+    inputSchema = {
+        "name": "AgentCostManager",
+        "description": "Retrieves the cost of creating and invoking an agent. Please make sure to use this before creating an agent.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+            "required": [],
+        }
+    }
+
+    costs = {
+        "llama3.2": {
+            "description": "3 Billion parameter model",
+            "create_cost": 10,
+            "invoke_cost": 20,
+        },
+        "mistral": {
+            "description": "7 Billion parameter model",
+            "create_cost": 20,
+            "invoke_cost": 50,
+        },
+        "gemini-2.5-flash-preview-04-17": {
+            "description": "Adaptive thinking, cost efficiency",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-2.5-pro-preview-03-25": {
+            "description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-2.0-flash": {
+            "description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-2.0-flash-lite": {
+            "description": "Cost efficiency and low latency",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-1.5-flash": {
+            "description": "Fast and versatile performance across a diverse variety of tasks",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-1.5-flash-8b": {
+            "description": "High volume and lower intelligence tasks",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-1.5-pro": {
+            "description": "Complex reasoning tasks requiring more intelligence",
+            "create_cost": 20,
+            "invoke_cost": 50
+        },
+        "gemini-2.0-flash-live-001": {
+            "description": "Low-latency bidirectional voice and video interactions",
+            "create_cost": 20,
+            "invoke_cost": 50
+        }
+    }
+
+    def get_costs(self):
+        return self.costs
+
+    def run(self, **kwargs):
+        return {
+            "status": "success",
+            "message": "Cost of creating and invoking an agent",
+            "output": self.costs,
+        }
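
A short usage sketch of the new tool, assuming the repository root is on PYTHONPATH so the import path matches the one added to agent_creater_tool.py; it only exercises the two methods defined above.

# Query the cost table before creating an agent.
from src.tools.default_tools.agent_cost_manager import AgentCostManager

cost_manager = AgentCostManager()
result = cost_manager.run()        # tool-style call: returns status/message/output
costs = cost_manager.get_costs()   # direct access to the same table

base_model = "gemini-2.0-flash"
print(result["status"])                   # "success"
print(costs[base_model]["create_cost"])   # 20
print(costs[base_model]["invoke_cost"])   # 50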
src/tools/default_tools/agent_creater_tool.py CHANGED

@@ -1,6 +1,7 @@
 from src.manager.agent_manager import AgentManager
 from src.manager.config.model_selector import choose_best_model
 from src.manager.utils.runtime_selector import detect_runtime_environment
+from src.tools.default_tools.agent_cost_manager import AgentCostManager
 __all__ = ['AgentCreator']
 
 class AgentCreator():
@@ -20,7 +21,7 @@ class AgentCreator():
                 },
                 "base_model": {
                     "type": "string",
-                    "description": "A base model from which the new agent mode is to be created. Available models are: llama3.2, mistral, gemini-2.5-flash-preview-04-17, gemini-2.5-pro-preview-03-25, gemini-2.0-flash, gemini-2.0-flash-lite, gemini-1.5-flash, gemini-1.5-flash-8b, gemini-1.5-pro, and gemini-2.0-flash-live-001"
+                    "description": "A base model from which the new agent mode is to be created. Check the available models using the AgentCostManager tool.",
                 },
                 "system_prompt": {
                     "type": "string",
@@ -31,78 +32,7 @@ class AgentCreator():
                     "description": "Description of the agent. This is a string that describes the agent and its capabilities. It should be a single line description.",
                 },
             },
-            "required": ["agent_name", "system_prompt", "description"],
-            #"required": ["agent_name", "base_model", "system_prompt", "description"],
-        },
-        "creates": {
-            "selector": "base_model",
-            "types": {
-                "llama3.2":{
-                    "description": "3 Billion parameter model",
-                    "create_cost": 10,
-                    "invoke_cost": 20,
-                },
-                "mistral":{
-                    "description": "7 Billion parameter model",
-                    "create_cost": 20,
-                    "invoke_cost": 50,
-                },
-                "gemini-2.5-flash-preview-04-17": {
-                    "description": "Adaptive thinking, cost efficiency",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                "gemini-2.5-pro-preview-03-25": {
-                    "description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                "gemini-2.0-flash": {
-                    "description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                "gemini-2.0-flash-lite": {
-                    "description": "Cost efficiency and low latency",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                "gemini-1.5-flash": {
-                    "description": "Fast and versatile performance across a diverse variety of tasks",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                "gemini-1.5-flash-8b": {
-                    "description": "High volume and lower intelligence tasks",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                "gemini-1.5-pro": {
-                    "description": "Complex reasoning tasks requiring more intelligence",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                },
-                # "gemini-embedding-exp": {
-                #     "description": "Measuring the relatedness of text strings",
-                #     "create_cost": 20,
-                #     "invoke_cost": 50
-                # },
-                # "imagen-3.0-generate-002": {
-                #     "description": "Our most advanced image generation model",
-                #     "create_cost": 20,
-                #     "invoke_cost": 50
-                # },
-                # "veo-2.0-generate-001": {
-                #     "description": "High quality video generation",
-                #     "create_cost": 20,
-                #     "invoke_cost": 50
-                # },
-                "gemini-2.0-flash-live-001": {
-                    "description": "Low-latency bidirectional voice and video interactions",
-                    "create_cost": 20,
-                    "invoke_cost": 50
-                }
-            }
+            "required": ["agent_name", "base_model", "system_prompt", "description"],
         }
     }
 
@@ -118,11 +48,12 @@ class AgentCreator():
 
         system_prompt = kwargs.get("system_prompt")
         description = kwargs.get("description")
-        create_cost = self.inputSchema["creates"]["types"][base_model]["create_cost"]
-        if base_model not in self.inputSchema["creates"]["types"]:
+        model_costs = AgentCostManager().get_costs()
+        create_cost = model_costs[base_model]["create_cost"]
+        if base_model not in model_costs:
             print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
             base_model = "gemini-2.0-flash"
-        invoke_cost = model_costs[base_model]["invoke_cost"]
+        invoke_cost = model_costs[base_model]["invoke_cost"]
 
         agent_manager = AgentManager()
         try:
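
Putting the pieces together: AgentCreator.run now resolves per-model costs through AgentCostManager instead of its own inputSchema, and AgentManager.validate_budget enforces them against the shared budget. The helper below is an illustrative sketch of that resolution step, not the committed code; it performs the membership check before the cost lookup so an unknown model falls back cleanly, and the fallback model name matches the one used in the commit.

# Illustrative helper: resolve create/invoke costs for a base model, falling
# back to gemini-2.0-flash when the model is not in the cost table.
def resolve_model_costs(model_costs: dict, base_model: str) -> tuple[str, float, float]:
    if base_model not in model_costs:
        print(f"[WARN] Auto-selected model '{base_model}' not in schema. "
              f"Falling back to gemini-2.0-flash")
        base_model = "gemini-2.0-flash"
    create_cost = model_costs[base_model]["create_cost"]
    invoke_cost = model_costs[base_model]["invoke_cost"]
    return base_model, create_cost, invoke_cost


# Example with a slice of the cost table from agent_cost_manager.py:
costs = {
    "gemini-2.0-flash": {"create_cost": 20, "invoke_cost": 50},
    "llama3.2": {"create_cost": 10, "invoke_cost": 20},
}
print(resolve_model_costs(costs, "llama3.2"))       # ('llama3.2', 10, 20)
print(resolve_model_costs(costs, "unknown-model"))  # falls back to gemini-2.0-flash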