Spaces:
Running
Running
Commit
·
1476939
1
Parent(s):
0e323dc
Giving agent access to the model costs
Browse files
src/manager/agent_manager.py
CHANGED
@@ -141,8 +141,8 @@ class AgentManager():
|
|
141 |
|
142 |
def validate_budget(self, amount: float) -> None:
|
143 |
if not self.budget_manager.can_spend(amount):
|
144 |
-
raise ValueError(f"Do not have enough budget to create the
|
145 |
-
+f"Creating the
|
146 |
|
147 |
def create_agent_class(self, agent_name: str, base_model: str, system_prompt: str, description: str = "", create_cost: float = 0, invoke_cost: float = 0,
|
148 |
**additional_params) -> Agent:
|
|
|
141 |
|
142 |
def validate_budget(self, amount: float) -> None:
    """Raise if the budget manager reports that ``amount`` cannot be spent.

    Args:
        amount: Cost of the agent creation or invocation being attempted.

    Raises:
        ValueError: When ``self.budget_manager.can_spend(amount)`` is falsy.
            The message includes the requested amount and the value returned
            by ``get_current_remaining_budget()``.
    """
    # Guard clause: nothing to do when the spend is allowed.
    if self.budget_manager.can_spend(amount):
        return

    # Only queried on the failure path, same as the original inline f-string.
    remaining = self.budget_manager.get_current_remaining_budget()
    raise ValueError(
        "Do not have enough budget to create/use the agent. "
        f"Creating/Using the agent costs {amount} but only {remaining} is remaining"
    )
|
146 |
|
147 |
def create_agent_class(self, agent_name: str, base_model: str, system_prompt: str, description: str = "", create_cost: float = 0, invoke_cost: float = 0,
|
148 |
**additional_params) -> Agent:
|
src/manager/budget_manager.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from src.manager.utils.singleton import singleton
|
2 |
import torch
|
3 |
import psutil
|
|
|
4 |
@singleton
|
5 |
class BudgetManager():
|
6 |
TOTAL_BUDGET = 100
|
|
|
1 |
from src.manager.utils.singleton import singleton
|
2 |
import torch
|
3 |
import psutil
|
4 |
+
|
5 |
@singleton
|
6 |
class BudgetManager():
|
7 |
TOTAL_BUDGET = 100
|
src/manager/manager.py
CHANGED
@@ -163,6 +163,7 @@ class GeminiManager:
|
|
163 |
top_k = min(k, len(memories))
|
164 |
# Semantic Retrieval with GPU
|
165 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
|
166 |
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
167 |
doc_embeddings = model.encode(memories, convert_to_tensor=True, device=device)
|
168 |
query_embedding = model.encode(query, convert_to_tensor=True, device=device)
|
@@ -195,13 +196,13 @@ class GeminiManager:
|
|
195 |
logger.debug(f"Chat history: {chat_history}")
|
196 |
try:
|
197 |
response = suppress_output(self.generate_response)(chat_history)
|
|
|
198 |
except Exception as e:
|
199 |
-
logger.debug(f"Error generating response: {e}")
|
200 |
messages.append({
|
201 |
"role": "assistant",
|
202 |
"content": f"Error generating response: {e}"
|
203 |
})
|
204 |
-
logger.error(f"Error generating response
|
205 |
yield messages
|
206 |
return
|
207 |
logger.debug(f"Response: {response}")
|
|
|
163 |
top_k = min(k, len(memories))
|
164 |
# Semantic Retrieval with GPU
|
165 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
166 |
+
print(f"Using device: {device}")
|
167 |
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
168 |
doc_embeddings = model.encode(memories, convert_to_tensor=True, device=device)
|
169 |
query_embedding = model.encode(query, convert_to_tensor=True, device=device)
|
|
|
196 |
logger.debug(f"Chat history: {chat_history}")
|
197 |
try:
|
198 |
response = suppress_output(self.generate_response)(chat_history)
|
199 |
+
print(f"Response: {response}")
|
200 |
except Exception as e:
|
|
|
201 |
messages.append({
|
202 |
"role": "assistant",
|
203 |
"content": f"Error generating response: {e}"
|
204 |
})
|
205 |
+
logger.error(f"Error generating response", e)
|
206 |
yield messages
|
207 |
return
|
208 |
logger.debug(f"Response: {response}")
|
src/tools/default_tools/agent_cost_manager.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__all__ = ['AgentCostManager']
|
2 |
+
|
3 |
+
|
4 |
+
class AgentCostManager():
    """Tool that reports the cost of creating and invoking an agent per base model.

    The pricing table lives in the class attribute ``costs``. Both
    ``get_costs()`` and ``run()`` hand out copies of that table, so a caller
    that edits the returned data cannot alter the prices seen by later callers.
    """

    # Empty here; presumably consumed by the tool loader -- confirm against it.
    dependencies = []

    # Tool declaration: the tool takes no parameters.
    inputSchema = {
        "name": "AgentCostManager",
        "description": "Retrieves the cost of creating and invoking an agent. Please make sure to use this before creating an agent.",
        "parameters": {
            "type": "object",
            "properties": {},
            "required": [],
        }
    }

    # Base model name -> description plus create/invoke cost.
    # NOTE(review): units are not stated here; presumably the same units the
    # budget manager tracks -- confirm.
    costs = {
        "llama3.2": {
            "description": "3 Billion parameter model",
            "create_cost": 10,
            "invoke_cost": 20,
        },
        "mistral": {
            "description": "7 Billion parameter model",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-2.5-flash-preview-04-17": {
            "description": "Adaptive thinking, cost efficiency",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-2.5-pro-preview-03-25": {
            "description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-2.0-flash": {
            "description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-2.0-flash-lite": {
            "description": "Cost efficiency and low latency",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-1.5-flash": {
            "description": "Fast and versatile performance across a diverse variety of tasks",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-1.5-flash-8b": {
            "description": "High volume and lower intelligence tasks",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-1.5-pro": {
            "description": "Complex reasoning tasks requiring more intelligence",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-2.0-flash-live-001": {
            "description": "Low-latency bidirectional voice and video interactions",
            "create_cost": 20,
            "invoke_cost": 50,
        },
    }

    def _costs_snapshot(self):
        """Return a fresh copy of ``costs`` (outer dict and each model entry)."""
        # Entries are flat dicts of str/int, so copying one level down is enough.
        return {model: dict(entry) for model, entry in self.costs.items()}

    def get_costs(self):
        """Return the pricing table keyed by base model name.

        Returns:
            dict: ``{model_name: {"description", "create_cost", "invoke_cost"}}``.
            The result is a copy; mutating it does not affect the class table.
        """
        return self._costs_snapshot()

    def run(self, **kwargs):
        """Tool entry point; reports the full pricing table.

        Args:
            **kwargs: Accepted and ignored; the schema declares no parameters.

        Returns:
            dict: ``status``, ``message`` and ``output`` keys, where ``output``
            is a copy of the pricing table.
        """
        return {
            "status": "success",
            "message": "Cost of creating and invoking an agent",
            "output": self._costs_snapshot(),
        }
|
src/tools/default_tools/agent_creater_tool.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from src.manager.agent_manager import AgentManager
|
2 |
from src.manager.config.model_selector import choose_best_model
|
3 |
from src.manager.utils.runtime_selector import detect_runtime_environment
|
|
|
4 |
__all__ = ['AgentCreator']
|
5 |
|
6 |
class AgentCreator():
|
@@ -20,7 +21,7 @@ class AgentCreator():
|
|
20 |
},
|
21 |
"base_model": {
|
22 |
"type": "string",
|
23 |
-
"description": "A base model from which the new agent mode is to be created.
|
24 |
},
|
25 |
"system_prompt": {
|
26 |
"type": "string",
|
@@ -31,78 +32,7 @@ class AgentCreator():
|
|
31 |
"description": "Description of the agent. This is a string that describes the agent and its capabilities. It should be a single line description.",
|
32 |
},
|
33 |
},
|
34 |
-
"required": ["agent_name", "system_prompt", "description"],
|
35 |
-
#"required": ["agent_name", "base_model", "system_prompt", "description"],
|
36 |
-
},
|
37 |
-
"creates": {
|
38 |
-
"selector": "base_model",
|
39 |
-
"types": {
|
40 |
-
"llama3.2":{
|
41 |
-
"description": "3 Billion parameter model",
|
42 |
-
"create_cost": 10,
|
43 |
-
"invoke_cost": 20,
|
44 |
-
},
|
45 |
-
"mistral":{
|
46 |
-
"description": "7 Billion parameter model",
|
47 |
-
"create_cost": 20,
|
48 |
-
"invoke_cost": 50,
|
49 |
-
},
|
50 |
-
"gemini-2.5-flash-preview-04-17": {
|
51 |
-
"description": "Adaptive thinking, cost efficiency",
|
52 |
-
"create_cost": 20,
|
53 |
-
"invoke_cost": 50
|
54 |
-
},
|
55 |
-
"gemini-2.5-pro-preview-03-25": {
|
56 |
-
"description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
|
57 |
-
"create_cost": 20,
|
58 |
-
"invoke_cost": 50
|
59 |
-
},
|
60 |
-
"gemini-2.0-flash": {
|
61 |
-
"description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
|
62 |
-
"create_cost": 20,
|
63 |
-
"invoke_cost": 50
|
64 |
-
},
|
65 |
-
"gemini-2.0-flash-lite": {
|
66 |
-
"description": "Cost efficiency and low latency",
|
67 |
-
"create_cost": 20,
|
68 |
-
"invoke_cost": 50
|
69 |
-
},
|
70 |
-
"gemini-1.5-flash": {
|
71 |
-
"description": "Fast and versatile performance across a diverse variety of tasks",
|
72 |
-
"create_cost": 20,
|
73 |
-
"invoke_cost": 50
|
74 |
-
},
|
75 |
-
"gemini-1.5-flash-8b": {
|
76 |
-
"description": "High volume and lower intelligence tasks",
|
77 |
-
"create_cost": 20,
|
78 |
-
"invoke_cost": 50
|
79 |
-
},
|
80 |
-
"gemini-1.5-pro": {
|
81 |
-
"description": "Complex reasoning tasks requiring more intelligence",
|
82 |
-
"create_cost": 20,
|
83 |
-
"invoke_cost": 50
|
84 |
-
},
|
85 |
-
# "gemini-embedding-exp": {
|
86 |
-
# "description": "Measuring the relatedness of text strings",
|
87 |
-
# "create_cost": 20,
|
88 |
-
# "invoke_cost": 50
|
89 |
-
# },
|
90 |
-
# "imagen-3.0-generate-002": {
|
91 |
-
# "description": "Our most advanced image generation model",
|
92 |
-
# "create_cost": 20,
|
93 |
-
# "invoke_cost": 50
|
94 |
-
# },
|
95 |
-
# "veo-2.0-generate-001": {
|
96 |
-
# "description": "High quality video generation",
|
97 |
-
# "create_cost": 20,
|
98 |
-
# "invoke_cost": 50
|
99 |
-
# },
|
100 |
-
"gemini-2.0-flash-live-001": {
|
101 |
-
"description": "Low-latency bidirectional voice and video interactions",
|
102 |
-
"create_cost": 20,
|
103 |
-
"invoke_cost": 50
|
104 |
-
}
|
105 |
-
}
|
106 |
}
|
107 |
}
|
108 |
|
@@ -118,11 +48,12 @@ class AgentCreator():
|
|
118 |
|
119 |
system_prompt = kwargs.get("system_prompt")
|
120 |
description = kwargs.get("description")
|
121 |
-
|
122 |
-
|
|
|
123 |
print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
|
124 |
base_model = "gemini-2.0-flash"
|
125 |
-
invoke_cost =
|
126 |
|
127 |
agent_manager = AgentManager()
|
128 |
try:
|
|
|
1 |
from src.manager.agent_manager import AgentManager
|
2 |
from src.manager.config.model_selector import choose_best_model
|
3 |
from src.manager.utils.runtime_selector import detect_runtime_environment
|
4 |
+
from src.tools.default_tools.agent_cost_manager import AgentCostManager
|
5 |
__all__ = ['AgentCreator']
|
6 |
|
7 |
class AgentCreator():
|
|
|
21 |
},
|
22 |
"base_model": {
|
23 |
"type": "string",
|
24 |
+
"description": "A base model from which the new agent mode is to be created. Check the available models using the AgentCostManager tool.",
|
25 |
},
|
26 |
"system_prompt": {
|
27 |
"type": "string",
|
|
|
32 |
"description": "Description of the agent. This is a string that describes the agent and its capabilities. It should be a single line description.",
|
33 |
},
|
34 |
},
|
35 |
+
"required": ["agent_name", "base_model", "system_prompt", "description"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
}
|
37 |
}
|
38 |
|
|
|
48 |
|
49 |
system_prompt = kwargs.get("system_prompt")
|
50 |
description = kwargs.get("description")
|
51 |
+
model_costs = AgentCostManager().get_costs()
|
52 |
+
create_cost = model_costs[base_model]["create_cost"]
|
53 |
+
if base_model not in model_costs:
|
54 |
print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
|
55 |
base_model = "gemini-2.0-flash"
|
56 |
+
invoke_cost = model_costs[base_model]["invoke_cost"]
|
57 |
|
58 |
agent_manager = AgentManager()
|
59 |
try:
|