saisha09 committed on
Commit 703f2c8 · 1 Parent(s): b3d741d

cost_benefit modified

src/cost_benefit.py CHANGED
@@ -3,48 +3,58 @@ import subprocess
 import time
 import requests
 
-
-def get_best_model(weights: dict, runtime_env: str) -> dict:
-    #placeholders
-    models = {
-        "llama3.2": {"size": 2.5, "token_cost": 0.0001, "speed": 30},
-        "mistral": {"size": 4.2, "token_cost": 0.0002, "speed": 50},
-        "gemini-2.0-flash": {"size": 6.1, "token_cost": 0.0005, "speed": 60},
-        "gemini-2.5-pro-preview-03-25": {"size": 8.2, "token_cost": 0.002, "speed": 45}
-    }
-
-    penalty = {
-        "gpu": 1.0,
-        "cpu-local": 2.0,
-        "cloud-only": 1.5
+def detect_available_budget(runtime_env: str) -> int:
+    import torch
+    if "local" in runtime_env and torch.cuda.is_available():
+        total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
+        return min(total_vram_mb, 100)
+    else:
+        return 100
+
+
+def get_best_model(runtime_env: str, use_local_only=False, use_api_only=False) -> dict:
+    # Model info (cost, tokens/sec, type)
+    static_costs = {
+        "llama3.2": {"size": 20, "token_cost": 0.0001, "tokens_sec": 30, "type": "local"},
+        "mistral": {"size": 40, "token_cost": 0.0002, "tokens_sec": 50, "type": "local"},
+        "gemini-2.0-flash": {"size": 60, "token_cost": 0.0005, "tokens_sec": 60, "type": "api"},
+        "gemini-2.5-pro-preview-03-25": {"size": 80, "token_cost": 0.002, "tokens_sec": 45, "type": "api"}
     }
 
-    best_model = None
-    best_score = float("-inf")  # Track max score
+    def detect_available_budget(runtime_env: str) -> int:
+        import torch
+        if "local" in runtime_env and torch.cuda.is_available():
+            total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
+            return min(total_vram_mb, 100)
+        else:
+            return 100
 
-    for model, metrics in models.items():
-        p = penalty.get(runtime_env, 2.0)
+    budget = detect_available_budget(runtime_env)
 
-        cost_score = (
-            weights["w_size"] * metrics["size"] * p +
-            weights["w_token_cost"] * metrics["token_cost"] * p +
-            weights["w_speed"] * (100 - metrics["speed"])
-        )
-        benefit_score = weights["w_speed"] * metrics["speed"]
-
-        decision_score = benefit_score / cost_score if cost_score != 0 else 0
-
-        if decision_score > best_score:
-            best_score = decision_score
+    best_model = None
+    best_speed = -1
+
+    for model, info in static_costs.items():
+        if info["size"] > budget:
+            continue
+        if use_local_only and info["type"] != "local":
+            continue
+        if use_api_only and info["type"] != "api":
+            continue
+        if info["tokens_sec"] > best_speed:
             best_model = model
+            best_speed = info["tokens_sec"]
 
     if not best_model:
-        return "No suitable model found"
+        return {
+            "model": "llama3.2",
+            "token_cost": static_costs["llama3.2"]["token_cost"],
+            "tokens_sec": static_costs["llama3.2"]["tokens_sec"],
+            "note": "Defaulted due to no models fitting filters"
+        }
 
     return {
         "model": best_model,
-        "score": best_score,
-        "token_cost": models[best_model]["token_cost"],
-        "tokens_sec": models[best_model]["speed"],
-        "output": f"Sample output from {best_model}"
+        "token_cost": static_costs[best_model]["token_cost"],
+        "tokens_sec": static_costs[best_model]["tokens_sec"]
     }
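The new `get_best_model` drops the weighted cost/benefit score in favor of a simpler rule: among models whose `size` fits the detected budget (VRAM in MB, capped at 100), pick the highest `tokens_sec`. A minimal usage sketch, assuming `src/` is importable and `torch` is installed; the environment string `"cpu-local"` follows the convention used in `model_selector.py`:

```python
# Sketch only: exercises the selector added in this commit.
# "cpu-local" contains "local", so detect_available_budget would probe
# CUDA if available; otherwise it returns the flat 100 budget.
from src.cost_benefit import detect_available_budget, get_best_model

budget = detect_available_budget("cpu-local")
print(f"budget: {budget}")  # <= 100 by construction

# With the full budget and no filters, the fastest entry wins
# (gemini-2.0-flash at 60 tokens/sec in the static table).
print(get_best_model("cpu-local"))

# Restricting to local models selects mistral (50 tokens/sec, size 40).
print(get_best_model("cpu-local", use_local_only=True))
```

Note that the commit defines `detect_available_budget` twice, once at module level and once nested inside `get_best_model`; the nested copy shadows the module-level one during selection, though both compute the same value.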
src/manager/config/model_selector.py CHANGED
@@ -7,24 +7,13 @@ load_dotenv()
 def choose_best_model(return_full=False):
     env = detect_runtime_environment()
     print(f"[INFO] Runtime Environment: {env}")
-
-    weights = {
-        "w_size": 0.1,
-        "w_token_cost": 100,
-        "w_speed": 0.5
-    }
 
-    result = get_best_model(weights, env)
+    result = get_best_model(env)
 
-    if isinstance(result, str) or not result.get("model"):
-        if env == "cpu-local":
-            if os.getenv("GEMINI_KEY"):
-                print("[INFO] Falling back to Gemini for cpu-local.")
-                return {"model": "gemini-2.0-flash"} if return_full else "gemini-2.0-flash"
-            else:
-                print("[WARN] GOOGLE_API_KEY missing. Falling back to llama3.2.")
-                return {"model": "llama3.2"} if return_full else "llama3.2"
-        return {"model": "llama3.2"} if return_full else "llama3.2"
+    if not result.get("model"):
+        print("[WARN] No model found under budget — using fallback.")
+        fallback_model = "gemini-2.0-flash" if os.getenv("GEMINI_KEY") else "llama3.2"
+        return {"model": fallback_model} if return_full else fallback_model
 
-    print(f"[INFO] Auto-selected model: {result['model']}")
+    print(f"[INFO] Auto-selected model: {result['model']} (token cost: {result['token_cost']}, tokens/sec: {result['tokens_sec']})")
     return result if return_full else result["model"]
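`choose_best_model` no longer builds a weights dictionary; it forwards only the environment string and surfaces the selector's token cost and speed in its log line. A hedged sketch of the two calling conventions (the import path is inferred from the file location; `detect_runtime_environment` and `load_dotenv` come from the surrounding module):

```python
# Sketch only: the two return shapes of choose_best_model.
from src.manager.config.model_selector import choose_best_model

name = choose_best_model()                  # plain model name string
info = choose_best_model(return_full=True)  # dict with model/token_cost/tokens_sec
print(name)
print(info["model"], info["token_cost"], info["tokens_sec"])
```

Since `get_best_model` now always returns a dict with a `"model"` key (defaulting to `llama3.2` when nothing fits the filters), the `GEMINI_KEY` fallback branch here acts as a safety net rather than a path hit in normal operation.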
src/tools/default_tools/test_cost/agent_creator_tool.py CHANGED
@@ -109,34 +109,39 @@ class AgentCreator():
 
     def run(self, **kwargs):
         print("Running Agent Creator")
+
         agent_name = kwargs.get("agent_name")
+        base_model = kwargs.get("base_model")
+
+        # NEW: read flags from kwargs
+        use_local_only = kwargs.get("use_local_only", False)
+        use_api_only = kwargs.get("use_api_only", False)
 
-        # Get full model info (not just name)
-        model_info = choose_best_model(return_full=True)
-        base_model = kwargs.get("base_model") or choose_best_model()
-        base_model = model_info["model"]
-        token_cost = model_info.get("token_cost", 0.0001)
-        speed = model_info.get("tokens_sec", 30)
-        score = model_info.get("score", 1)
-
-        env = detect_runtime_environment()
-        print(f"\n[DEBUG] Detected Runtime Environment: {env}")
+        if not base_model:
+            env = detect_runtime_environment()
+            print(f"\n[DEBUG] Detected Runtime Environment: {env}")
+
+            from src.cost_benefit import get_best_model
+            model_meta = get_best_model(
+                runtime_env=env,
+                use_local_only=use_local_only,
+                use_api_only=use_api_only
+            )
+            base_model = model_meta["model"]
+        else:
+            model_meta = {"model": base_model}
 
         print(f"[DEBUG] Selected Model: {base_model}")
-        print(f"[DEBUG] Token Cost: {token_cost}, Speed: {speed}, Score: {score}")
+
+        if base_model not in self.inputSchema["creates"]["types"]:
+            print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
+            base_model = "gemini-2.0-flash"
 
         system_prompt = kwargs.get("system_prompt")
         description = kwargs.get("description")
-        #create_cost = self.inputSchema["creates"]["types"][base_model]["create_cost"]
-        #if base_model not in self.inputSchema["creates"]["types"]:
-        #    print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
-        #    base_model = "gemini-2.0-flash"
-        #invoke_cost = self.inputSchema["creates"]["types"][base_model]["invoke_cost"]
-
-        # Dynamically calculated costs
-        create_cost = round(10 + (token_cost * 10000) + (50 / (speed + 1)), 2)
-        invoke_cost = round(create_cost * 2, 2)
 
-        print(f"[INFO] Assigned Create Cost: {create_cost}, Invoke Cost: {invoke_cost}")
+        create_cost = self.inputSchema["creates"]["types"][base_model]["create_cost"]
+        invoke_cost = self.inputSchema["creates"]["types"][base_model]["invoke_cost"]
 
         agent_manager = AgentManager()
         try:
@@ -157,8 +162,7 @@ class AgentCreator():
 
         return {
             "status": "success",
-            "message": "Agent successfully created",
+            "message": f"Agent '{agent_name}' created using model '{base_model}'",
+            "model_info": model_meta,
             "remaining_budget": remaining_budget,
         }
-
-
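On the tool side, callers can now pin `base_model` or steer auto-selection with the two new flags. A minimal invocation sketch (the agent name, prompt, and description are placeholders; `AgentCreator` and its `AgentManager` dependency come from this file):

```python
# Sketch only: drive the updated run() with the new kwargs.
# Auto-selection happens only when base_model is omitted; the flags
# are forwarded to get_best_model as local/API filters.
creator = AgentCreator()
result = creator.run(
    agent_name="summarizer",                    # placeholder
    system_prompt="Summarize the input text.",  # placeholder
    description="A text summarization agent.",  # placeholder
    use_local_only=True,
)
print(result["status"], result.get("model_info"))
```

If the selected model is missing from `inputSchema["creates"]["types"]`, `run` now swaps in `gemini-2.0-flash` before looking up `create_cost` and `invoke_cost`, so the schema lookup no longer raises a `KeyError` for unknown auto-selected names.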