saisha09 committed on
Commit a997e09 · 1 Parent(s): e9ca387

api link placeholder added to cost_benefit.py

Files changed (1)
  1. cost_benefit.py +82 -26
cost_benefit.py CHANGED
@@ -1,14 +1,32 @@
 import argparse
 import subprocess
 import time
+import requests
 
-# Model info --> Placeholder
-# More models to add
+# Model info with both ollama and API usage
 MODEL_INFO = {
-    "mistral": {"size": 7, "token_cost": 0.002},
-    "llama": {"size": 13, "token_cost": 0.0025},
-    "deepseek": {"size": 1.3, "token_cost": 0.0015},
-    "gemini": {"size": 15, "token_cost": 0.003}
+    "mistral": {
+        "size": 7,
+        "token_cost": 0.002,
+        "use_api": False
+    },
+    "llama": {
+        "size": 13,
+        "token_cost": 0.0025,
+        "use_api": False
+    },
+    "deepseek": {
+        "size": 1.3,
+        "token_cost": 0.0015,
+        "use_api": False,
+        "api": ".."  # Example API
+    },
+    "gemini": {
+        "size": 15,
+        "token_cost": 0.003,
+        "use_api": False,
+        "api": ".."
+    }
 }
 
 def run_model_ollama(model, prompt):
@@ -22,15 +40,43 @@ def run_model_ollama(model, prompt):
             timeout=60
         )
         end = time.time()
-    except Exception as e:
+    except Exception:
         return None
 
     output = result.stdout.decode().strip()
-    duration = end - start #
-    token_count = len(output.split()) #Number of tokens generated
-    tokens_per_sec = token_count / duration if duration > 0 else 0 #Tokens generated in a second
-    latency_ms = duration * 1000
-    token_cost = MODEL_INFO[model]["token_cost"] * token_count #Cost of all the tokens generated
+    duration = end - start
+    token_count = len(output.split())
+    tokens_per_sec = token_count / duration if duration > 0 else 0
+    latency_ms = duration * 1000
+    token_cost = MODEL_INFO[model]["token_cost"] * token_count
+
+    return {
+        "model": model,
+        "latency_ms": latency_ms,
+        "tokens_sec": tokens_per_sec,
+        "token_cost": token_cost,
+        "output": output
+    }
+
+def run_model_api(model, prompt):
+    try:
+        start = time.time()
+        response = requests.post(
+            MODEL_INFO[model]["api"],
+            json={"prompt": prompt},
+            timeout=60
+        )
+        end = time.time()
+        response.raise_for_status()
+        output = response.json().get("response", "")  # Adjust key as needed
+    except Exception:
+        return None
+
+    duration = end - start
+    token_count = len(output.split())
+    tokens_per_sec = token_count / duration if duration > 0 else 0
+    latency_ms = duration * 1000
+    token_cost = MODEL_INFO[model]["token_cost"] * token_count
 
     return {
         "model": model,
@@ -40,16 +86,24 @@ def run_model_ollama(model, prompt):
         "output": output
     }
 
-def get_best_model(prompt, weights, models=["mistral", "llama", "deepseek", "gemini"]):
+def run_model(model, prompt):
+    if MODEL_INFO[model].get("use_api", False):
+        return run_model_api(model, prompt)
+    else:
+        return run_model_ollama(model, prompt)
+
+def get_best_model(prompt, weights, models=None):
+    if models is None:
+        models = list(MODEL_INFO.keys())
+
     results = []
     for model in models:
-        res = run_model_ollama(model, prompt)
+        res = run_model(model, prompt)
        if not res:
            continue
 
-        #Redefine
         size = MODEL_INFO[model]["size"]
-        cost_score = (1 / 1.0) + weights["w_lat"] * res["latency_ms"] + \
+        cost_score = 1 + weights["w_lat"] * res["latency_ms"] + \
             weights["w_size"] * size + weights["w_token_cost"] * res["token_cost"]
         benefit_score = weights["w_speed"] * res["tokens_sec"]
         decision_score = benefit_score / cost_score
@@ -60,11 +114,10 @@ def get_best_model(prompt, weights, models=["mistral", "llama", "deepseek", "gem
     if not results:
         return "No models succeeded."
 
-    best = max(results, key=lambda x: x["decision_score"])
-    return best
+    return max(results, key=lambda x: x["decision_score"])
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Choose best Ollama model for a task")
+    parser = argparse.ArgumentParser(description="Choose best model for a task")
     parser.add_argument('--prompt', required=True, help='The task or question to ask the models')
     parser.add_argument('--latency', type=int, default=3, help='Priority for latency (1–5)')
     parser.add_argument('--size', type=int, default=3, help='Priority for model size (1–5)')
@@ -72,7 +125,6 @@ if __name__ == "__main__":
     parser.add_argument('--speed', type=int, default=3, help='Priority for tokens/sec (1–5)')
     args = parser.parse_args()
 
-    # Scale weights from priority. Can be redefined
     weights = {
         "w_lat": 0.002 * args.latency,
         "w_size": 0.1 * args.size,
@@ -81,9 +133,13 @@ if __name__ == "__main__":
     }
 
     best = get_best_model(args.prompt, weights)
-    print(f"\nBest Model: {best['model']}")
-    print(f"Decision Score: {round(best['decision_score'], 4)}")
-    print(f"Latency (ms): {round(best['latency_ms'], 2)}")
-    print(f"Tokens/sec: {round(best['tokens_sec'], 2)}")
-    print(f"Token Cost ($): {round(best['token_cost'], 5)}")
-    print(f"\nOutput:\n{best['output']}")
+
+    if isinstance(best, str):
+        print(best)
+    else:
+        print(f"\nBest Model: {best['model']}")
+        print(f"Decision Score: {round(best['decision_score'], 4)}")
+        print(f"Latency (ms): {round(best['latency_ms'], 2)}")
+        print(f"Tokens/sec: {round(best['tokens_sec'], 2)}")
+        print(f"Token Cost ($): {round(best['token_cost'], 5)}")
+        print(f"\nOutput:\n{best['output']}")