harshil-21 commited on
Commit
a827aa8
·
1 Parent(s): 4be2bb5

chore(ci): restore missing src/cost_benefit.py [skip ci]

Browse files
Files changed (1) hide show
  1. src/cost_benefit.py +48 -0
src/cost_benefit.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import subprocess
3
+ import time
4
+ import requests
5
+
6
def detect_available_budget(runtime_env: str) -> int:
    """
    Return an approximate VRAM-based budget (MB) when running locally,
    else default to 100.

    Args:
        runtime_env: Free-form environment label; a CUDA VRAM probe is
            attempted only when it contains the substring "local".

    Returns:
        The GPU's total VRAM in MB capped at 100 when a local CUDA device
        is available, otherwise the default budget of 100.
    """
    try:
        # Imported lazily so non-GPU deployments don't pay the cost; the
        # original unguarded import crashed with ImportError on hosts
        # without torch even when runtime_env was not "local".
        import torch
    except ImportError:
        return 100

    if "local" in runtime_env and torch.cuda.is_available():
        # Total device memory in MB (bytes // 1024**2) of GPU 0.
        total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
        # NOTE(review): min() caps the local budget at 100, making a large
        # GPU no better than the default -- confirm max() was not intended.
        return min(total_vram_mb, 100)
    return 100
16
+
17
def get_best_model(runtime_env: str, *, use_local_only: bool = False, use_api_only: bool = False) -> dict:
    """
    Select the fastest model that fits the detected budget while
    honouring the locality filters.

    Args:
        runtime_env: Environment label forwarded to ``detect_available_budget``.
        use_local_only: Restrict candidates to locally-hosted models.
        use_api_only: Restrict candidates to API-backed models.

    Returns:
        A dict with keys ``model``, ``token_cost``, ``tokens_sec`` and
        ``note`` (``None`` unless the selection fell back to the default).
    """
    static_costs = {
        "llama3.2": {"size": 20, "token_cost": 0.0001, "tokens_sec": 30, "type": "local"},
        "mistral": {"size": 40, "token_cost": 0.0002, "tokens_sec": 50, "type": "local"},
        "gemini-2.0-flash": {"size": 60, "token_cost": 0.0005, "tokens_sec": 60, "type": "api"},
        "gemini-2.5-pro-preview-03-25": {"size": 80, "token_cost": 0.002, "tokens_sec": 45, "type": "api"},
    }

    budget = detect_available_budget(runtime_env)

    def _eligible(info: dict) -> bool:
        # A candidate must fit the budget and satisfy both locality filters.
        if info["size"] > budget:
            return False
        if use_local_only and info["type"] != "local":
            return False
        if use_api_only and info["type"] != "api":
            return False
        return True

    candidates = [name for name, info in static_costs.items() if _eligible(info)]

    if candidates:
        # max() keeps the first maximum, matching first-wins tie-breaking.
        chosen = max(candidates, key=lambda name: static_costs[name]["tokens_sec"])
        note = None
    else:
        chosen = "llama3.2"  # sensible default
        note = "Defaulted because no model met the constraints"

    return {
        "model": chosen,
        "token_cost": static_costs[chosen]["token_cost"],
        "tokens_sec": static_costs[chosen]["tokens_sec"],
        "note": note,
    }