saisha09 committed
Commit c978556 · 1 Parent(s): 2155e91

cost_benefit file deleted

Files changed (1)
  1. src/cost_benefit.py +0 -60
src/cost_benefit.py DELETED
@@ -1,60 +0,0 @@
-import argparse
-import subprocess
-import time
-import requests
-
-def detect_available_budget(runtime_env: str) -> int:
-    import torch
-    if "local" in runtime_env and torch.cuda.is_available():
-        total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
-        return min(total_vram_mb, 100)
-    else:
-        return 100
-
-
-def get_best_model(runtime_env: str, use_local_only=False, use_api_only=False) -> dict:
-    # Model info (cost, tokens/sec, type)
-    static_costs = {
-        "llama3.2": {"size": 20, "token_cost": 0.0001, "tokens_sec": 30, "type": "local"},
-        "mistral": {"size": 40, "token_cost": 0.0002, "tokens_sec": 50, "type": "local"},
-        "gemini-2.0-flash": {"size": 60, "token_cost": 0.0005, "tokens_sec": 60, "type": "api"},
-        "gemini-2.5-pro-preview-03-25": {"size": 80, "token_cost": 0.002, "tokens_sec": 45, "type": "api"}
-    }
-
-    def detect_available_budget(runtime_env: str) -> int:
-        import torch
-        if "local" in runtime_env and torch.cuda.is_available():
-            total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
-            return min(total_vram_mb, 100)
-        else:
-            return 100
-
-    budget = detect_available_budget(runtime_env)
-
-    best_model = None
-    best_speed = -1
-
-    for model, info in static_costs.items():
-        if info["size"] > budget:
-            continue
-        if use_local_only and info["type"] != "local":
-            continue
-        if use_api_only and info["type"] != "api":
-            continue
-        if info["tokens_sec"] > best_speed:
-            best_model = model
-            best_speed = info["tokens_sec"]
-
-    if not best_model:
-        return {
-            "model": "llama3.2",
-            "token_cost": static_costs["llama3.2"]["token_cost"],
-            "tokens_sec": static_costs["llama3.2"]["tokens_sec"],
-            "note": "Defaulted due to no models fitting filters"
-        }
-
-    return {
-        "model": best_model,
-        "token_cost": static_costs[best_model]["token_cost"],
-        "tokens_sec": static_costs[best_model]["tokens_sec"]
-    }
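
For context: the deleted get_best_model helper picked the fastest model (by tokens_sec) whose size fit the detected budget, defaulting to llama3.2 when no model passed the filters. A minimal sketch of how a caller might have exercised it before this deletion; the "local" runtime_env value and the import path are assumptions, not taken from the diff:

    # Hypothetical caller for the deleted helper (sketch, not from this repo).
    # Assumes src/ is importable and that "local" was a valid runtime_env value.
    from cost_benefit import get_best_model

    choice = get_best_model("local", use_local_only=True)  # restrict to local models
    print(f"{choice['model']}: {choice['tokens_sec']} tokens/sec at ${choice['token_cost']}/token")
    if "note" in choice:  # present only when the llama3.2 fallback was used
        print(choice["note"])

Worth noting about the removed file itself: detect_available_budget was defined twice (once at module level, once nested inside get_best_model), only the nested copy was ever called, and the module-level imports of argparse, subprocess, time, and requests were unused.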