# Source: hashiruAI / src/cost_benefit.py
# Last commit: "refactor" (2f85c93) by helloparthshah; file size 1.49 kB.
import argparse
import subprocess
import time
import requests
def get_best_model(weights: dict, runtime_env: str) -> dict:
    """Pick the model with the highest benefit-to-cost ratio.

    Every candidate in the built-in (placeholder) model table is scored as
    ``benefit / cost`` where::

        cost    = w_size * size * penalty
                + w_token_cost * token_cost * penalty
                + w_speed * (100 - speed)
        benefit = w_speed * speed

    A candidate whose cost is exactly zero gets a score of 0 rather than
    triggering a division by zero.

    Args:
        weights: Mapping that must contain the keys ``"w_size"``,
            ``"w_token_cost"`` and ``"w_speed"``.
        runtime_env: One of ``"gpu"``, ``"cpu-local"`` or ``"cloud-only"``.
            Any other value is scored with a penalty of 2.0 (the same as
            ``"cpu-local"``).

    Returns:
        A dict with the keys ``"model"``, ``"score"``, ``"token_cost"``,
        ``"tokens_sec"`` and ``"output"`` describing the winner. When
        several candidates tie, the one listed first in the table wins.
        If no candidate ever beats the initial score (for example when
        every score is NaN), the string ``"No suitable model found"`` is
        returned instead of a dict.

    Raises:
        KeyError: If ``weights`` lacks one of the required keys.
    """
    # Placeholder figures; per-model "size", "token_cost" and "speed".
    models = {
        "llama3.2": {"size": 2.5, "token_cost": 0.0001, "speed": 30},
        "mistral": {"size": 4.2, "token_cost": 0.0002, "speed": 50},
        "gemini-2.0-flash": {"size": 6.1, "token_cost": 0.0005, "speed": 60},
        "gemini-2.5-pro-preview-03-25": {
            "size": 8.2,
            "token_cost": 0.002,
            "speed": 45,
        },
    }
    # Multiplier applied to the size and token-cost terms per environment.
    penalty = {
        "gpu": 1.0,
        "cpu-local": 2.0,
        "cloud-only": 1.5,
    }

    # The penalty depends only on the environment, so look it up once.
    env_penalty = penalty.get(runtime_env, 2.0)

    best_model = None
    best_score = float("-inf")  # Track max score

    for name, metrics in models.items():
        size_term = weights["w_size"] * metrics["size"] * env_penalty
        token_term = (
            weights["w_token_cost"] * metrics["token_cost"] * env_penalty
        )
        # Slower models cost more: speed is inverted against 100.
        slowness_term = weights["w_speed"] * (100 - metrics["speed"])
        cost_score = size_term + token_term + slowness_term

        benefit_score = weights["w_speed"] * metrics["speed"]

        if cost_score != 0:
            decision_score = benefit_score / cost_score
        else:
            decision_score = 0

        # Strict comparison keeps the earliest candidate on ties and
        # never selects a NaN score.
        if decision_score > best_score:
            best_score = decision_score
            best_model = name

    if not best_model:
        return "No suitable model found"

    winner = models[best_model]
    return {
        "model": best_model,
        "score": best_score,
        "token_cost": winner["token_cost"],
        "tokens_sec": winner["speed"],
        "output": f"Sample output from {best_model}",
    }