import argparse
import subprocess
import time
import requests


def get_best_model(weights: dict, runtime_env: str) -> dict:
    # Placeholder metrics for the candidate models.
    models = {
        "llama3.2": {"size": 2.5, "token_cost": 0.0001, "speed": 30},
        "mistral": {"size": 4.2, "token_cost": 0.0002, "speed": 50},
        "gemini-2.0-flash": {"size": 6.1, "token_cost": 0.0005, "speed": 60},
        "gemini-2.5-pro-preview-03-25": {"size": 8.2, "token_cost": 0.002, "speed": 45},
    }
    # Penalty multiplier per runtime environment; unknown environments
    # fall back to the most conservative value (2.0).
    penalty = {
        "gpu": 1.0,
        "cpu-local": 2.0,
        "cloud-only": 1.5,
    }
    p = penalty.get(runtime_env, 2.0)

    best_model = None
    best_score = float("-inf")  # Track the maximum decision score seen so far.

    for model, metrics in models.items():
        # Cost grows with model size and token cost (both scaled by the
        # environment penalty) and with slowness (100 - speed).
        cost_score = (
            weights["w_size"] * metrics["size"] * p
            + weights["w_token_cost"] * metrics["token_cost"] * p
            + weights["w_speed"] * (100 - metrics["speed"])
        )
        # Benefit grows with throughput; the decision score is benefit per
        # unit of cost.
        benefit_score = weights["w_speed"] * metrics["speed"]
        decision_score = benefit_score / cost_score if cost_score != 0 else 0

        if decision_score > best_score:
            best_score = decision_score
            best_model = model

    if not best_model:
        # Return a dict here as well, so the annotated return type holds.
        return {"error": "No suitable model found"}

    return {
        "model": best_model,
        "score": best_score,
        "token_cost": models[best_model]["token_cost"],
        "tokens_sec": models[best_model]["speed"],
        "output": f"Sample output from {best_model}",
    }
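

# Illustrative usage sketch (not part of the original script): the weight
# values and the "gpu" runtime_env below are hypothetical, chosen only to
# show the expected shape of the arguments read by get_best_model.
def _demo_selection() -> None:
    example_weights = {"w_size": 0.3, "w_token_cost": 0.3, "w_speed": 0.4}
    result = get_best_model(example_weights, "gpu")
    print(result)  # e.g. {"model": ..., "score": ..., "token_cost": ..., ...}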