| from src.manager.utils.runtime_selector import detect_runtime_environment | |
| from src.cost_benefit import get_best_model | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| def choose_best_model(return_full=False): | |
| env = detect_runtime_environment() | |
| print(f"[INFO] Runtime Environment: {env}") | |
| result = get_best_model(env) | |
| if not result.get("model"): | |
| print("[WARN] No model found under budget — using fallback.") | |
| fallback_model = "gemini-2.0-flash" if os.getenv("GEMINI_KEY") else "llama3.2" | |
| return {"model": fallback_model} if return_full else fallback_model | |
| print(f"[INFO] Auto-selected model: {result['model']} (token cost: {result['token_cost']}, tokens/sec: {result['tokens_sec']})") | |
| return result if return_full else result["model"] | |