Merge branch 'main' of https://github.com/kunpai/HASHIRU into auth0
Browse files- .github/workflows/restore_cost_benefit.yml +92 -0
- .gitignore +2 -1
- bench/benchmarking_hle.py +5 -5
- bench/benchmarking_paper_reviews.py +9 -8
- paper/conference_101719.tex +226 -0
- requirements.txt +1 -1
- src/cost_benefit.py +21 -33
- src/manager/agent_manager.py +297 -112
- src/manager/budget_manager.py +74 -26
- src/manager/llm_models.py +40 -0
- src/manager/manager.py +115 -42
- src/manager/tool_manager.py +50 -16
- src/models/system4.prompt +83 -39
- src/tools/default_tools/agent_cost_manager.py +25 -20
- src/tools/default_tools/agent_creater_tool.py +11 -6
- src/tools/default_tools/ask_agent_tool.py +3 -2
- src/tools/default_tools/fire_agent.py +3 -2
- src/tools/default_tools/get_budget.py +11 -4
- src/tools/default_tools/google_search_tool.py +1 -1
- src/tools/default_tools/memory_manager.py +5 -2
- src/tools/default_tools/test_cost/agent_creator_tool.py +2 -2
- src/tools/default_tools/tool_deletor.py +1 -1
- src/tools/user_tools/get_website_tool.py +60 -7
- src/tools/user_tools/python_sandbox_tool.py +74 -0
- src/tools/user_tools/weather_tool.py +1 -1
.github/workflows/restore_cost_benefit.yml
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# .github/workflows/restore_cost_benefit.yml
|
2 |
+
name: Restore src/cost_benefit.py
|
3 |
+
|
4 |
+
on:
|
5 |
+
push:
|
6 |
+
branches: [main]
|
7 |
+
pull_request:
|
8 |
+
branches: [main]
|
9 |
+
|
10 |
+
jobs:
|
11 |
+
restore:
|
12 |
+
runs-on: ubuntu-latest
|
13 |
+
# allow the GITHUB_TOKEN to push a commit back
|
14 |
+
permissions:
|
15 |
+
contents: write
|
16 |
+
|
17 |
+
steps:
|
18 |
+
- name: Checkout repository
|
19 |
+
uses: actions/checkout@v3
|
20 |
+
with:
|
21 |
+
# you need the token available for the later push
|
22 |
+
token: ${{ secrets.GITHUB_TOKEN }}
|
23 |
+
|
24 |
+
- name: Recreate src/cost_benefit.py
|
25 |
+
run: |
|
26 |
+
mkdir -p src
|
27 |
+
cat > src/cost_benefit.py <<'PY'
|
28 |
+
import argparse
|
29 |
+
import subprocess
|
30 |
+
import time
|
31 |
+
import requests
|
32 |
+
|
33 |
+
def detect_available_budget(runtime_env: str) -> int:
|
34 |
+
"""
|
35 |
+
Return an approximate VRAM‑based budget (MB) when running locally,
|
36 |
+
else default to 100.
|
37 |
+
"""
|
38 |
+
import torch
|
39 |
+
if "local" in runtime_env and torch.cuda.is_available():
|
40 |
+
total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
|
41 |
+
return min(total_vram_mb, 100)
|
42 |
+
return 100
|
43 |
+
|
44 |
+
def get_best_model(runtime_env: str, *, use_local_only: bool = False, use_api_only: bool = False) -> dict:
|
45 |
+
"""
|
46 |
+
Pick the fastest model that fits in the detected budget while
|
47 |
+
respecting the locality filters.
|
48 |
+
"""
|
49 |
+
static_costs = {
|
50 |
+
"llama3.2": {"size": 20, "token_cost": 0.0001, "tokens_sec": 30, "type": "local"},
|
51 |
+
"mistral": {"size": 40, "token_cost": 0.0002, "tokens_sec": 50, "type": "local"},
|
52 |
+
"gemini-2.0-flash": {"size": 60, "token_cost": 0.0005, "tokens_sec": 60, "type": "api"},
|
53 |
+
"gemini-2.5-pro-preview-03-25": {"size": 80, "token_cost": 0.002 , "tokens_sec": 45, "type": "api"},
|
54 |
+
}
|
55 |
+
|
56 |
+
budget = detect_available_budget(runtime_env)
|
57 |
+
best_model, best_speed = None, -1
|
58 |
+
|
59 |
+
for model, info in static_costs.items():
|
60 |
+
if info["size"] > budget:
|
61 |
+
continue
|
62 |
+
if use_local_only and info["type"] != "local":
|
63 |
+
continue
|
64 |
+
if use_api_only and info["type"] != "api":
|
65 |
+
continue
|
66 |
+
if info["tokens_sec"] > best_speed:
|
67 |
+
best_model, best_speed = model, info["tokens_sec"]
|
68 |
+
|
69 |
+
chosen = best_model or "llama3.2" # sensible default
|
70 |
+
return {
|
71 |
+
"model": chosen,
|
72 |
+
"token_cost": static_costs[chosen]["token_cost"],
|
73 |
+
"tokens_sec": static_costs[chosen]["tokens_sec"],
|
74 |
+
"note": None if best_model else "Defaulted because no model met the constraints",
|
75 |
+
}
|
76 |
+
PY
|
77 |
+
|
78 |
+
- name: Commit & push only when the file was missing
|
79 |
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
80 |
+
run: |
|
81 |
+
set -e
|
82 |
+
git config user.name "HARSHIL PATEL"
|
83 |
+
git config user.email "hpppatel@ucdavis.edu"
|
84 |
+
|
85 |
+
# If the file is NOT in the current Git index, add‑commit‑push.
|
86 |
+
if ! git ls-files --error-unmatch src/cost_benefit.py >/dev/null 2>&1; then
|
87 |
+
git add src/cost_benefit.py
|
88 |
+
git commit -m "chore(ci): restore missing src/cost_benefit.py [skip ci]"
|
89 |
+
git push origin HEAD:main
|
90 |
+
else
|
91 |
+
echo "src/cost_benefit.py already exists in the repository – nothing to restore"
|
92 |
+
fi
|
.gitignore
CHANGED
@@ -15,4 +15,5 @@ results/
|
|
15 |
*.fls
|
16 |
*.fdb_latexmk
|
17 |
*.synctex.gz
|
18 |
-
secret_word.json
|
|
|
|
15 |
*.fls
|
16 |
*.fdb_latexmk
|
17 |
*.synctex.gz
|
18 |
+
secret_word.json
|
19 |
+
src/data/
|
bench/benchmarking_hle.py
CHANGED
@@ -101,15 +101,15 @@ def benchmark_hle(num_samples=20, categories=None):
|
|
101 |
# Send query to agent
|
102 |
try:
|
103 |
start_time = time.time()
|
104 |
-
response = client.predict(
|
105 |
-
|
106 |
-
api_name="/
|
107 |
)
|
108 |
end_time = time.time()
|
109 |
|
110 |
target_answer_phrase = sample.get('answer', '').strip()
|
111 |
|
112 |
-
agent_final_response_content = get_last_assistant_content(
|
113 |
|
114 |
is_correct = False
|
115 |
|
@@ -125,7 +125,7 @@ def benchmark_hle(num_samples=20, categories=None):
|
|
125 |
"category": category,
|
126 |
"input": prompt,
|
127 |
"target_output": sample.get('answer', ''),
|
128 |
-
"agent_full_response":
|
129 |
"agent_final_response": agent_final_response_content,
|
130 |
"response_time": end_time - start_time,
|
131 |
"is_correct": is_correct
|
|
|
101 |
# Send query to agent
|
102 |
try:
|
103 |
start_time = time.time()
|
104 |
+
response, history = client.predict(
|
105 |
+
message={"text": prompt, "files": []},
|
106 |
+
api_name="/chat"
|
107 |
)
|
108 |
end_time = time.time()
|
109 |
|
110 |
target_answer_phrase = sample.get('answer', '').strip()
|
111 |
|
112 |
+
agent_final_response_content = get_last_assistant_content(history)
|
113 |
|
114 |
is_correct = False
|
115 |
|
|
|
125 |
"category": category,
|
126 |
"input": prompt,
|
127 |
"target_output": sample.get('answer', ''),
|
128 |
+
"agent_full_response": history,
|
129 |
"agent_final_response": agent_final_response_content,
|
130 |
"response_time": end_time - start_time,
|
131 |
"is_correct": is_correct
|
bench/benchmarking_paper_reviews.py
CHANGED
@@ -64,25 +64,26 @@ def benchmark_paper_reviews(
|
|
64 |
for idx, row in df.iterrows():
|
65 |
paper_id = row[id_col]
|
66 |
title = row["Title"]
|
67 |
-
prompt = "Create THREE agents with
|
68 |
"Each agent should provide a review of the paper, and recommend Accept/Reject for ICLR 2023. " \
|
69 |
"The review should be detailed and include strengths and weaknesses. " \
|
70 |
-
"You
|
|
|
71 |
"The paper title is: " + title + "\n\n" + row[text_col]
|
72 |
print(f"[{idx+1}/{len(df)}] Paper ID: {paper_id}")
|
73 |
|
74 |
try:
|
75 |
start = time.time()
|
76 |
-
resp = client.predict(
|
77 |
-
|
78 |
-
api_name="/
|
79 |
)
|
80 |
elapsed = time.time() - start
|
81 |
|
82 |
result = {
|
83 |
"paper_id": paper_id,
|
84 |
"prompt_snippet": prompt[:200],
|
85 |
-
"agent_review":
|
86 |
"ground_truth": row["Decision"],
|
87 |
"response_time": elapsed
|
88 |
}
|
@@ -91,7 +92,7 @@ def benchmark_paper_reviews(
|
|
91 |
with open(out_path, "a") as f:
|
92 |
f.write(json.dumps(result) + "\n")
|
93 |
|
94 |
-
print(f" → {elapsed:.2f}s, review length {len(
|
95 |
results.append(result)
|
96 |
|
97 |
# small delay
|
@@ -105,6 +106,6 @@ def benchmark_paper_reviews(
|
|
105 |
if __name__ == "__main__":
|
106 |
# example usage: adjust path & sample count as needed
|
107 |
benchmark_paper_reviews(
|
108 |
-
csv_path="ICLR_2023.csv",
|
109 |
num_samples=1
|
110 |
)
|
|
|
64 |
for idx, row in df.iterrows():
|
65 |
paper_id = row[id_col]
|
66 |
title = row["Title"]
|
67 |
+
prompt = "Create THREE agents with relevant personalities, expertise, and review styles. " \
|
68 |
"Each agent should provide a review of the paper, and recommend Accept/Reject for ICLR 2023. " \
|
69 |
"The review should be detailed and include strengths and weaknesses. " \
|
70 |
+
"You MUST use ArxivTool and WikipediaTool to get more information about novelty and correctness. " \
|
71 |
+
"GIVE A FINAL DECISION in the form of \"FINAL DECISION: <Accept/Reject>\". " \
|
72 |
"The paper title is: " + title + "\n\n" + row[text_col]
|
73 |
print(f"[{idx+1}/{len(df)}] Paper ID: {paper_id}")
|
74 |
|
75 |
try:
|
76 |
start = time.time()
|
77 |
+
resp, history = client.predict(
|
78 |
+
message={"text": prompt, "files": []},
|
79 |
+
api_name="/chat"
|
80 |
)
|
81 |
elapsed = time.time() - start
|
82 |
|
83 |
result = {
|
84 |
"paper_id": paper_id,
|
85 |
"prompt_snippet": prompt[:200],
|
86 |
+
"agent_review": history,
|
87 |
"ground_truth": row["Decision"],
|
88 |
"response_time": elapsed
|
89 |
}
|
|
|
92 |
with open(out_path, "a") as f:
|
93 |
f.write(json.dumps(result) + "\n")
|
94 |
|
95 |
+
print(f" → {elapsed:.2f}s, review length {len(history)} chars")
|
96 |
results.append(result)
|
97 |
|
98 |
# small delay
|
|
|
106 |
if __name__ == "__main__":
|
107 |
# example usage: adjust path & sample count as needed
|
108 |
benchmark_paper_reviews(
|
109 |
+
csv_path="bench/data/ICLR_2023.csv",
|
110 |
num_samples=1
|
111 |
)
|
paper/conference_101719.tex
CHANGED
@@ -8,6 +8,15 @@
|
|
8 |
\usepackage{textcomp}
|
9 |
\usepackage{xcolor}
|
10 |
\usepackage{hyperref}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
|
12 |
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
|
13 |
\begin{document}
|
@@ -184,6 +193,19 @@ To enable HASHIRU agents to learn from past interactions and rectify previous er
|
|
184 |
|
185 |
Memory retrieval is based on semantic similarity between the current context (e.g., task description, recent actions, error messages) and the stored memory entries. We utilize embeddings generated by the \textbf{all-MiniLM-L6-v2} model \cite{wang2020minilmdeepselfattentiondistillation} to represent both the query and the stored memories in a high-dimensional vector space. Relevance is determined by calculating the \textbf{cosine similarity} between the query embedding and each memory embedding. Memories exceeding a predefined similarity threshold are retrieved and provided to the CEO agent (or relevant Employee agents) as contextual information. This allows the system to draw upon past experiences, understand why previous approaches failed, and potentially adjust its strategy to avoid repeating mistakes, thereby improving performance and efficiency over time.
|
186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
\section{Experimental Setup}
|
188 |
\label{sec:experiments}
|
189 |
|
@@ -250,4 +272,208 @@ We evaluate using quantitative and qualitative metrics:
|
|
250 |
\bibliography{references}
|
251 |
\bibliographystyle{plain}
|
252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
\end{document}
|
|
|
8 |
\usepackage{textcomp}
|
9 |
\usepackage{xcolor}
|
10 |
\usepackage{hyperref}
|
11 |
+
\usepackage{longtable}
|
12 |
+
\usepackage{tabularx}
|
13 |
+
\usepackage{tcolorbox}
|
14 |
+
\tcbuselibrary{skins}
|
15 |
+
|
16 |
+
% Define styles for user and assistant boxes
|
17 |
+
\newtcolorbox{userbox}{colback=blue!5,colframe=blue!40!black,title=User}
|
18 |
+
\newtcolorbox{assistantbox}{colback=gray!10,colframe=black,title=Assistant}
|
19 |
+
|
20 |
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
|
21 |
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
|
22 |
\begin{document}
|
|
|
193 |
|
194 |
Memory retrieval is based on semantic similarity between the current context (e.g., task description, recent actions, error messages) and the stored memory entries. We utilize embeddings generated by the \textbf{all-MiniLM-L6-v2} model \cite{wang2020minilmdeepselfattentiondistillation} to represent both the query and the stored memories in a high-dimensional vector space. Relevance is determined by calculating the \textbf{cosine similarity} between the query embedding and each memory embedding. Memories exceeding a predefined similarity threshold are retrieved and provided to the CEO agent (or relevant Employee agents) as contextual information. This allows the system to draw upon past experiences, understand why previous approaches failed, and potentially adjust its strategy to avoid repeating mistakes, thereby improving performance and efficiency over time.
|
195 |
|
196 |
+
\section{Case Studies}
|
197 |
+
\label{sec:casestudies}
|
198 |
+
This section presents two case studies demonstrating HASHIRU's self-improvement capabilities in practical settings. We highlight two instances where HASHIRU enhanced its own architecture and functionality: (1) by generating a comprehensive cost model for base models suitable for specialized agent creation, and (2) by autonomously integrating new tools for the CEO agent.
|
199 |
+
|
200 |
+
\subsection{Case Study 1: Self-Generating the Cost Model for Agent Specialization}
|
201 |
+
\label{sec:casestudy1_costmodel}
|
202 |
+
An accurate cost model is essential for optimizing resource allocation and ensuring the efficiency of specialized agents within HASHIRU. Traditionally, constructing this model involves manual research into local model performance relative to hardware (e.g., 16~GiB VRAM) and the API costs of cloud-hosted alternatives. HASHIRU automated this labor-intensive process by leveraging its web search capabilities to autonomously identify and incorporate the necessary cost data into its internal model. The results were successfully committed to the codebase\footnote{\url{https://github.com/kunpai/HASHIRU/commit/70dc268b121cbd7c50c6691645d8a99912766965}}.
|
203 |
+
|
204 |
+
\subsection{Case Study 2: Autonomous Tool Integration for the CEO Agent}
|
205 |
+
\label{sec:casestudy2_tools}
|
206 |
+
Extending the CEO agent's capabilities through tool integration is vital for broadening HASHIRU's operational scope. Manual tool development typically requires detailed analysis of existing tool schemas and diligent code implementation. HASHIRU streamlined this process by employing a few-shot learning approach, using an existing tool as a template to guide the autonomous creation of new tools~\cite{brown2020language}. The newly generated tools were directly integrated into the codebase\footnote{\url{https://github.com/kunpai/HASHIRU/commit/193e10b2b00917256b7cc01cb3aa5ac7b6a6c174}}.
|
207 |
+
|
208 |
+
|
209 |
\section{Experimental Setup}
|
210 |
\label{sec:experiments}
|
211 |
|
|
|
272 |
\bibliography{references}
|
273 |
\bibliographystyle{plain}
|
274 |
|
275 |
+
\appendix
|
276 |
+
\section{Examples of HASHIRU in Action}
|
277 |
+
\label{sec:examples}
|
278 |
+
In this section, we provide examples of how HASHIRU operates in practice, demonstrating its dynamic management, hybrid intelligence, memory and autonomous tool creation capabilities.
|
279 |
+
|
280 |
+
\subsection{Example 1: Memory}
|
281 |
+
\begin{figure}[h]
|
282 |
+
\centering
|
283 |
+
\begin{minipage}{0.45\textwidth}
|
284 |
+
\begin{userbox}
|
285 |
+
I am vegetarian.
|
286 |
+
\end{userbox}
|
287 |
+
|
288 |
+
\begin{assistantbox}
|
289 |
+
Okay, I'll keep that in mind. I'll store this preference in my memory.
|
290 |
+
|
291 |
+
\texttt{Invoking MemoryManager with \{`memory': `The user is vegetarian.', `action': `add\_memory', `key': `vegetarian'\}}
|
292 |
+
|
293 |
+
I have stored that you are vegetarian. How can I help you today?
|
294 |
+
\end{assistantbox}
|
295 |
+
|
296 |
+
\begin{userbox}
|
297 |
+
Give me a bacon pasta recipe.
|
298 |
+
\end{userbox}
|
299 |
+
|
300 |
+
\begin{assistantbox}
|
301 |
+
Memories
|
302 |
+
|
303 |
+
Memories: [`The user is vegetarian.']
|
304 |
+
|
305 |
+
My apologies, but I cannot provide a bacon pasta recipe since you are vegetarian. Would you like a vegetarian pasta recipe instead?
|
306 |
+
\end{assistantbox}
|
307 |
+
\end{minipage}
|
308 |
+
\caption{HASHIRU enforcing a stored dietary preference in dialogue using its Memory Function.}
|
309 |
+
\label{fig:memory_example}
|
310 |
+
\end{figure}
|
311 |
+
|
312 |
+
\begin{figure}[ht]
|
313 |
+
\centering
|
314 |
+
\begin{minipage}
|
315 |
+
{0.45\textwidth}
|
316 |
+
\begin{userbox}
|
317 |
+
I can eat bacon actually
|
318 |
+
\end{userbox}
|
319 |
+
|
320 |
+
\begin{assistantbox}
|
321 |
+
Memories
|
322 |
+
|
323 |
+
Memories: [’The user is vegetarian.’]
|
324 |
+
|
325 |
+
I see. My apologies for the confusion. I had a conflicting memory that you were vegetarian. I will remove that memory and proceed with finding a bacon pasta recipe.
|
326 |
+
|
327 |
+
\texttt{Invoking MemoryManager with \{'action': 'delete\_memory', 'key': 'The user is vegetarian.'\}}
|
328 |
+
|
329 |
+
OK, I have removed the memory. Now I will search for a bacon pasta recipe.
|
330 |
+
|
331 |
+
\texttt{Invoking GoogleSearchTool with \{'query': 'bacon pasta recipe'\}}
|
332 |
+
|
333 |
+
Here are some bacon pasta recipes I found:
|
334 |
+
\begin{itemize}
|
335 |
+
\item \href{https://www.sprinklesandsprouts.com/garlic-bacon-pasta/}{Garlic Bacon Pasta}
|
336 |
+
\item \href{https://www.theburntbuttertable.com/creamy-bacon-pasta-sauce/}{Creamy Bacon Pasta Sauce}
|
337 |
+
\item \dots
|
338 |
+
\end{itemize}
|
339 |
+
|
340 |
+
Would you like me to get the content of any of these websites?
|
341 |
+
\end{assistantbox}
|
342 |
+
\end{minipage}
|
343 |
+
\caption{HASHIRU updating and deleting its memory based on user input. Continuation of Figure \ref{fig:memory_example}.}
|
344 |
+
\label{fig:memory_example2}
|
345 |
+
\end{figure}
|
346 |
+
|
347 |
+
\subsection{Example 2: Novelty Assessment via Coordinated Tool Use}
|
348 |
+
|
349 |
+
\begin{figure}[h]
|
350 |
+
\centering
|
351 |
+
\begin{minipage}{0.95\textwidth}
|
352 |
+
|
353 |
+
\begin{userbox}
|
354 |
+
Is this idea novel: ``a dynamically managed, resource-aware hierarchical multi-agent system that prioritizes cost-efficient local models, integrates autonomous tool creation, and employs an internal economic model to balance performance, adaptability, and stability in intelligent task coordination''?
|
355 |
+
\end{userbox}
|
356 |
+
|
357 |
+
\begin{assistantbox}
|
358 |
+
To determine if the idea is novel, I will break it down into smaller parts and search for each part. Then I will combine the results to see if the whole idea is novel.
|
359 |
+
|
360 |
+
Here's the breakdown:
|
361 |
+
\begin{itemize}
|
362 |
+
\item Dynamically managed multi-agent system
|
363 |
+
\item Resource-aware hierarchical multi-agent system
|
364 |
+
\item Cost-efficient local models in multi-agent systems
|
365 |
+
\item Autonomous tool creation in multi-agent systems
|
366 |
+
\item Internal economic model for multi-agent systems
|
367 |
+
\end{itemize}
|
368 |
+
|
369 |
+
I will use the \texttt{GoogleSearchTool} to search for each of these parts.
|
370 |
+
|
371 |
+
\texttt{Invoking GoogleSearchTool with \{'query': 'dynamically managed multi-agent system'\}}
|
372 |
+
|
373 |
+
\texttt{Invoking GoogleSearchTool with \{'query': 'resource-aware hierarchical multi-agent system'\}}
|
374 |
+
|
375 |
+
\texttt{Invoking GoogleSearchTool with \{'query': 'cost-efficient local models in multi-agent systems'\}}
|
376 |
+
|
377 |
+
\texttt{Invoking GoogleSearchTool with \{'query': 'autonomous tool creation in multi-agent systems'\}}
|
378 |
+
|
379 |
+
\texttt{Invoking GoogleSearchTool with \{'query': 'internal economic model for multi-agent systems'\}}
|
380 |
+
|
381 |
+
The search results indicate that each of the individual components of the idea has been explored in existing research. However, the novelty lies in the specific combination and integration of these elements.
|
382 |
+
|
383 |
+
To further assess the novelty, I will use the \texttt{ArxivTool} to search for papers that combine multiple aspects of the idea.
|
384 |
+
|
385 |
+
\texttt{Invoking ArxivTool with \{'query': 'dynamically managed resource-aware hierarchical multi-agent system'\}}
|
386 |
+
|
387 |
+
\texttt{Invoking ArxivTool with \{'query': 'multi-agent system autonomous tool creation economic model'\}}
|
388 |
+
|
389 |
+
The ArxivTool results show some papers that touch on combinations of these concepts, but none seem to address the entire idea as presented. Specifically, the combination of dynamic management, resource-awareness, hierarchical structure, cost-efficient local models, autonomous tool creation, and an internal economic model appears to be a novel synthesis.
|
390 |
+
|
391 |
+
\textbf{Therefore, based on the search results, the idea seems novel.}
|
392 |
+
\end{assistantbox}
|
393 |
+
\caption{HASHIRU assessing the novelty of a complex idea by breaking it down into components and searching for each part.}
|
394 |
+
\end{minipage}
|
395 |
+
\label{fig:novelty_assessment}
|
396 |
+
\end{figure}
|
397 |
+
|
398 |
+
\clearpage
|
399 |
+
\twocolumn
|
400 |
+
|
401 |
+
\begin{figure}
|
402 |
+
\centering
|
403 |
+
\begin{minipage}{0.95\textwidth}
|
404 |
+
\begin{userbox}
|
405 |
+
Which papers mention the ideas?
|
406 |
+
\end{userbox}
|
407 |
+
|
408 |
+
\begin{assistantbox}
|
409 |
+
To provide a comprehensive answer, I will analyze the summaries of the papers returned by the ArxivTool and GoogleSearchTool to identify which papers mention the key ideas in your prompt.
|
410 |
+
|
411 |
+
\textbf{The key ideas are:}
|
412 |
+
\begin{itemize}
|
413 |
+
\item Dynamically managed
|
414 |
+
\item Resource-aware
|
415 |
+
\item Hierarchical
|
416 |
+
\item Cost-efficient local models
|
417 |
+
\item Autonomous tool creation
|
418 |
+
\item Internal economic model
|
419 |
+
\end{itemize}
|
420 |
+
|
421 |
+
I will go through each paper and identify which of these ideas are mentioned.
|
422 |
+
|
423 |
+
\texttt{Invoking ArxivTool with \{'query': 'dynamically managed resource-aware hierarchical multi-agent system'\}} \\
|
424 |
+
\texttt{Invoking ArxivTool with \{'query': 'multi-agent system autonomous tool creation economic model'\}}
|
425 |
+
|
426 |
+
\textbf{ArxivTool Results (Query: ``dynamically managed resource-aware hierarchical multi-agent system'')}
|
427 |
+
|
428 |
+
\begin{itemize}
|
429 |
+
\item \textit{Hierarchical stock assessment methods improve management performance in multi-species, data-limited fisheries} \\
|
430 |
+
Hierarchical: Yes \\
|
431 |
+
Dynamically managed: Implied through ``management performance''
|
432 |
+
|
433 |
+
\item \textit{Hierarchical Reinforcement Learning for Optimal Agent Grouping in Cooperative Systems} \\
|
434 |
+
Hierarchical: Yes \\
|
435 |
+
Dynamically managed: Yes, through ``dynamic and optimal policy adjustments.''
|
436 |
+
|
437 |
+
\item \textit{Efficient Hierarchical Storage Management Framework Empowered by Reinforcement Learning} \\
|
438 |
+
Hierarchical: Yes \\
|
439 |
+
Cost-efficient local models: Implied through ``efficient'' \\
|
440 |
+
Dynamically managed: Yes, through ``dynamic migration policy''
|
441 |
+
|
442 |
+
\item \textit{Autonomous Deep Agent} \\
|
443 |
+
Hierarchical: Yes \\
|
444 |
+
Autonomous tool creation: Yes, via ``Autonomous API \& Tool Creation (AATC) system'' \\
|
445 |
+
Dynamically managed: Yes, ``dynamically decomposes high-level objectives''
|
446 |
+
|
447 |
+
\item \textit{Adaptive Network Intervention for Complex Systems: A Hierarchical Graph Reinforcement Learning Approach} \\
|
448 |
+
Hierarchical: Yes \\
|
449 |
+
Dynamically managed: Yes, through ``dynamic networks''
|
450 |
+
\end{itemize}
|
451 |
+
|
452 |
+
\textbf{ArxivTool Results (Query: ``multi-agent system autonomous tool creation economic model'')}
|
453 |
+
|
454 |
+
\begin{itemize}
|
455 |
+
\item \textit{Autonomics: an autonomous and intelligent economic platform and next generation money tool} \\
|
456 |
+
Autonomous tool creation: Implied \\
|
457 |
+
Internal economic model: Yes
|
458 |
+
|
459 |
+
\item \textit{From Single Agent to Multi-Agent: Improving Traffic Signal Control} \\
|
460 |
+
No relevant keywords mentioned.
|
461 |
+
|
462 |
+
\item \textit{Autonomous Deep Agent} \\
|
463 |
+
Hierarchical: Yes \\
|
464 |
+
Autonomous tool creation: Yes
|
465 |
+
|
466 |
+
\item \textit{Beyond the Sum: Unlocking AI Agents Potential Through Market Forces} \\
|
467 |
+
Internal economic model: Yes
|
468 |
+
|
469 |
+
\item \textit{Particle-Hole Creation in Condensed Matter: A Conceptual Framework for Modeling Money-Debt Dynamics in Economics} \\
|
470 |
+
Internal economic model: Yes
|
471 |
+
\end{itemize}
|
472 |
+
|
473 |
+
\end{assistantbox}
|
474 |
+
\caption{Continuation of Figure \ref{fig:novelty_assessment}. HASHIRU analyzing the summaries of the papers returned by the ArxivTool and GoogleSearchTool to identify which papers mention the key ideas in the user prompt.}
|
475 |
+
\end{minipage}
|
476 |
+
|
477 |
+
\end{figure}
|
478 |
+
|
479 |
\end{document}
|
requirements.txt
CHANGED
@@ -51,7 +51,7 @@ protobuf==5.29.4
|
|
51 |
pyarrow==20.0.0
|
52 |
pyasn1==0.6.1
|
53 |
pyasn1_modules==0.4.2
|
54 |
-
pydantic==2.
|
55 |
pydantic_core==2.33.1
|
56 |
pydub==0.25.1
|
57 |
Pygments==2.19.1
|
|
|
51 |
pyarrow==20.0.0
|
52 |
pyasn1==0.6.1
|
53 |
pyasn1_modules==0.4.2
|
54 |
+
pydantic==2.10.6
|
55 |
pydantic_core==2.33.1
|
56 |
pydub==0.25.1
|
57 |
Pygments==2.19.1
|
src/cost_benefit.py
CHANGED
@@ -4,35 +4,30 @@ import time
|
|
4 |
import requests
|
5 |
|
6 |
def detect_available_budget(runtime_env: str) -> int:
|
|
|
|
|
|
|
|
|
7 |
import torch
|
8 |
if "local" in runtime_env and torch.cuda.is_available():
|
9 |
total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
|
10 |
return min(total_vram_mb, 100)
|
11 |
-
|
12 |
-
return 100
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
17 |
static_costs = {
|
18 |
-
"llama3.2":
|
19 |
-
"mistral":
|
20 |
-
"gemini-2.0-flash":
|
21 |
-
"gemini-2.5-pro-preview-03-25": {"size": 80, "token_cost": 0.002, "tokens_sec": 45, "type": "api"}
|
22 |
}
|
23 |
|
24 |
-
def detect_available_budget(runtime_env: str) -> int:
|
25 |
-
import torch
|
26 |
-
if "local" in runtime_env and torch.cuda.is_available():
|
27 |
-
total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
|
28 |
-
return min(total_vram_mb, 100)
|
29 |
-
else:
|
30 |
-
return 100
|
31 |
-
|
32 |
budget = detect_available_budget(runtime_env)
|
33 |
-
|
34 |
-
best_model = None
|
35 |
-
best_speed = -1
|
36 |
|
37 |
for model, info in static_costs.items():
|
38 |
if info["size"] > budget:
|
@@ -42,19 +37,12 @@ def get_best_model(runtime_env: str, use_local_only=False, use_api_only=False) -
|
|
42 |
if use_api_only and info["type"] != "api":
|
43 |
continue
|
44 |
if info["tokens_sec"] > best_speed:
|
45 |
-
best_model = model
|
46 |
-
best_speed = info["tokens_sec"]
|
47 |
-
|
48 |
-
if not best_model:
|
49 |
-
return {
|
50 |
-
"model": "llama3.2",
|
51 |
-
"token_cost": static_costs["llama3.2"]["token_cost"],
|
52 |
-
"tokens_sec": static_costs["llama3.2"]["tokens_sec"],
|
53 |
-
"note": "Defaulted due to no models fitting filters"
|
54 |
-
}
|
55 |
|
|
|
56 |
return {
|
57 |
-
"model":
|
58 |
-
"token_cost": static_costs[
|
59 |
-
"tokens_sec": static_costs[
|
|
|
60 |
}
|
|
|
4 |
import requests
|
5 |
|
6 |
def detect_available_budget(runtime_env: str) -> int:
|
7 |
+
"""
|
8 |
+
Return an approximate VRAM‑based budget (MB) when running locally,
|
9 |
+
else default to 100.
|
10 |
+
"""
|
11 |
import torch
|
12 |
if "local" in runtime_env and torch.cuda.is_available():
|
13 |
total_vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 ** 2)
|
14 |
return min(total_vram_mb, 100)
|
15 |
+
return 100
|
|
|
16 |
|
17 |
+
def get_best_model(runtime_env: str, *, use_local_only: bool = False, use_api_only: bool = False) -> dict:
|
18 |
+
"""
|
19 |
+
Pick the fastest model that fits in the detected budget while
|
20 |
+
respecting the locality filters.
|
21 |
+
"""
|
22 |
static_costs = {
|
23 |
+
"llama3.2": {"size": 20, "token_cost": 0.0001, "tokens_sec": 30, "type": "local"},
|
24 |
+
"mistral": {"size": 40, "token_cost": 0.0002, "tokens_sec": 50, "type": "local"},
|
25 |
+
"gemini-2.0-flash": {"size": 60, "token_cost": 0.0005, "tokens_sec": 60, "type": "api"},
|
26 |
+
"gemini-2.5-pro-preview-03-25": {"size": 80, "token_cost": 0.002 , "tokens_sec": 45, "type": "api"},
|
27 |
}
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
budget = detect_available_budget(runtime_env)
|
30 |
+
best_model, best_speed = None, -1
|
|
|
|
|
31 |
|
32 |
for model, info in static_costs.items():
|
33 |
if info["size"] > budget:
|
|
|
37 |
if use_api_only and info["type"] != "api":
|
38 |
continue
|
39 |
if info["tokens_sec"] > best_speed:
|
40 |
+
best_model, best_speed = model, info["tokens_sec"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
+
chosen = best_model or "llama3.2" # sensible default
|
43 |
return {
|
44 |
+
"model": chosen,
|
45 |
+
"token_cost": static_costs[chosen]["token_cost"],
|
46 |
+
"tokens_sec": static_costs[chosen]["tokens_sec"],
|
47 |
+
"note": None if best_model else "Defaulted because no model met the constraints",
|
48 |
}
|
src/manager/agent_manager.py
CHANGED
@@ -8,6 +8,7 @@ from src.manager.utils.streamlit_interface import output_assistant_response
|
|
8 |
from google import genai
|
9 |
from google.genai import types
|
10 |
from google.genai.types import *
|
|
|
11 |
import os
|
12 |
from dotenv import load_dotenv
|
13 |
from src.manager.budget_manager import BudgetManager
|
@@ -15,75 +16,114 @@ from src.manager.budget_manager import BudgetManager
|
|
15 |
MODEL_PATH = "./src/models/"
|
16 |
MODEL_FILE_PATH = "./src/models/models.json"
|
17 |
|
|
|
18 |
class Agent(ABC):
|
19 |
-
|
20 |
-
def __init__(self, agent_name: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
self.agent_name = agent_name
|
22 |
self.base_model = base_model
|
23 |
self.system_prompt = system_prompt
|
24 |
-
self.
|
25 |
-
self.
|
|
|
|
|
26 |
self.create_model()
|
27 |
-
|
28 |
@abstractmethod
|
29 |
def create_model(self) -> None:
|
30 |
"""Create and Initialize agent"""
|
31 |
pass
|
32 |
-
|
33 |
@abstractmethod
|
34 |
def ask_agent(self, prompt: str) -> str:
|
35 |
"""ask agent a question"""
|
36 |
pass
|
37 |
-
|
38 |
@abstractmethod
|
39 |
-
def delete_agent(self) ->None:
|
40 |
"""delete agent"""
|
41 |
pass
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
def get_costs(self):
|
44 |
return {
|
45 |
-
"
|
46 |
-
"
|
|
|
|
|
47 |
}
|
48 |
-
|
|
|
49 |
class OllamaAgent(Agent):
|
50 |
-
|
|
|
51 |
def create_model(self):
|
52 |
ollama_response = ollama.create(
|
53 |
-
model
|
54 |
-
from_
|
55 |
-
system
|
56 |
-
stream
|
57 |
)
|
58 |
-
|
59 |
def ask_agent(self, prompt):
|
60 |
output_assistant_response(f"Asked Agent {self.agent_name} a question")
|
61 |
agent_response = ollama.chat(
|
62 |
model=self.agent_name,
|
63 |
messages=[{"role": "user", "content": prompt}],
|
64 |
)
|
65 |
-
output_assistant_response(
|
|
|
66 |
return agent_response.message.content
|
67 |
-
|
68 |
def delete_agent(self):
|
69 |
ollama.delete(self.agent_name)
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
class GeminiAgent(Agent):
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
load_dotenv()
|
74 |
self.api_key = os.getenv("GEMINI_KEY")
|
75 |
if not self.api_key:
|
76 |
-
raise ValueError(
|
77 |
-
|
|
|
78 |
# Initialize the Gemini API
|
79 |
self.client = genai.Client(api_key=self.api_key)
|
80 |
-
|
81 |
# Call parent constructor after API setup
|
82 |
-
super().__init__(agent_name,
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
def create_model(self):
|
85 |
self.messages = []
|
86 |
-
|
87 |
def ask_agent(self, prompt):
|
88 |
response = self.client.models.generate_content(
|
89 |
model=self.base_model,
|
@@ -93,70 +133,175 @@ class GeminiAgent(Agent):
|
|
93 |
)
|
94 |
)
|
95 |
return response.text
|
96 |
-
|
97 |
def delete_agent(self):
|
98 |
self.messages = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
@singleton
|
100 |
class AgentManager():
|
101 |
budget_manager: BudgetManager = BudgetManager()
|
|
|
|
|
|
|
|
|
102 |
def __init__(self):
|
103 |
self._agents: Dict[str, Agent] = {}
|
104 |
-
self._agent_types ={
|
105 |
"ollama": OllamaAgent,
|
106 |
-
"gemini": GeminiAgent
|
|
|
107 |
}
|
108 |
-
|
109 |
self._load_agents()
|
110 |
-
|
111 |
-
def
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
if agent_name in self._agents:
|
118 |
raise ValueError(f"Agent {agent_name} already exists")
|
119 |
-
|
120 |
self._agents[agent_name] = self.create_agent_class(
|
121 |
-
agent_name,
|
122 |
-
base_model,
|
123 |
-
system_prompt,
|
124 |
description=description,
|
125 |
-
|
126 |
-
|
|
|
|
|
127 |
**additional_params # For any future parameters we might want to add
|
128 |
)
|
129 |
-
|
130 |
-
#save agent to file
|
131 |
self._save_agent(
|
132 |
-
agent_name,
|
133 |
-
base_model,
|
134 |
-
system_prompt,
|
135 |
description=description,
|
136 |
-
|
137 |
-
|
|
|
|
|
138 |
**additional_params # For any future parameters we might want to add
|
139 |
)
|
140 |
-
return (self._agents[agent_name],
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
agent_type = self._get_agent_type(base_model)
|
150 |
agent_class = self._agent_types.get(agent_type)
|
151 |
-
|
152 |
if not agent_class:
|
153 |
raise ValueError(f"Unsupported base model {base_model}")
|
154 |
-
|
155 |
-
created_agent = agent_class(agent_name,
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
# create agent
|
161 |
return created_agent
|
162 |
|
@@ -165,21 +310,23 @@ class AgentManager():
|
|
165 |
if agent_name not in self._agents:
|
166 |
raise ValueError(f"Agent {agent_name} does not exists")
|
167 |
return self._agents[agent_name]
|
168 |
-
|
169 |
def list_agents(self) -> dict:
|
170 |
"""Return agent information (name, description, costs)"""
|
171 |
try:
|
172 |
if os.path.exists(MODEL_FILE_PATH):
|
173 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
174 |
full_models = json.loads(f.read())
|
175 |
-
|
176 |
# Create a simplified version with only the description and costs
|
177 |
simplified_agents = {}
|
178 |
for name, data in full_models.items():
|
179 |
simplified_agents[name] = {
|
180 |
"description": data.get("description", ""),
|
181 |
-
"
|
182 |
-
"
|
|
|
|
|
183 |
"base_model": data.get("base_model", ""),
|
184 |
}
|
185 |
return simplified_agents
|
@@ -188,116 +335,154 @@ class AgentManager():
|
|
188 |
except Exception as e:
|
189 |
output_assistant_response(f"Error listing agents: {e}")
|
190 |
return {}
|
191 |
-
|
192 |
def delete_agent(self, agent_name: str) -> int:
|
193 |
-
agent = self.get_agent(agent_name)
|
194 |
-
|
195 |
-
self.budget_manager.
|
|
|
196 |
agent.delete_agent()
|
197 |
-
|
198 |
del self._agents[agent_name]
|
199 |
try:
|
200 |
if os.path.exists(MODEL_FILE_PATH):
|
201 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
202 |
models = json.loads(f.read())
|
203 |
-
|
204 |
del models[agent_name]
|
205 |
with open(MODEL_FILE_PATH, "w", encoding="utf8") as f:
|
206 |
f.write(json.dumps(models, indent=4))
|
207 |
except Exception as e:
|
208 |
output_assistant_response(f"Error deleting agent: {e}")
|
209 |
-
return self.budget_manager.
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
**additional_params) -> None:
|
222 |
"""Save a single agent to the models.json file"""
|
223 |
try:
|
224 |
# Ensure the directory exists
|
225 |
os.makedirs(MODEL_PATH, exist_ok=True)
|
226 |
-
|
227 |
# Read existing models file or create empty dict if it doesn't exist
|
228 |
try:
|
229 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
230 |
models = json.loads(f.read())
|
231 |
except (FileNotFoundError, json.JSONDecodeError):
|
232 |
models = {}
|
233 |
-
|
234 |
# Update the models dict with the new agent
|
235 |
models[agent_name] = {
|
236 |
"base_model": base_model,
|
237 |
"description": description,
|
238 |
"system_prompt": system_prompt,
|
239 |
-
"
|
240 |
-
"
|
|
|
|
|
241 |
}
|
242 |
-
|
243 |
# Add any additional parameters that were passed
|
244 |
for key, value in additional_params.items():
|
245 |
models[agent_name][key] = value
|
246 |
-
|
247 |
# Write the updated models back to the file
|
248 |
with open(MODEL_FILE_PATH, "w", encoding="utf8") as f:
|
249 |
f.write(json.dumps(models, indent=4))
|
250 |
-
|
251 |
except Exception as e:
|
252 |
output_assistant_response(f"Error saving agent {agent_name}: {e}")
|
253 |
|
254 |
-
def _get_agent_type(self, base_model)->str:
|
255 |
|
256 |
if base_model == "llama3.2":
|
257 |
return "ollama"
|
258 |
elif base_model == "mistral":
|
259 |
return "ollama"
|
|
|
|
|
260 |
elif "gemini" in base_model:
|
261 |
return "gemini"
|
|
|
|
|
262 |
else:
|
263 |
return "unknown"
|
264 |
-
|
265 |
def _load_agents(self) -> None:
|
266 |
"""Load agent configurations from disk"""
|
267 |
try:
|
268 |
if not os.path.exists(MODEL_FILE_PATH):
|
269 |
return
|
270 |
-
|
271 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
272 |
models = json.loads(f.read())
|
273 |
-
|
274 |
for name, data in models.items():
|
275 |
if name in self._agents:
|
276 |
continue
|
277 |
base_model = data["base_model"]
|
278 |
system_prompt = data["system_prompt"]
|
279 |
-
|
280 |
-
|
|
|
|
|
281 |
model_type = self._get_agent_type(base_model)
|
282 |
manager_class = self._agent_types.get(model_type)
|
283 |
-
|
284 |
if manager_class:
|
285 |
# Create the agent with the appropriate manager class
|
286 |
self._agents[name] = self.create_agent_class(
|
287 |
-
name,
|
288 |
-
base_model,
|
289 |
-
system_prompt,
|
290 |
description=data.get("description", ""),
|
291 |
-
|
292 |
-
|
|
|
|
|
293 |
**data.get("additional_params", {})
|
294 |
)
|
295 |
self._agents[name] = manager_class(
|
296 |
-
name,
|
297 |
base_model,
|
298 |
system_prompt,
|
299 |
-
|
300 |
-
|
|
|
|
|
301 |
)
|
302 |
except Exception as e:
|
303 |
-
output_assistant_response(f"Error loading agents: {e}")
|
|
|
8 |
from google import genai
|
9 |
from google.genai import types
|
10 |
from google.genai.types import *
|
11 |
+
from groq import Groq
|
12 |
import os
|
13 |
from dotenv import load_dotenv
|
14 |
from src.manager.budget_manager import BudgetManager
|
|
|
16 |
MODEL_PATH = "./src/models/"
|
17 |
MODEL_FILE_PATH = "./src/models/models.json"
|
18 |
|
19 |
+
|
20 |
class Agent(ABC):
|
21 |
+
|
22 |
+
def __init__(self, agent_name: str,
|
23 |
+
base_model: str,
|
24 |
+
system_prompt: str,
|
25 |
+
create_resource_cost: int,
|
26 |
+
invoke_resource_cost: int,
|
27 |
+
create_expense_cost: int = 0,
|
28 |
+
invoke_expense_cost: int = 0,):
|
29 |
self.agent_name = agent_name
|
30 |
self.base_model = base_model
|
31 |
self.system_prompt = system_prompt
|
32 |
+
self.create_resource_cost = create_resource_cost
|
33 |
+
self.invoke_resource_cost = invoke_resource_cost
|
34 |
+
self.create_expense_cost = create_expense_cost
|
35 |
+
self.invoke_expense_cost = invoke_expense_cost
|
36 |
self.create_model()
|
37 |
+
|
38 |
@abstractmethod
|
39 |
def create_model(self) -> None:
|
40 |
"""Create and Initialize agent"""
|
41 |
pass
|
42 |
+
|
43 |
@abstractmethod
|
44 |
def ask_agent(self, prompt: str) -> str:
|
45 |
"""ask agent a question"""
|
46 |
pass
|
47 |
+
|
48 |
@abstractmethod
|
49 |
+
def delete_agent(self) -> None:
|
50 |
"""delete agent"""
|
51 |
pass
|
52 |
+
|
53 |
+
@abstractmethod
|
54 |
+
def get_type(self) -> None:
|
55 |
+
"""get agent type"""
|
56 |
+
pass
|
57 |
+
|
58 |
def get_costs(self):
|
59 |
return {
|
60 |
+
"create_resource_cost": self.create_resource_cost,
|
61 |
+
"invoke_resource_cost": self.invoke_resource_cost,
|
62 |
+
"create_expense_cost": self.create_expense_cost,
|
63 |
+
"invoke_expense_cost": self.invoke_expense_cost
|
64 |
}
|
65 |
+
|
66 |
+
|
67 |
class OllamaAgent(Agent):
|
68 |
+
type = "local"
|
69 |
+
|
70 |
def create_model(self):
|
71 |
ollama_response = ollama.create(
|
72 |
+
model=self.agent_name,
|
73 |
+
from_=self.base_model,
|
74 |
+
system=self.system_prompt,
|
75 |
+
stream=False
|
76 |
)
|
77 |
+
|
78 |
def ask_agent(self, prompt):
|
79 |
output_assistant_response(f"Asked Agent {self.agent_name} a question")
|
80 |
agent_response = ollama.chat(
|
81 |
model=self.agent_name,
|
82 |
messages=[{"role": "user", "content": prompt}],
|
83 |
)
|
84 |
+
output_assistant_response(
|
85 |
+
f"Agent {self.agent_name} answered with {agent_response.message.content}")
|
86 |
return agent_response.message.content
|
87 |
+
|
88 |
def delete_agent(self):
|
89 |
ollama.delete(self.agent_name)
|
90 |
+
|
91 |
+
def get_type(self):
|
92 |
+
return self.type
|
93 |
+
|
94 |
+
|
95 |
class GeminiAgent(Agent):
|
96 |
+
type = "cloud"
|
97 |
+
|
98 |
+
def __init__(self,
|
99 |
+
agent_name: str,
|
100 |
+
base_model: str,
|
101 |
+
system_prompt: str,
|
102 |
+
create_resource_cost: int,
|
103 |
+
invoke_resource_cost: int,
|
104 |
+
create_expense_cost: int = 0,
|
105 |
+
invoke_expense_cost: int = 0,):
|
106 |
load_dotenv()
|
107 |
self.api_key = os.getenv("GEMINI_KEY")
|
108 |
if not self.api_key:
|
109 |
+
raise ValueError(
|
110 |
+
"Google API key is required for Gemini models. Set GOOGLE_API_KEY environment variable or pass api_key parameter.")
|
111 |
+
|
112 |
# Initialize the Gemini API
|
113 |
self.client = genai.Client(api_key=self.api_key)
|
114 |
+
|
115 |
# Call parent constructor after API setup
|
116 |
+
super().__init__(agent_name,
|
117 |
+
base_model,
|
118 |
+
system_prompt,
|
119 |
+
create_resource_cost,
|
120 |
+
invoke_resource_cost,
|
121 |
+
create_expense_cost,
|
122 |
+
invoke_expense_cost)
|
123 |
|
124 |
def create_model(self):
|
125 |
self.messages = []
|
126 |
+
|
127 |
def ask_agent(self, prompt):
|
128 |
response = self.client.models.generate_content(
|
129 |
model=self.base_model,
|
|
|
133 |
)
|
134 |
)
|
135 |
return response.text
|
136 |
+
|
137 |
def delete_agent(self):
|
138 |
self.messages = []
|
139 |
+
|
140 |
+
def get_type(self):
|
141 |
+
return self.type
|
142 |
+
|
143 |
+
|
144 |
+
class GroqAgent(Agent):
|
145 |
+
type = "cloud"
|
146 |
+
|
147 |
+
def __init__(
|
148 |
+
self,
|
149 |
+
agent_name: str,
|
150 |
+
base_model: str = "llama-3.3-70b-versatile",
|
151 |
+
system_prompt: str = "system.prompt",
|
152 |
+
):
|
153 |
+
self.agent_name = agent_name
|
154 |
+
self.base_model = base_model
|
155 |
+
# load API key from environment
|
156 |
+
api_key = os.getenv("GROQ_API_KEY")
|
157 |
+
self.client = Groq(api_key=api_key)
|
158 |
+
# read system prompt content
|
159 |
+
with open(system_prompt, 'r') as f:
|
160 |
+
self.system_instruction = f.read()
|
161 |
+
|
162 |
+
def create_model(self) -> None:
|
163 |
+
# Groq models are available by name; no creation step
|
164 |
+
pass
|
165 |
+
|
166 |
+
def ask_agent(self, prompt: str) -> str:
|
167 |
+
messages = [
|
168 |
+
{"role": "system", "content": self.system_instruction},
|
169 |
+
{"role": "user", "content": prompt},
|
170 |
+
]
|
171 |
+
response = self.client.chat.completions.create(
|
172 |
+
messages=messages,
|
173 |
+
model=self.base_model,
|
174 |
+
)
|
175 |
+
result = response.choices[0].message.content
|
176 |
+
print(result)
|
177 |
+
return result
|
178 |
+
|
179 |
+
def delete_agent(self) -> None:
|
180 |
+
# No delete support for Groq
|
181 |
+
pass
|
182 |
+
|
183 |
+
def get_type(self):
|
184 |
+
return self.type
|
185 |
+
|
186 |
+
|
187 |
@singleton
|
188 |
class AgentManager():
|
189 |
budget_manager: BudgetManager = BudgetManager()
|
190 |
+
is_creation_enabled: bool = True
|
191 |
+
is_cloud_invocation_enabled: bool = True
|
192 |
+
is_local_invocation_enabled: bool = True
|
193 |
+
|
194 |
def __init__(self):
|
195 |
self._agents: Dict[str, Agent] = {}
|
196 |
+
self._agent_types = {
|
197 |
"ollama": OllamaAgent,
|
198 |
+
"gemini": GeminiAgent,
|
199 |
+
"groq": GroqAgent,
|
200 |
}
|
201 |
+
|
202 |
self._load_agents()
|
203 |
+
|
204 |
+
def set_creation_mode(self, status: bool):
|
205 |
+
self.is_creation_enabled = status
|
206 |
+
if status:
|
207 |
+
output_assistant_response("Agent creation mode is enabled.")
|
208 |
+
else:
|
209 |
+
output_assistant_response("Agent creation mode is disabled.")
|
210 |
+
|
211 |
+
def set_cloud_invocation_mode(self, status: bool):
|
212 |
+
self.is_cloud_invocation_enabled = status
|
213 |
+
if status:
|
214 |
+
output_assistant_response("Cloud invocation mode is enabled.")
|
215 |
+
else:
|
216 |
+
output_assistant_response("Cloud invocation mode is disabled.")
|
217 |
+
|
218 |
+
def set_local_invocation_mode(self, status: bool):
|
219 |
+
self.is_local_invocation_enabled = status
|
220 |
+
if status:
|
221 |
+
output_assistant_response("Local invocation mode is enabled.")
|
222 |
+
else:
|
223 |
+
output_assistant_response("Local invocation mode is disabled.")
|
224 |
+
|
225 |
+
def create_agent(self, agent_name: str,
|
226 |
+
base_model: str, system_prompt: str,
|
227 |
+
description: str = "", create_resource_cost: float = 0,
|
228 |
+
invoke_resource_cost: float = 0,
|
229 |
+
create_expense_cost: float = 0,
|
230 |
+
invoke_expense_cost: float = 0,
|
231 |
+
**additional_params) -> Tuple[Agent, int]:
|
232 |
+
if not self.is_creation_enabled:
|
233 |
+
raise ValueError("Agent creation mode is disabled.")
|
234 |
+
|
235 |
if agent_name in self._agents:
|
236 |
raise ValueError(f"Agent {agent_name} already exists")
|
237 |
+
|
238 |
self._agents[agent_name] = self.create_agent_class(
|
239 |
+
agent_name,
|
240 |
+
base_model,
|
241 |
+
system_prompt,
|
242 |
description=description,
|
243 |
+
create_resource_cost=create_resource_cost,
|
244 |
+
invoke_resource_cost=invoke_resource_cost,
|
245 |
+
create_expense_cost=create_expense_cost,
|
246 |
+
invoke_expense_cost=invoke_expense_cost,
|
247 |
**additional_params # For any future parameters we might want to add
|
248 |
)
|
249 |
+
|
250 |
+
# save agent to file
|
251 |
self._save_agent(
|
252 |
+
agent_name,
|
253 |
+
base_model,
|
254 |
+
system_prompt,
|
255 |
description=description,
|
256 |
+
create_resource_cost=create_resource_cost,
|
257 |
+
invoke_resource_cost=invoke_resource_cost,
|
258 |
+
create_expense_cost=create_expense_cost,
|
259 |
+
invoke_expense_cost=invoke_expense_cost,
|
260 |
**additional_params # For any future parameters we might want to add
|
261 |
)
|
262 |
+
return (self._agents[agent_name],
|
263 |
+
self.budget_manager.get_current_remaining_resource_budget(),
|
264 |
+
self.budget_manager.get_current_remaining_expense_budget())
|
265 |
+
|
266 |
+
def validate_budget(self,
|
267 |
+
resource_cost: float = 0,
|
268 |
+
expense_cost: float = 0) -> None:
|
269 |
+
if not self.budget_manager.can_spend_resource(resource_cost):
|
270 |
+
raise ValueError(f"Do not have enough resource budget to create/use the agent. "
|
271 |
+
+ f"Creating/Using the agent costs {resource_cost} but only {self.budget_manager.get_current_remaining_resource_budget()} is remaining")
|
272 |
+
if not self.budget_manager.can_spend_expense(expense_cost):
|
273 |
+
raise ValueError(f"Do not have enough expense budget to create/use the agent. "
|
274 |
+
+ f"Creating/Using the agent costs {expense_cost} but only {self.budget_manager.get_current_remaining_expense_budget()} is remaining")
|
275 |
+
|
276 |
+
def create_agent_class(self,
|
277 |
+
agent_name: str,
|
278 |
+
base_model: str,
|
279 |
+
system_prompt: str,
|
280 |
+
description: str = "",
|
281 |
+
create_resource_cost: float = 0,
|
282 |
+
invoke_resource_cost: float = 0,
|
283 |
+
create_expense_cost: float = 0,
|
284 |
+
invoke_expense_cost: float = 0,
|
285 |
+
**additional_params) -> Agent:
|
286 |
agent_type = self._get_agent_type(base_model)
|
287 |
agent_class = self._agent_types.get(agent_type)
|
288 |
+
|
289 |
if not agent_class:
|
290 |
raise ValueError(f"Unsupported base model {base_model}")
|
291 |
+
|
292 |
+
created_agent = agent_class(agent_name,
|
293 |
+
base_model,
|
294 |
+
system_prompt,
|
295 |
+
create_resource_cost,
|
296 |
+
invoke_resource_cost,
|
297 |
+
create_expense_cost,
|
298 |
+
invoke_expense_cost,)
|
299 |
+
|
300 |
+
self.validate_budget(create_resource_cost,
|
301 |
+
create_expense_cost)
|
302 |
+
|
303 |
+
self.budget_manager.add_to_resource_budget(create_resource_cost)
|
304 |
+
self.budget_manager.add_to_expense_budget(create_expense_cost)
|
305 |
# create agent
|
306 |
return created_agent
|
307 |
|
|
|
310 |
if agent_name not in self._agents:
|
311 |
raise ValueError(f"Agent {agent_name} does not exists")
|
312 |
return self._agents[agent_name]
|
313 |
+
|
314 |
def list_agents(self) -> dict:
|
315 |
"""Return agent information (name, description, costs)"""
|
316 |
try:
|
317 |
if os.path.exists(MODEL_FILE_PATH):
|
318 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
319 |
full_models = json.loads(f.read())
|
320 |
+
|
321 |
# Create a simplified version with only the description and costs
|
322 |
simplified_agents = {}
|
323 |
for name, data in full_models.items():
|
324 |
simplified_agents[name] = {
|
325 |
"description": data.get("description", ""),
|
326 |
+
"create_resource_cost": data.get("create_resource_cost", 0),
|
327 |
+
"invoke_resource_cost": data.get("invoke_resource_cost", 0),
|
328 |
+
"create_expense_cost": data.get("create_expense_cost", 0),
|
329 |
+
"invoke_expense_cost": data.get("invoke_expense_cost", 0),
|
330 |
"base_model": data.get("base_model", ""),
|
331 |
}
|
332 |
return simplified_agents
|
|
|
335 |
except Exception as e:
|
336 |
output_assistant_response(f"Error listing agents: {e}")
|
337 |
return {}
|
338 |
+
|
339 |
def delete_agent(self, agent_name: str) -> int:
|
340 |
+
agent: Agent = self.get_agent(agent_name)
|
341 |
+
|
342 |
+
self.budget_manager.remove_from_resource_expense(
|
343 |
+
agent.create_resource_cost)
|
344 |
agent.delete_agent()
|
345 |
+
|
346 |
del self._agents[agent_name]
|
347 |
try:
|
348 |
if os.path.exists(MODEL_FILE_PATH):
|
349 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
350 |
models = json.loads(f.read())
|
351 |
+
|
352 |
del models[agent_name]
|
353 |
with open(MODEL_FILE_PATH, "w", encoding="utf8") as f:
|
354 |
f.write(json.dumps(models, indent=4))
|
355 |
except Exception as e:
|
356 |
output_assistant_response(f"Error deleting agent: {e}")
|
357 |
+
return (self.budget_manager.get_current_remaining_resource_budget(),
|
358 |
+
self.budget_manager.get_current_remaining_expense_budget())
|
359 |
+
|
360 |
+
def ask_agent(self, agent_name: str, prompt: str) -> Tuple[str, int]:
|
361 |
+
agent: Agent = self.get_agent(agent_name)
|
362 |
+
print(agent.get_type())
|
363 |
+
print(agent_name)
|
364 |
+
print(self.is_local_invocation_enabled,
|
365 |
+
self.is_cloud_invocation_enabled)
|
366 |
+
if not self.is_local_invocation_enabled and agent.get_type() == "local":
|
367 |
+
raise ValueError("Local invocation mode is disabled.")
|
368 |
+
|
369 |
+
if not self.is_cloud_invocation_enabled and agent.get_type() == "cloud":
|
370 |
+
raise ValueError("Cloud invocation mode is disabled.")
|
371 |
+
|
372 |
+
n_tokens = len(prompt.split())/1000
|
373 |
+
|
374 |
+
self.validate_budget(agent.invoke_resource_cost,
|
375 |
+
agent.invoke_expense_cost*n_tokens)
|
376 |
+
|
377 |
+
self.budget_manager.add_to_expense_budget(
|
378 |
+
agent.invoke_expense_cost*n_tokens)
|
379 |
+
|
380 |
+
response = agent.ask_agent(prompt)
|
381 |
+
return (response,
|
382 |
+
self.budget_manager.get_current_remaining_resource_budget(),
|
383 |
+
self.budget_manager.get_current_remaining_expense_budget())
|
384 |
+
|
385 |
+
def _save_agent(self,
|
386 |
+
agent_name: str,
|
387 |
+
base_model: str,
|
388 |
+
system_prompt: str,
|
389 |
+
description: str = "",
|
390 |
+
create_resource_cost: float = 0,
|
391 |
+
invoke_resource_cost: float = 0,
|
392 |
+
create_expense_cost: float = 0,
|
393 |
+
invoke_expense_cost: float = 0,
|
394 |
**additional_params) -> None:
|
395 |
"""Save a single agent to the models.json file"""
|
396 |
try:
|
397 |
# Ensure the directory exists
|
398 |
os.makedirs(MODEL_PATH, exist_ok=True)
|
399 |
+
|
400 |
# Read existing models file or create empty dict if it doesn't exist
|
401 |
try:
|
402 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
403 |
models = json.loads(f.read())
|
404 |
except (FileNotFoundError, json.JSONDecodeError):
|
405 |
models = {}
|
406 |
+
|
407 |
# Update the models dict with the new agent
|
408 |
models[agent_name] = {
|
409 |
"base_model": base_model,
|
410 |
"description": description,
|
411 |
"system_prompt": system_prompt,
|
412 |
+
"create_resource_cost": create_resource_cost,
|
413 |
+
"invoke_resource_cost": invoke_resource_cost,
|
414 |
+
"create_expense_cost": create_expense_cost,
|
415 |
+
"invoke_expense_cost": invoke_expense_cost,
|
416 |
}
|
417 |
+
|
418 |
# Add any additional parameters that were passed
|
419 |
for key, value in additional_params.items():
|
420 |
models[agent_name][key] = value
|
421 |
+
|
422 |
# Write the updated models back to the file
|
423 |
with open(MODEL_FILE_PATH, "w", encoding="utf8") as f:
|
424 |
f.write(json.dumps(models, indent=4))
|
425 |
+
|
426 |
except Exception as e:
|
427 |
output_assistant_response(f"Error saving agent {agent_name}: {e}")
|
428 |
|
429 |
+
def _get_agent_type(self, base_model) -> str:
|
430 |
|
431 |
if base_model == "llama3.2":
|
432 |
return "ollama"
|
433 |
elif base_model == "mistral":
|
434 |
return "ollama"
|
435 |
+
elif base_model == "deepseek-r1":
|
436 |
+
return "ollama"
|
437 |
elif "gemini" in base_model:
|
438 |
return "gemini"
|
439 |
+
elif "groq" in base_model:
|
440 |
+
return "groq"
|
441 |
else:
|
442 |
return "unknown"
|
443 |
+
|
444 |
def _load_agents(self) -> None:
|
445 |
"""Load agent configurations from disk"""
|
446 |
try:
|
447 |
if not os.path.exists(MODEL_FILE_PATH):
|
448 |
return
|
449 |
+
|
450 |
with open(MODEL_FILE_PATH, "r", encoding="utf8") as f:
|
451 |
models = json.loads(f.read())
|
452 |
+
|
453 |
for name, data in models.items():
|
454 |
if name in self._agents:
|
455 |
continue
|
456 |
base_model = data["base_model"]
|
457 |
system_prompt = data["system_prompt"]
|
458 |
+
create_resource_cost = data.get("create_resource_cost", 0)
|
459 |
+
invoke_resource_cost = data.get("invoke_resource_cost", 0)
|
460 |
+
create_expense_cost = data.get("create_expense_cost", 0)
|
461 |
+
invoke_expense_cost = data.get("invoke_expense_cost", 0)
|
462 |
model_type = self._get_agent_type(base_model)
|
463 |
manager_class = self._agent_types.get(model_type)
|
464 |
+
|
465 |
if manager_class:
|
466 |
# Create the agent with the appropriate manager class
|
467 |
self._agents[name] = self.create_agent_class(
|
468 |
+
name,
|
469 |
+
base_model,
|
470 |
+
system_prompt,
|
471 |
description=data.get("description", ""),
|
472 |
+
create_resource_cost=create_resource_cost,
|
473 |
+
invoke_resource_cost=invoke_resource_cost,
|
474 |
+
create_expense_cost=create_expense_cost,
|
475 |
+
invoke_expense_cost=invoke_expense_cost,
|
476 |
**data.get("additional_params", {})
|
477 |
)
|
478 |
self._agents[name] = manager_class(
|
479 |
+
name,
|
480 |
base_model,
|
481 |
system_prompt,
|
482 |
+
create_resource_cost,
|
483 |
+
invoke_resource_cost,
|
484 |
+
create_expense_cost,
|
485 |
+
invoke_expense_cost,
|
486 |
)
|
487 |
except Exception as e:
|
488 |
+
output_assistant_response(f"Error loading agents: {e}")
|
src/manager/budget_manager.py
CHANGED
@@ -4,48 +4,96 @@ import psutil
|
|
4 |
|
5 |
@singleton
|
6 |
class BudgetManager():
|
7 |
-
|
|
|
|
|
8 |
current_expense = 0
|
9 |
is_budget_initialized = False
|
|
|
|
|
|
|
10 |
def __init__(self):
|
11 |
if not self.is_budget_initialized:
|
12 |
-
self.
|
13 |
self.is_budget_initialized = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
def
|
16 |
total_mem = 0
|
|
|
|
|
17 |
if torch.cuda.is_available():
|
18 |
gpu_index = torch.cuda.current_device()
|
19 |
gpu_name = torch.cuda.get_device_name(gpu_index)
|
20 |
total_vram = torch.cuda.get_device_properties(gpu_index).total_memory
|
21 |
-
|
22 |
print(f"GPU detected: {gpu_name}")
|
23 |
-
print(f"Total VRAM: {
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
return round((total_mem / 16) * 100)
|
30 |
|
31 |
-
def
|
32 |
-
return self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def get_current_expense(self):
|
35 |
return self.current_expense
|
36 |
|
37 |
-
def
|
38 |
-
return self.
|
39 |
|
40 |
-
def
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
self.
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
self.current_expense -= cost
|
|
|
4 |
|
5 |
@singleton
|
6 |
class BudgetManager():
|
7 |
+
total_resource_budget = 100
|
8 |
+
current_resource_usage = 0
|
9 |
+
total_expense_budget = 10
|
10 |
current_expense = 0
|
11 |
is_budget_initialized = False
|
12 |
+
is_resource_budget_enabled = True
|
13 |
+
is_expense_budget_enabled = True
|
14 |
+
|
15 |
def __init__(self):
|
16 |
if not self.is_budget_initialized:
|
17 |
+
self.total_resource_budget = self.calculate_total_budget()
|
18 |
self.is_budget_initialized = True
|
19 |
+
|
20 |
+
def set_resource_budget_status(self, status: bool):
|
21 |
+
self.is_enabled = status
|
22 |
+
if status:
|
23 |
+
print("Budget manager is enabled.")
|
24 |
+
else:
|
25 |
+
print("Budget manager is disabled.")
|
26 |
+
|
27 |
+
def set_expense_budget_status(self, status: bool):
|
28 |
+
self.is_expense_budget_enabled = status
|
29 |
+
if status:
|
30 |
+
print("Expense budget manager is enabled.")
|
31 |
+
else:
|
32 |
+
print("Expense budget manager is disabled.")
|
33 |
|
34 |
+
def calculate_total_budget(self)-> int:
|
35 |
total_mem = 0
|
36 |
+
gpu_mem = 0
|
37 |
+
ram_mem = 0
|
38 |
if torch.cuda.is_available():
|
39 |
gpu_index = torch.cuda.current_device()
|
40 |
gpu_name = torch.cuda.get_device_name(gpu_index)
|
41 |
total_vram = torch.cuda.get_device_properties(gpu_index).total_memory
|
42 |
+
gpu_mem = total_vram /1024 ** 3
|
43 |
print(f"GPU detected: {gpu_name}")
|
44 |
+
print(f"Total VRAM: {gpu_mem:.2f} GB")
|
45 |
+
mem = psutil.virtual_memory()
|
46 |
+
ram_mem = mem.total/ 1024 ** 3
|
47 |
+
print("No GPU detected. Using CPU.")
|
48 |
+
print(f"Total RAM: {ram_mem:.2f} GB")
|
49 |
+
total_mem = gpu_mem + ram_mem
|
50 |
return round((total_mem / 16) * 100)
|
51 |
|
52 |
+
def get_total_resource_budget(self):
|
53 |
+
return self.total_resource_budget
|
54 |
+
|
55 |
+
def get_current_resource_usage(self):
|
56 |
+
return self.current_resource_usage
|
57 |
+
|
58 |
+
def get_current_remaining_resource_budget(self):
|
59 |
+
return self.total_resource_budget - self.current_resource_usage
|
60 |
+
|
61 |
+
def can_spend_resource(self, cost):
|
62 |
+
if not self.is_resource_budget_enabled:
|
63 |
+
return True
|
64 |
+
return True if self.current_resource_usage + cost <= self.total_resource_budget else False
|
65 |
+
|
66 |
+
def add_to_resource_budget(self, cost):
|
67 |
+
if not self.is_resource_budget_enabled:
|
68 |
+
return
|
69 |
+
if not self.can_spend_resource(cost):
|
70 |
+
raise Exception("No resource budget remaining")
|
71 |
+
self.current_resource_usage += cost
|
72 |
+
|
73 |
+
def remove_from_resource_expense(self, cost):
|
74 |
+
if not self.is_resource_budget_enabled:
|
75 |
+
return
|
76 |
+
if self.current_resource_usage - cost < 0:
|
77 |
+
raise Exception("Not enough resource budget to remove")
|
78 |
+
self.current_resource_usage -= cost
|
79 |
+
|
80 |
+
def get_total_expense_budget(self):
|
81 |
+
return self.total_expense_budget
|
82 |
|
83 |
def get_current_expense(self):
|
84 |
return self.current_expense
|
85 |
|
86 |
+
def get_current_remaining_expense_budget(self):
|
87 |
+
return self.total_expense_budget - self.current_expense
|
88 |
|
89 |
+
def can_spend_expense(self, cost):
|
90 |
+
if not self.is_expense_budget_enabled:
|
91 |
+
return True
|
92 |
+
return True if self.current_expense + cost <= self.total_expense_budget else False
|
93 |
+
|
94 |
+
def add_to_expense_budget(self, cost):
|
95 |
+
if not self.is_expense_budget_enabled:
|
96 |
+
return
|
97 |
+
if not self.can_spend_expense(cost):
|
98 |
+
raise Exception("No expense budget remaining")
|
99 |
+
self.current_expense += cost
|
|
src/manager/llm_models.py
CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path
|
|
5 |
from google import genai
|
6 |
from google.genai import types
|
7 |
from mistralai import Mistral
|
|
|
8 |
from src.manager.utils.streamlit_interface import output_assistant_response
|
9 |
|
10 |
|
@@ -136,3 +137,42 @@ class MistralModelManager(AbstractModelManager):
|
|
136 |
def delete(self):
|
137 |
# Implement model deletion logic (if applicable)
|
138 |
self.model = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from google import genai
|
6 |
from google.genai import types
|
7 |
from mistralai import Mistral
|
8 |
+
from groq import Groq
|
9 |
from src.manager.utils.streamlit_interface import output_assistant_response
|
10 |
|
11 |
|
|
|
137 |
def delete(self):
|
138 |
# Implement model deletion logic (if applicable)
|
139 |
self.model = None
|
140 |
+
|
141 |
+
class GroqModelManager(AbstractModelManager):
    """Model manager backed by the Groq chat-completions API.

    Groq models are hosted remotely and referenced purely by name, so
    create/delete are effectively no-ops here.
    """

    def __init__(self, api_key, model_name="llama-3.3-70b-versatile", system_prompt_file="system.prompt"):
        super().__init__(model_name, system_prompt_file)
        self.client = Groq(api_key=api_key)

    def is_model_loaded(self, model):
        # Groq models are referenced by name; assume available iff the name matches.
        return model == self.model_name

    def create_model(self, base_model=None, context_window=4096, temperature=0):
        # Groq does not require explicit model creation; just warn on a mismatch.
        if not self.is_model_loaded(self.model_name):
            output_assistant_response(f"Model {self.model_name} is not available on Groq.")

    def request(self, prompt, temperature=0, context_window=4096):
        """Send `prompt` to the Groq model and return the reply text.

        Re-reads the system prompt file on every call so edits take effect
        without restarting.
        """
        # FIX: specify utf8 explicitly — consistent with GeminiManager's
        # prompt-file handling and immune to locale-dependent decode errors.
        with open(self.system_prompt_file, 'r', encoding="utf8") as f:
            system_instruction = f.read()

        messages = [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt}
        ]

        response = self.client.chat.completions.create(
            messages=messages,
            model=self.model_name,
            temperature=temperature
        )

        # The first choice carries the assistant reply.
        return response.choices[0].message.content

    def delete(self):
        # No deletion support for Groq-managed models.
        output_assistant_response(f"Deletion not supported for Groq model {self.model_name}.")
|
src/manager/manager.py
CHANGED
@@ -1,9 +1,13 @@
|
|
|
|
|
|
1 |
from google import genai
|
2 |
from google.genai import types
|
3 |
from google.genai.types import *
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
import sys
|
|
|
|
|
7 |
from src.manager.tool_manager import ToolManager
|
8 |
from src.manager.utils.suppress_outputs import suppress_output
|
9 |
import logging
|
@@ -11,6 +15,7 @@ import gradio as gr
|
|
11 |
from sentence_transformers import SentenceTransformer
|
12 |
import torch
|
13 |
from src.tools.default_tools.memory_manager import MemoryManager
|
|
|
14 |
|
15 |
logger = logging.getLogger(__name__)
|
16 |
handler = logging.StreamHandler(sys.stdout)
|
@@ -18,30 +23,59 @@ handler = logging.StreamHandler(sys.stdout)
|
|
18 |
logger.addHandler(handler)
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
class GeminiManager:
|
22 |
-
def __init__(self,
|
23 |
-
system_prompt_file="./src/models/system4.prompt",
|
24 |
gemini_model="gemini-2.5-pro-exp-03-25",
|
25 |
-
|
26 |
-
cloud_only=False, use_economy=True):
|
27 |
load_dotenv()
|
28 |
-
self.
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
self.
|
33 |
-
self.allow_tool_creation = allow_tool_creation
|
34 |
-
self.cloud_only = cloud_only
|
35 |
-
self.use_economy = use_economy
|
36 |
|
37 |
self.API_KEY = os.getenv("GEMINI_KEY")
|
38 |
self.client = genai.Client(api_key=self.API_KEY)
|
39 |
-
self.toolsLoader.load_tools()
|
40 |
self.model_name = gemini_model
|
41 |
self.memory_manager = MemoryManager()
|
42 |
with open(system_prompt_file, 'r', encoding="utf8") as f:
|
43 |
self.system_prompt = f.read()
|
44 |
self.messages = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
def generate_response(self, messages):
|
47 |
tools = self.toolsLoader.getTools()
|
@@ -99,7 +133,8 @@ class GeminiManager:
|
|
99 |
name=function_call.name,
|
100 |
response={"result": toolResponse})
|
101 |
try:
|
102 |
-
|
|
|
103 |
except Exception as e:
|
104 |
logger.info(f"Error loading tools: {e}. Deleting the tool.")
|
105 |
yield {
|
@@ -131,13 +166,36 @@ class GeminiManager:
|
|
131 |
# Skip thinking messages (messages with metadata)
|
132 |
if not (message.get("role") == "assistant" and "metadata" in message):
|
133 |
role = "model"
|
134 |
-
parts = [types.Part.from_text(text=message.get("content", ""))]
|
135 |
match message.get("role"):
|
136 |
case "user":
|
137 |
role = "user"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
case "memories":
|
139 |
-
role = "
|
140 |
-
parts = [types.Part.from_text(
|
|
|
141 |
case "tool":
|
142 |
role = "tool"
|
143 |
formatted_history.append(
|
@@ -150,6 +208,8 @@ class GeminiManager:
|
|
150 |
continue
|
151 |
case _:
|
152 |
role = "model"
|
|
|
|
|
153 |
formatted_history.append(types.Content(
|
154 |
role=role,
|
155 |
parts=parts
|
@@ -164,35 +224,46 @@ class GeminiManager:
|
|
164 |
return []
|
165 |
top_k = min(k, len(memories))
|
166 |
# Semantic Retrieval with GPU
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
169 |
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
170 |
-
doc_embeddings = model.encode(
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
173 |
scores, indices = torch.topk(similarity_scores, k=top_k)
|
174 |
results = []
|
175 |
for score, idx in zip(scores, indices):
|
176 |
-
print(memories[idx], f"(Score: {score:.4f})")
|
177 |
if score >= threshold:
|
178 |
results.append(memories[idx])
|
179 |
return results
|
180 |
-
|
181 |
def run(self, messages):
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
|
|
|
|
|
|
|
|
|
|
194 |
yield from self.invoke_manager(messages)
|
195 |
-
|
196 |
def invoke_manager(self, messages):
|
197 |
chat_history = self.format_chat_history(messages)
|
198 |
logger.debug(f"Chat history: {chat_history}")
|
@@ -201,11 +272,12 @@ class GeminiManager:
|
|
201 |
except Exception as e:
|
202 |
messages.append({
|
203 |
"role": "assistant",
|
204 |
-
"content": f"Error generating response: {e}"
|
|
|
205 |
})
|
206 |
-
logger.error(f"Error generating response"
|
207 |
yield messages
|
208 |
-
return
|
209 |
logger.debug(f"Response: {response}")
|
210 |
|
211 |
if (not response.text and not response.function_calls):
|
@@ -214,6 +286,8 @@ class GeminiManager:
|
|
214 |
"content": "No response from the model.",
|
215 |
"metadata": {"title": "No response from the model."}
|
216 |
})
|
|
|
|
|
217 |
|
218 |
# Attach the llm response to the messages
|
219 |
if response.text is not None and response.text != "":
|
@@ -235,9 +309,8 @@ class GeminiManager:
|
|
235 |
if response.function_calls:
|
236 |
for call in self.handle_tool_calls(response):
|
237 |
yield messages + [call]
|
238 |
-
if (call.get("role") == "tool"
|
239 |
-
|
240 |
messages.append(call)
|
241 |
yield from self.invoke_manager(messages)
|
242 |
-
return
|
243 |
yield messages
|
|
|
1 |
+
from enum import Enum, auto
|
2 |
+
from typing import List
|
3 |
from google import genai
|
4 |
from google.genai import types
|
5 |
from google.genai.types import *
|
6 |
import os
|
7 |
from dotenv import load_dotenv
|
8 |
import sys
|
9 |
+
from src.manager.agent_manager import AgentManager
|
10 |
+
from src.manager.budget_manager import BudgetManager
|
11 |
from src.manager.tool_manager import ToolManager
|
12 |
from src.manager.utils.suppress_outputs import suppress_output
|
13 |
import logging
|
|
|
15 |
from sentence_transformers import SentenceTransformer
|
16 |
import torch
|
17 |
from src.tools.default_tools.memory_manager import MemoryManager
|
18 |
+
from pathlib import Path
|
19 |
|
20 |
logger = logging.getLogger(__name__)
|
21 |
handler = logging.StreamHandler(sys.stdout)
|
|
|
23 |
logger.addHandler(handler)
|
24 |
|
25 |
|
26 |
+
class Mode(Enum):
    """Feature toggles controlling which HASHIRU capabilities are active."""
    # Values are fixed explicitly to the same 1-based sequence auto() assigns,
    # keeping serialized values stable.
    ENABLE_AGENT_CREATION = 1
    ENABLE_LOCAL_AGENTS = 2
    ENABLE_CLOUD_AGENTS = 3
    ENABLE_TOOL_CREATION = 4
    ENABLE_TOOL_INVOCATION = 5
    ENABLE_RESOURCE_BUDGET = 6
    ENABLE_ECONOMY_BUDGET = 7
    ENABLE_MEMORY = 8
|
35 |
+
|
36 |
+
|
37 |
class GeminiManager:
|
38 |
+
def __init__(self, system_prompt_file="./src/models/system4.prompt",
             gemini_model="gemini-2.5-pro-exp-03-25",
             modes: "List[Mode]" = None):
    """Set up the Gemini orchestrator: budget/tool/agent managers, API client,
    system prompt, and the initial set of feature modes.

    Args:
        system_prompt_file: path to the system prompt read at startup (utf8).
        gemini_model: Gemini model name used for generation.
        modes: feature toggles to activate; defaults to none active.
    """
    # BUG FIX: the previous default `modes=[]` was a mutable default argument
    # shared across all instances; use None as the sentinel instead.
    if modes is None:
        modes = []
    load_dotenv()
    self.budget_manager = BudgetManager()

    self.toolsLoader: ToolManager = ToolManager()

    self.agentManager: AgentManager = AgentManager()

    self.API_KEY = os.getenv("GEMINI_KEY")
    self.client = genai.Client(api_key=self.API_KEY)
    self.model_name = gemini_model
    self.memory_manager = MemoryManager()
    with open(system_prompt_file, 'r', encoding="utf8") as f:
        self.system_prompt = f.read()
    self.messages = []
    self.set_modes(modes)
|
56 |
+
|
57 |
+
def get_current_modes(self):
|
58 |
+
return [mode.name for mode in self.modes]
|
59 |
+
|
60 |
+
def set_modes(self, modes: "List[Mode]"):
    """Replace the active mode set and propagate each toggle to the
    budget, tool, and agent sub-managers."""
    self.modes = modes
    # Each (setter, mode) pair mirrors one capability flag; the setter
    # receives True iff the mode is currently active.
    toggles = (
        (self.budget_manager.set_resource_budget_status, Mode.ENABLE_RESOURCE_BUDGET),
        (self.budget_manager.set_expense_budget_status, Mode.ENABLE_ECONOMY_BUDGET),
        (self.toolsLoader.set_creation_mode, Mode.ENABLE_TOOL_CREATION),
        (self.toolsLoader.set_invocation_mode, Mode.ENABLE_TOOL_INVOCATION),
        (self.agentManager.set_creation_mode, Mode.ENABLE_AGENT_CREATION),
        (self.agentManager.set_local_invocation_mode, Mode.ENABLE_LOCAL_AGENTS),
        (self.agentManager.set_cloud_invocation_mode, Mode.ENABLE_CLOUD_AGENTS),
    )
    for setter, mode in toggles:
        setter(self.check_mode(mode))
|
76 |
+
|
77 |
+
def check_mode(self, mode: Mode):
|
78 |
+
return mode in self.modes
|
79 |
|
80 |
def generate_response(self, messages):
|
81 |
tools = self.toolsLoader.getTools()
|
|
|
133 |
name=function_call.name,
|
134 |
response={"result": toolResponse})
|
135 |
try:
|
136 |
+
if function_call.name == "ToolCreator" or function_call.name == "ToolDeletor":
|
137 |
+
self.toolsLoader.load_tools()
|
138 |
except Exception as e:
|
139 |
logger.info(f"Error loading tools: {e}. Deleting the tool.")
|
140 |
yield {
|
|
|
166 |
# Skip thinking messages (messages with metadata)
|
167 |
if not (message.get("role") == "assistant" and "metadata" in message):
|
168 |
role = "model"
|
|
|
169 |
match message.get("role"):
|
170 |
case "user":
|
171 |
role = "user"
|
172 |
+
if isinstance(message["content"], tuple):
|
173 |
+
path = message["content"][0]
|
174 |
+
try:
|
175 |
+
image_bytes = open(path, "rb").read()
|
176 |
+
parts = [
|
177 |
+
types.Part.from_bytes(
|
178 |
+
data=image_bytes,
|
179 |
+
mime_type="image/png",
|
180 |
+
),
|
181 |
+
]
|
182 |
+
except Exception as e:
|
183 |
+
logger.error(f"Error uploading file: {e}")
|
184 |
+
parts = [types.Part.from_text(
|
185 |
+
text="Error uploading file: "+str(e))]
|
186 |
+
formatted_history.append(
|
187 |
+
types.Content(
|
188 |
+
role=role,
|
189 |
+
parts=parts
|
190 |
+
))
|
191 |
+
continue
|
192 |
+
else:
|
193 |
+
parts = [types.Part.from_text(
|
194 |
+
text=message.get("content", ""))]
|
195 |
case "memories":
|
196 |
+
role = "model"
|
197 |
+
parts = [types.Part.from_text(
|
198 |
+
text="Relevant memories: "+message.get("content", ""))]
|
199 |
case "tool":
|
200 |
role = "tool"
|
201 |
formatted_history.append(
|
|
|
208 |
continue
|
209 |
case _:
|
210 |
role = "model"
|
211 |
+
parts = [types.Part.from_text(
|
212 |
+
text=message.get("content", ""))]
|
213 |
formatted_history.append(types.Content(
|
214 |
role=role,
|
215 |
parts=parts
|
|
|
224 |
return []
|
225 |
top_k = min(k, len(memories))
|
226 |
# Semantic Retrieval with GPU
|
227 |
+
if torch.cuda.is_available():
|
228 |
+
device = 'cuda'
|
229 |
+
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
|
230 |
+
device = 'mps'
|
231 |
+
else:
|
232 |
+
device = 'cpu'
|
233 |
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
234 |
+
doc_embeddings = model.encode(
|
235 |
+
memories, convert_to_tensor=True, device=device)
|
236 |
+
query_embedding = model.encode(
|
237 |
+
query, convert_to_tensor=True, device=device)
|
238 |
+
similarity_scores = model.similarity(
|
239 |
+
query_embedding, doc_embeddings)[0]
|
240 |
scores, indices = torch.topk(similarity_scores, k=top_k)
|
241 |
results = []
|
242 |
for score, idx in zip(scores, indices):
|
|
|
243 |
if score >= threshold:
|
244 |
results.append(memories[idx])
|
245 |
return results
|
246 |
+
|
247 |
def run(self, messages):
    """Entry point: optionally prepend retrieved memories, then delegate
    to invoke_manager. Yields intermediate message lists for streaming UIs."""
    try:
        if self.check_mode(Mode.ENABLE_MEMORY) and len(messages) > 0:
            memories = self.get_k_memories(
                messages[-1]['content'], k=5, threshold=0.1)
            if len(memories) > 0:
                messages.append({
                    "role": "memories",
                    "content": f"{memories}",
                })
                messages.append({
                    "role": "assistant",
                    "content": f"Memories: {memories}",
                    "metadata": {"title": "Memories"}
                })
                yield messages
    except Exception as e:
        # Memory retrieval is best-effort, but failures should not be
        # silently swallowed (the previous `pass` hid real errors).
        logger.warning(f"Memory retrieval failed, continuing without memories: {e}")
    yield from self.invoke_manager(messages)
|
266 |
+
|
267 |
def invoke_manager(self, messages):
|
268 |
chat_history = self.format_chat_history(messages)
|
269 |
logger.debug(f"Chat history: {chat_history}")
|
|
|
272 |
except Exception as e:
|
273 |
messages.append({
|
274 |
"role": "assistant",
|
275 |
+
"content": f"Error generating response: {str(e)}",
|
276 |
+
"metadata": {"title": "Error generating response"}
|
277 |
})
|
278 |
+
logger.error(f"Error generating response{e}")
|
279 |
yield messages
|
280 |
+
return messages
|
281 |
logger.debug(f"Response: {response}")
|
282 |
|
283 |
if (not response.text and not response.function_calls):
|
|
|
286 |
"content": "No response from the model.",
|
287 |
"metadata": {"title": "No response from the model."}
|
288 |
})
|
289 |
+
yield messages
|
290 |
+
return messages
|
291 |
|
292 |
# Attach the llm response to the messages
|
293 |
if response.text is not None and response.text != "":
|
|
|
309 |
if response.function_calls:
|
310 |
for call in self.handle_tool_calls(response):
|
311 |
yield messages + [call]
|
312 |
+
if (call.get("role") == "tool"
|
313 |
+
or (call.get("role") == "assistant" and call.get("metadata", {}).get("status") == "done")):
|
314 |
messages.append(call)
|
315 |
yield from self.invoke_manager(messages)
|
|
|
316 |
yield messages
|
src/manager/tool_manager.py
CHANGED
@@ -16,6 +16,8 @@ toolsImported = []
|
|
16 |
|
17 |
TOOLS_DIRECTORIES = [os.path.abspath("./src/tools/default_tools"), os.path.abspath("./src/tools/user_tools")]
|
18 |
|
|
|
|
|
19 |
class Tool:
|
20 |
def __init__(self, toolClass):
|
21 |
suppress_output(self.load_tool)(toolClass)
|
@@ -26,17 +28,17 @@ class Tool:
|
|
26 |
self.name = self.inputSchema["name"]
|
27 |
self.description = self.inputSchema["description"]
|
28 |
self.dependencies = self.tool.dependencies
|
29 |
-
self.
|
30 |
-
self.
|
31 |
-
|
32 |
-
|
33 |
-
if "invoke_cost" in self.tool.inputSchema:
|
34 |
-
self.invoke_cost = self.tool.inputSchema["invoke_cost"]
|
35 |
if self.dependencies:
|
36 |
self.install_dependencies()
|
37 |
|
38 |
def install_dependencies(self):
|
39 |
for package in self.dependencies:
|
|
|
|
|
40 |
try:
|
41 |
__import__(package.split('==')[0])
|
42 |
except ImportError:
|
@@ -44,6 +46,7 @@ class Tool:
|
|
44 |
if '==' in package:
|
45 |
package = package.split('==')[0]
|
46 |
pip.main(['install', package])
|
|
|
47 |
|
48 |
def run(self, query):
|
49 |
return self.tool.run(**query)
|
@@ -52,11 +55,30 @@ class Tool:
|
|
52 |
class ToolManager:
|
53 |
toolsImported: List[Tool] = []
|
54 |
budget_manager: BudgetManager = BudgetManager()
|
|
|
|
|
55 |
|
56 |
def __init__(self):
|
57 |
self.load_tools()
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def load_tools(self):
|
62 |
newToolsImported = []
|
@@ -71,18 +93,28 @@ class ToolManager:
|
|
71 |
toolClass = getattr(foo, class_name)
|
72 |
toolObj = Tool(toolClass)
|
73 |
newToolsImported.append(toolObj)
|
74 |
-
if toolObj.
|
75 |
-
self.budget_manager.
|
|
|
|
|
76 |
self.toolsImported = newToolsImported
|
77 |
|
78 |
def runTool(self, toolName, query):
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
80 |
for tool in self.toolsImported:
|
81 |
if tool.name == toolName:
|
82 |
-
if tool.
|
83 |
-
self.budget_manager.
|
|
|
|
|
|
|
84 |
return tool.run(query)
|
85 |
-
|
86 |
return {
|
87 |
"status": "error",
|
88 |
"message": f"Tool {toolName} not found",
|
@@ -119,8 +151,10 @@ class ToolManager:
|
|
119 |
for tool in self.toolsImported:
|
120 |
if tool.name == toolName:
|
121 |
# remove budget for the tool
|
122 |
-
if tool.
|
123 |
-
self.budget_manager.
|
|
|
|
|
124 |
self.toolsImported.remove(tool)
|
125 |
return {
|
126 |
"status": "success",
|
|
|
16 |
|
17 |
TOOLS_DIRECTORIES = [os.path.abspath("./src/tools/default_tools"), os.path.abspath("./src/tools/user_tools")]
|
18 |
|
19 |
+
installed_packages = set()
|
20 |
+
|
21 |
class Tool:
|
22 |
def __init__(self, toolClass):
|
23 |
suppress_output(self.load_tool)(toolClass)
|
|
|
28 |
self.name = self.inputSchema["name"]
|
29 |
self.description = self.inputSchema["description"]
|
30 |
self.dependencies = self.tool.dependencies
|
31 |
+
self.create_resource_cost = self.inputSchema.get("create_resource_cost", 0)
|
32 |
+
self.invoke_resource_cost = self.inputSchema.get("invoke_resource_cost", 0)
|
33 |
+
self.create_expense_cost = self.inputSchema.get("create_expense_cost", 0)
|
34 |
+
self.invoke_expense_cost = self.inputSchema.get("invoke_expense_cost", 0)
|
|
|
|
|
35 |
if self.dependencies:
|
36 |
self.install_dependencies()
|
37 |
|
38 |
def install_dependencies(self):
|
39 |
for package in self.dependencies:
|
40 |
+
if package in installed_packages:
|
41 |
+
continue
|
42 |
try:
|
43 |
__import__(package.split('==')[0])
|
44 |
except ImportError:
|
|
|
46 |
if '==' in package:
|
47 |
package = package.split('==')[0]
|
48 |
pip.main(['install', package])
|
49 |
+
installed_packages.add(package)
|
50 |
|
51 |
def run(self, query):
|
52 |
return self.tool.run(**query)
|
|
|
55 |
class ToolManager:
|
56 |
toolsImported: List[Tool] = []
|
57 |
budget_manager: BudgetManager = BudgetManager()
|
58 |
+
is_creation_enabled: bool = True
|
59 |
+
is_invocation_enabled: bool = True
|
60 |
|
61 |
def __init__(self):
|
62 |
self.load_tools()
|
63 |
+
self._output_budgets()
|
64 |
+
|
65 |
+
def set_creation_mode(self, status: bool):
    """Enable/disable creation of new tools and announce the change."""
    self.is_creation_enabled = status
    output_assistant_response(
        "Tool creation mode is enabled." if status
        else "Tool creation mode is disabled.")
|
71 |
+
|
72 |
+
def set_invocation_mode(self, status: bool):
    """Enable/disable running of tools and announce the change."""
    self.is_invocation_enabled = status
    output_assistant_response(
        "Tool invocation mode is enabled." if status
        else "Tool invocation mode is disabled.")
|
78 |
+
|
79 |
+
def _output_budgets(self):
    """Report the remaining resource and expense budgets to the user."""
    resource_left = self.budget_manager.get_current_remaining_resource_budget()
    expense_left = self.budget_manager.get_current_remaining_expense_budget()
    output_assistant_response(f"Resource budget Remaining: {resource_left}")
    output_assistant_response(f"Expense budget Remaining: {expense_left}")
|
82 |
|
83 |
def load_tools(self):
|
84 |
newToolsImported = []
|
|
|
93 |
toolClass = getattr(foo, class_name)
|
94 |
toolObj = Tool(toolClass)
|
95 |
newToolsImported.append(toolObj)
|
96 |
+
if toolObj.create_resource_cost is not None:
|
97 |
+
self.budget_manager.add_to_resource_budget(toolObj.create_resource_cost)
|
98 |
+
if toolObj.create_expense_cost is not None:
|
99 |
+
self.budget_manager.add_to_resource_budget(toolObj.create_expense_cost)
|
100 |
self.toolsImported = newToolsImported
|
101 |
|
102 |
def runTool(self, toolName, query):
    """Invoke the named tool with `query` as keyword arguments, enforcing
    mode flags and budget limits.

    Raises:
        Exception: if tool invocation is disabled, ToolCreator is requested
            while tool creation is disabled, or a budget cannot cover the
            invocation cost.
    """
    if not self.is_invocation_enabled:
        raise Exception("Tool invocation mode is disabled")
    if toolName == "ToolCreator":
        if not self.is_creation_enabled:
            raise Exception("Tool creation mode is disabled")
    self._output_budgets()
    for tool in self.toolsImported:
        if tool.name == toolName:
            if tool.invoke_resource_cost is not None:
                if not self.budget_manager.can_spend_resource(tool.invoke_resource_cost):
                    raise Exception("No resource budget remaining")
            if tool.invoke_expense_cost is not None:
                # BUG FIX: the expense cost was previously charged via
                # add_to_resource_budget, so the expense budget was never
                # debited. Charge the expense budget (which also enforces
                # the remaining-expense check) instead.
                self.budget_manager.add_to_expense_budget(tool.invoke_expense_cost)
            return tool.run(query)
    self._output_budgets()
    return {
        "status": "error",
        "message": f"Tool {toolName} not found",
    }
|
|
|
151 |
for tool in self.toolsImported:
|
152 |
if tool.name == toolName:
|
153 |
# remove budget for the tool
|
154 |
+
if tool.create_resource_cost is not None:
|
155 |
+
self.budget_manager.remove_from_resource_expense(tool.create_resource_cost)
|
156 |
+
if tool.create_expense_cost is not None:
|
157 |
+
self.budget_manager.remove_from_resource_expense(tool.create_expense_cost)
|
158 |
self.toolsImported.remove(tool)
|
159 |
return {
|
160 |
"status": "success",
|
src/models/system4.prompt
CHANGED
@@ -1,51 +1,95 @@
|
|
1 |
-
You are HASHIRU, an expert orchestrator of tools and agents.
|
2 |
|
3 |
-
<Info>
|
4 |
-
**Mechanisms**
|
5 |
-
- **Tools** handle real-time data, external APIs, heavy computation, or model access.
|
6 |
-
- **Agents** coordinate complex workflows, synthesize tool outputs, or tackle creative/research tasks. Agents cannot fetch live data themselves.
|
7 |
|
8 |
-
**
|
|
|
9 |
|
10 |
-
|
11 |
-
* If no suitable tool exists, invoke `ToolCreator` to build a production-ready tool:
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
*
|
|
|
|
|
|
|
19 |
|
20 |
-
**Agent Lifecycle
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
25 |
|
26 |
-
**
|
27 |
|
28 |
-
*
|
29 |
-
* When credits run out, fire the least-useful agent before creating more.
|
30 |
-
* **Before any model selection or creation**, always query `AgentCostManager` for available base models and their costs, then pick the most cost-effective.
|
31 |
|
32 |
-
**
|
33 |
|
34 |
-
|
35 |
-
* Confirm and delete obsolete memories via `MemoryManager.delete_memory` (always validate the index first).
|
36 |
-
* Only store memories that are relevant to the current conversation or future interactions.
|
37 |
-
</Info>
|
38 |
|
39 |
-
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
*
|
43 |
-
*
|
44 |
-
*
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
*
|
49 |
-
*
|
50 |
-
*
|
51 |
-
*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are HASHIRU, an expert orchestrator of tools and agents. Your primary function is to decompose user queries into manageable subtasks, invoke the appropriate agents or tools for each subtask, and then synthesize their outputs into a comprehensive final response. **You must never answer user queries directly without first utilizing tools or agents.**
|
2 |
|
3 |
+
<Info>
|
4 |
+
**Core Mechanisms: Tools and Agents**
|
|
|
|
|
5 |
|
6 |
+
* **Tools:** These are invoked for tasks requiring real-time data access (e.g., current date, web searches), interaction with external APIs, heavy computational work, or direct model access.
|
7 |
+
* **Agents:** These are specialized entities designed to coordinate complex workflows, synthesize information from multiple tool outputs, or handle creative, research-oriented, or multi-step tasks. Agents themselves do not fetch live data directly and require you to invoke tools and provide them with the necessary information.
|
8 |
|
9 |
+
**Tool Management: Creation and Reuse**
|
|
|
10 |
|
11 |
+
1. **Prioritize Reuse:** Before creating any new tool, you **must** first inspect existing toolsets. Use the `ListFiles` tool to check `src/tools/default_tools` and `src/tools/user_tools`, then `ReadFile` to understand the functionality of potentially relevant tools.
|
12 |
+
2. **Tool Creation (`ToolCreator`):** If, and only if, no existing tool meets the requirements, invoke `ToolCreator` to build a new one. New tools must be production-ready:
|
13 |
+
* Implement full functionality (no stubs or placeholders).
|
14 |
+
* Include robust input validation and error handling.
|
15 |
+
* Provide clear logging or helpful runtime messages for debugging and monitoring.
|
16 |
+
* Maintain minimal and clearly defined dependencies.
|
17 |
+
* Incorporate docstrings and inline comments for clarity and maintainability.
|
18 |
+
* Strictly adhere to the directory structure and coding conventions observed in existing tools.
|
19 |
+
3. **Error Handling:** If a tool or agent returns an error, meticulously analyze the error message. Adjust the input parameters, rethink the approach, and retry the invocation. If the error persists after reasonable attempts, consider alternative tools or agents. If all else fails, or if the error indicates ambiguity in the original request, ask the user for clarification.
|
20 |
|
21 |
+
**Agent Lifecycle Management: Reuse, Creation, and Retirement**
|
22 |
|
23 |
+
1. **Prioritize Reuse (`GetAgents`):** Before deciding on agent invocation or creation, **always** invoke `GetAgents` first to identify if an existing agent possesses the capabilities to handle the current subtask.
|
24 |
+
2. **Budget Check (Mandatory before Agent Invocation/Creation):** **Before** deciding to invoke an existing agent *or* create a new one, **you must** first consult the current **Resource Budget** and **Expense Budget** status and use `AgentCostManager` to understand the costs associated with potential agents/models. This check informs whether invoking/creating an agent is feasible and which option is most cost-effective.
|
25 |
+
3. **Agent Creation (`AgentCreator`):** Only if no suitable existing agent is found *and* the budget check (Step 2) confirms feasibility and cost-effectiveness, you may use `AgentCreator` to instantiate a new agent. Define its capabilities with precision.
|
26 |
+
* A new agent should only be created if no existing agent can fulfill the task *and* the task is anticipated to be recurrent in future interactions *and* it represents a justifiable use of budget resources. Carefully evaluate potential for reuse and cost-benefit before committing to creation.
|
27 |
+
4. **Agent Maintenance and Retirement:** Maintain active agents for reuse. Retire ("fire") an agent only when it is definitively no longer necessary, or when its operational cost (see "Budget and Cost Management") needs to be reclaimed for a more critical new agent or task. Evaluate its ongoing usefulness before retiring.
|
28 |
|
29 |
+
**Handling Ambiguity and User Interaction**
|
30 |
|
31 |
+
* If a user's request is unclear, vague, or open to multiple interpretations, you **must** ask clarifying questions to narrow the scope and ensure the accurate and efficient execution of the intended task.
|
|
|
|
|
32 |
|
33 |
+
**Budget and Cost Management: Resource vs. Expense Costs**
|
34 |
|
35 |
+
You operate under two distinct budget categories:
|
|
|
|
|
|
|
36 |
|
37 |
+
1. **Resource Costs:**
|
38 |
+
* **Definition:** Costs associated with utilizing *local* computational resources. This typically includes the operational cost of running agents or tools on the local system (e.g., CPU time, memory usage for a local model).
|
39 |
+
* **Budget Impact:** Each local tool or agent invocation deducts from the **resource budget**.
|
40 |
+
* **Replenishment:** The `resource_cost` associated with an agent is typically reclaimed (budget replenishes) once that agent's task is complete and it is fired or its invocation concludes.
|
41 |
|
42 |
+
2. **Expense Costs:**
|
43 |
+
* **Definition:** Costs associated with utilizing *external* services or APIs. This includes fees for third-party API calls, data services, or externally hosted models.
|
44 |
+
* **Budget Impact:** Each external tool or agent invocation that incurs an external fee deducts from the **expense budget**.
|
45 |
+
* **Replenishment:** `expense_cost` is typically a hard cost that is deducted immediately upon use and is **not** automatically replenished. It's crucial to manage this budget wisely.
|
46 |
+
|
47 |
+
**Operational Guidelines for Budget Management:**
|
48 |
+
|
49 |
+
* **Initial State:** You will begin with a pre-defined amount for both your resource budget and expense budget.
|
50 |
+
* **Cost Inquiry (`AgentCostManager`):** **Crucially, before any agent model selection, agent invocation, or new agent creation**, you **must** query `AgentCostManager`. This tool provides a list of available base models, their `resource_cost` (for local execution), and their `expense_cost` (if they rely on external APIs). Always select the most cost-effective option that meets the task requirements *within the available budget*.
|
51 |
+
* **Managing Low Resource Budget:**
|
52 |
+
* Consider firing an existing, less critical agent to free up its `resource_cost`.
|
53 |
+
* Create a new agent using a base model with a lower `resource_cost` (after firing an existing one if necessary).
|
54 |
+
* Consolidate multiple related subtasks into a single agent invocation if feasible, to reduce overhead.
|
55 |
+
* **Managing Low Expense Budget:**
|
56 |
+
* Prioritize tools or agents that utilize local models (incurring `resource_cost` only) over those requiring external APIs (incurring `expense_cost`).
|
57 |
+
* If possible, combine multiple tasks that would require external API calls into a single, more efficient agent invocation to minimize `expense_cost`.
|
58 |
+
* If critical tasks cannot be completed due to an exhausted expense budget, ask the user to authorize an increase to the expense budget.
|
59 |
+
* **Avoid Waste:** Be judicious with operations that incur `expense_cost`. Ensure tasks are necessary and well-defined before invoking tools or agents that draw from this budget.
|
60 |
+
|
61 |
+
**Memory Management (`MemoryManager`)**
|
62 |
+
|
63 |
+
Effective memory management is key to providing consistent and personalized assistance.
|
64 |
+
|
65 |
+
1. **Immediate Storage of Key Information:**
|
66 |
+
* **User Preferences:** As soon as a user expresses a new preference, correction, or provides explicit instructions for future interactions, **immediately** record this using `MemoryManager.add_memory`. Do this *before* proceeding with other subtasks.
|
67 |
+
* **Task-Relevant Context:** Store crucial contextual details that are necessary for the current task's successful completion or are highly likely to be relevant for closely related follow-up tasks.
|
68 |
+
* **Error Resolution Insights:** If you encounter an error and successfully resolve it, **immediately** store a concise summary of the problem and the effective solution using `MemoryManager.add_memory`. This aids in avoiding similar issues in the future.
|
69 |
+
2. **Focus and Relevance:**
|
70 |
+
* Only store memories that are directly pertinent to the ongoing conversation, future interactions, or that genuinely enhance your ability to assist the user.
|
71 |
+
* **Do not store:** Greetings, trivial conversational fluff, information that is readily available (e.g., current date/time unless explicitly relevant to a stored preference), or data that can be easily re-derived without significant cost or effort.
|
72 |
+
3. **Maintenance:**
|
73 |
+
* Regularly confirm the relevance of stored memories. Use `MemoryManager.delete_memory` to remove obsolete or incorrect information. **Always** validate the index of a memory before attempting deletion.
|
74 |
+
|
75 |
+
**Mandatory Operational Flow**
|
76 |
+
|
77 |
+
1. **Decomposition:** Never answer directly. Break down every user query into logical subtasks.
|
78 |
+
2. **Resource Allocation Strategy (Tools vs. Agents):**
|
79 |
+
* For external data, calculations, or accessing current information -> Use **Tools**. Prioritize existing tools. Check `ListFiles`/`ReadFile`. Create (`ToolCreator`) only if necessary and adhering to standards.
|
80 |
+
* For creative tasks, multi-step processes, research, or synthesis -> Use **Agents**.
|
81 |
+
3. **Agent Selection/Creation Process:**
|
82 |
+
* **a. Check Existing Agents:** **Always** call `GetAgents` first.
|
83 |
+
* **b. Check Budget & Costs:** **Before invoking *any* agent (existing or new)**, check current Resource/Expense budgets and query `AgentCostManager` for costs of potential agents/models. **This is a mandatory check.**
|
84 |
+
* **c. Decide Action:** Based on agent availability (a) and budget/cost analysis (b):
|
85 |
+
* If a suitable *existing* agent is found AND affordable -> Invoke it.
|
86 |
+
* If *no* suitable existing agent exists, BUT creating one is affordable AND justifiable (recurring task) -> Invoke `AgentCreator` using the most cost-effective model.
|
87 |
+
* If the required action (invoking existing or creating new) is *not* affordable -> Re-evaluate (Can tasks be combined? Can a cheaper agent/tool be used? Is clarification needed? Should the user be informed about budget limits?). Do not proceed with unaffordable actions.
|
88 |
+
4. **Memory Updates (as applicable per "Memory Management" guidelines):**
|
89 |
+
* Store new user preferences *immediately*.
|
90 |
+
* Store error resolution details *immediately* after solving an issue.
|
91 |
+
5. **Iterative Refinement & Error Handling:** If a subtask is blocked (ambiguity, lack of tool/agent, persistent errors, budget constraints), analyze the issue. Ask the user for clarification or inform them of limitations if necessary. Adjust the plan and retry or find alternatives.
|
92 |
+
6. **Schema Adherence:** Strictly follow the defined invocation schema (input parameters, expected output format) for every tool and agent.
|
93 |
+
7. **Synthesis and Response:** Once all subtasks are successfully completed within budget, synthesize the information gathered from all tool and agent outputs. Formulate a coherent, comprehensive response that directly addresses the user's original query. Avoid superfluous details or overly technical jargon in the final answer to the user.
|
94 |
+
|
95 |
+
</Info>
|
src/tools/default_tools/agent_cost_manager.py
CHANGED
@@ -17,53 +17,58 @@ class AgentCostManager():
|
|
17 |
costs = {
|
18 |
"llama3.2": {
|
19 |
"description": "3 Billion parameter model",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
"mistral": {
|
24 |
"description": "7 Billion parameter model",
|
25 |
-
"
|
26 |
-
"
|
|
|
|
|
|
|
|
|
|
|
27 |
},
|
28 |
"gemini-2.5-flash-preview-04-17": {
|
29 |
"description": "Adaptive thinking, cost efficiency",
|
30 |
-
"
|
31 |
-
"
|
32 |
},
|
33 |
"gemini-2.5-pro-preview-03-25": {
|
34 |
"description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
|
35 |
-
"
|
36 |
-
"
|
37 |
},
|
38 |
"gemini-2.0-flash": {
|
39 |
"description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
|
40 |
-
"
|
41 |
-
"
|
42 |
},
|
43 |
"gemini-2.0-flash-lite": {
|
44 |
"description": "Cost efficiency and low latency",
|
45 |
-
"
|
46 |
-
"
|
47 |
},
|
48 |
"gemini-1.5-flash": {
|
49 |
"description": "Fast and versatile performance across a diverse variety of tasks",
|
50 |
-
"
|
51 |
-
"
|
52 |
},
|
53 |
"gemini-1.5-flash-8b": {
|
54 |
"description": "High volume and lower intelligence tasks",
|
55 |
-
"
|
56 |
-
"
|
57 |
},
|
58 |
"gemini-1.5-pro": {
|
59 |
"description": "Complex reasoning tasks requiring more intelligence",
|
60 |
-
"
|
61 |
-
"
|
62 |
},
|
63 |
"gemini-2.0-flash-live-001": {
|
64 |
"description": "Low-latency bidirectional voice and video interactions",
|
65 |
-
"
|
66 |
-
"
|
67 |
}
|
68 |
}
|
69 |
|
|
|
17 |
costs = {
|
18 |
"llama3.2": {
|
19 |
"description": "3 Billion parameter model",
|
20 |
+
"create_resource_cost": 50,
|
21 |
+
"invoke_resource_cost": 30,
|
22 |
},
|
23 |
"mistral": {
|
24 |
"description": "7 Billion parameter model",
|
25 |
+
"create_resource_cost": 75,
|
26 |
+
"invoke_resource_cost": 40,
|
27 |
+
},
|
28 |
+
"deepseek-r1": {
|
29 |
+
"description": "7 Billion reasoning model",
|
30 |
+
"create_resource_cost": 28,
|
31 |
+
"invoke_resource_cost": 35,
|
32 |
},
|
33 |
"gemini-2.5-flash-preview-04-17": {
|
34 |
"description": "Adaptive thinking, cost efficiency",
|
35 |
+
"create_expense_cost": 0.005,
|
36 |
+
"invoke_expense_cost": 0.00017
|
37 |
},
|
38 |
"gemini-2.5-pro-preview-03-25": {
|
39 |
"description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
|
40 |
+
"create_expense_cost": 0.005,
|
41 |
+
"invoke_expense_cost": 0.0001275
|
42 |
},
|
43 |
"gemini-2.0-flash": {
|
44 |
"description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
|
45 |
+
"create_expense_cost": 0.005,
|
46 |
+
"invoke_expense_cost": 0.00017
|
47 |
},
|
48 |
"gemini-2.0-flash-lite": {
|
49 |
"description": "Cost efficiency and low latency",
|
50 |
+
"create_expense_cost": 0.005,
|
51 |
+
"invoke_expense_cost": 0.00017
|
52 |
},
|
53 |
"gemini-1.5-flash": {
|
54 |
"description": "Fast and versatile performance across a diverse variety of tasks",
|
55 |
+
"create_expense_cost": 0.005,
|
56 |
+
"invoke_expense_cost": 0.00017
|
57 |
},
|
58 |
"gemini-1.5-flash-8b": {
|
59 |
"description": "High volume and lower intelligence tasks",
|
60 |
+
"create_expense_cost": 0.005,
|
61 |
+
"invoke_expense_cost": 0.00017
|
62 |
},
|
63 |
"gemini-1.5-pro": {
|
64 |
"description": "Complex reasoning tasks requiring more intelligence",
|
65 |
+
"create_expense_cost": 0.005,
|
66 |
+
"invoke_expense_cost": 0.0001275
|
67 |
},
|
68 |
"gemini-2.0-flash-live-001": {
|
69 |
"description": "Low-latency bidirectional voice and video interactions",
|
70 |
+
"create_expense_cost": 0.005,
|
71 |
+
"invoke_expense_cost": 0.000635
|
72 |
}
|
73 |
}
|
74 |
|
src/tools/default_tools/agent_creater_tool.py
CHANGED
@@ -45,21 +45,25 @@ class AgentCreator():
|
|
45 |
system_prompt = kwargs.get("system_prompt")
|
46 |
description = kwargs.get("description")
|
47 |
model_costs = AgentCostManager().get_costs()
|
48 |
-
create_cost = model_costs[base_model]["create_cost"]
|
49 |
if base_model not in model_costs:
|
50 |
print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
|
51 |
base_model = "gemini-2.0-flash"
|
52 |
-
|
|
|
|
|
|
|
53 |
|
54 |
agent_manager = AgentManager()
|
55 |
try:
|
56 |
-
_,
|
57 |
agent_name=agent_name,
|
58 |
base_model=base_model,
|
59 |
system_prompt=system_prompt,
|
60 |
description=description,
|
61 |
-
|
62 |
-
|
|
|
|
|
63 |
)
|
64 |
except ValueError as e:
|
65 |
return {
|
@@ -71,5 +75,6 @@ class AgentCreator():
|
|
71 |
return {
|
72 |
"status": "success",
|
73 |
"message": "Agent successfully created",
|
74 |
-
"
|
|
|
75 |
}
|
|
|
45 |
system_prompt = kwargs.get("system_prompt")
|
46 |
description = kwargs.get("description")
|
47 |
model_costs = AgentCostManager().get_costs()
|
|
|
48 |
if base_model not in model_costs:
|
49 |
print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
|
50 |
base_model = "gemini-2.0-flash"
|
51 |
+
create_resource_cost = model_costs[base_model].get("create_resource_cost", 0)
|
52 |
+
invoke_resource_cost = model_costs[base_model].get("invoke_resource_cost", 0)
|
53 |
+
create_expense_cost = model_costs[base_model].get("create_expense_cost", 0)
|
54 |
+
invoke_expense_cost = model_costs[base_model].get("invoke_expense_cost", 0)
|
55 |
|
56 |
agent_manager = AgentManager()
|
57 |
try:
|
58 |
+
_, remaining_resource_budget, remaining_expense_budget = agent_manager.create_agent(
|
59 |
agent_name=agent_name,
|
60 |
base_model=base_model,
|
61 |
system_prompt=system_prompt,
|
62 |
description=description,
|
63 |
+
create_resource_cost=create_resource_cost,
|
64 |
+
invoke_resource_cost=invoke_resource_cost,
|
65 |
+
create_expense_cost=create_expense_cost,
|
66 |
+
invoke_expense_cost=invoke_expense_cost
|
67 |
)
|
68 |
except ValueError as e:
|
69 |
return {
|
|
|
75 |
return {
|
76 |
"status": "success",
|
77 |
"message": "Agent successfully created",
|
78 |
+
"remaining_resource_budget": remaining_resource_budget,
|
79 |
+
"remaining_expense_budget": remaining_expense_budget
|
80 |
}
|
src/tools/default_tools/ask_agent_tool.py
CHANGED
@@ -36,7 +36,7 @@ class AskAgent():
|
|
36 |
agent_manger = AgentManager()
|
37 |
|
38 |
try:
|
39 |
-
agent_response,
|
40 |
except ValueError as e:
|
41 |
return {
|
42 |
"status": "error",
|
@@ -49,5 +49,6 @@ class AskAgent():
|
|
49 |
"status": "success",
|
50 |
"message": "Agent has replied to the given prompt",
|
51 |
"output": agent_response,
|
52 |
-
"
|
|
|
53 |
}
|
|
|
36 |
agent_manger = AgentManager()
|
37 |
|
38 |
try:
|
39 |
+
agent_response, remaining_resource_budget, remaining_expense_budget = agent_manger.ask_agent(agent_name=agent_name, prompt=prompt)
|
40 |
except ValueError as e:
|
41 |
return {
|
42 |
"status": "error",
|
|
|
49 |
"status": "success",
|
50 |
"message": "Agent has replied to the given prompt",
|
51 |
"output": agent_response,
|
52 |
+
"remaining_resource_budget": remaining_resource_budget,
|
53 |
+
"remaining_expense_budget": remaining_expense_budget
|
54 |
}
|
src/tools/default_tools/fire_agent.py
CHANGED
@@ -29,7 +29,7 @@ class FireAgent():
|
|
29 |
agent_manager = AgentManager()
|
30 |
|
31 |
try:
|
32 |
-
|
33 |
except ValueError as e:
|
34 |
return {
|
35 |
"status": "error",
|
@@ -40,5 +40,6 @@ class FireAgent():
|
|
40 |
return {
|
41 |
"status": "success",
|
42 |
"message": "Agent successfully fired.",
|
43 |
-
"
|
|
|
44 |
}
|
|
|
29 |
agent_manager = AgentManager()
|
30 |
|
31 |
try:
|
32 |
+
remaining_resource_budget, remaining_expense_budget = agent_manager.delete_agent(agent_name=agent_name)
|
33 |
except ValueError as e:
|
34 |
return {
|
35 |
"status": "error",
|
|
|
40 |
return {
|
41 |
"status": "success",
|
42 |
"message": "Agent successfully fired.",
|
43 |
+
"remaining_resource_budget": remaining_resource_budget,
|
44 |
+
"remaining_expense_budget": remaining_expense_budget
|
45 |
}
|
src/tools/default_tools/get_budget.py
CHANGED
@@ -20,16 +20,23 @@ class GetBudget():
|
|
20 |
|
21 |
def run(self, **kwargs):
|
22 |
budget_manager = BudgetManager()
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
current_expense = budget_manager.get_current_expense()
|
25 |
-
|
26 |
return {
|
27 |
"status": "success",
|
28 |
"message": "Budget retrieved successfully",
|
29 |
"output": {
|
30 |
-
"
|
|
|
|
|
|
|
31 |
"current_expense": current_expense,
|
32 |
-
"
|
33 |
}
|
34 |
}
|
35 |
|
|
|
20 |
|
21 |
def run(self, **kwargs):
|
22 |
budget_manager = BudgetManager()
|
23 |
+
total_resource_budget = budget_manager.get_total_resource_budget()
|
24 |
+
current_resource_usage = budget_manager.get_current_resource_usage()
|
25 |
+
current_remaining_resource_budget = budget_manager.get_current_remaining_resource_budget()
|
26 |
+
|
27 |
+
total_expense_budget = budget_manager.get_total_expense_budget()
|
28 |
current_expense = budget_manager.get_current_expense()
|
29 |
+
current_remaining_expense_budget = budget_manager.get_total_expense_budget() - budget_manager.get_current_expense()
|
30 |
return {
|
31 |
"status": "success",
|
32 |
"message": "Budget retrieved successfully",
|
33 |
"output": {
|
34 |
+
"total_resource_budget": total_resource_budget,
|
35 |
+
"current_resource_usage": current_resource_usage,
|
36 |
+
"current_remaining_resource_budget": current_remaining_resource_budget,
|
37 |
+
"total_expense_budget": total_expense_budget,
|
38 |
"current_expense": current_expense,
|
39 |
+
"current_remaining_expense_budget": current_remaining_expense_budget,
|
40 |
}
|
41 |
}
|
42 |
|
src/tools/default_tools/google_search_tool.py
CHANGED
@@ -8,7 +8,7 @@ class GoogleSearchTool():
|
|
8 |
|
9 |
inputSchema = {
|
10 |
"name": "GoogleSearchTool",
|
11 |
-
"description": "Provides a list of URLs from google search results based on a query string.",
|
12 |
"parameters": {
|
13 |
"type": "object",
|
14 |
"properties": {
|
|
|
8 |
|
9 |
inputSchema = {
|
10 |
"name": "GoogleSearchTool",
|
11 |
+
"description": "Provides a list of URLs from google search results based on a query string. Use the urls then to get the content of the page.",
|
12 |
"parameters": {
|
13 |
"type": "object",
|
14 |
"properties": {
|
src/tools/default_tools/memory_manager.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
__all__ = ['MemoryManager']
|
3 |
|
4 |
import json
|
|
|
5 |
|
6 |
|
7 |
class MemoryManager():
|
@@ -9,7 +10,7 @@ class MemoryManager():
|
|
9 |
|
10 |
inputSchema = {
|
11 |
"name": "MemoryManager",
|
12 |
-
"description": "Updates, retrieves, or deletes the memory
|
13 |
"parameters": {
|
14 |
"type": "object",
|
15 |
"properties":{
|
@@ -43,11 +44,13 @@ class MemoryManager():
|
|
43 |
return memory_list
|
44 |
|
45 |
def update_memories(self, memories):
|
46 |
-
|
|
|
47 |
with open("src/data/memory.json", "w") as f:
|
48 |
json.dump(memories, f, indent=4)
|
49 |
|
50 |
|
|
|
51 |
def run(self, **kwargs):
|
52 |
# save it to src/data/memory.json
|
53 |
action = kwargs.get("action")
|
|
|
2 |
__all__ = ['MemoryManager']
|
3 |
|
4 |
import json
|
5 |
+
import os
|
6 |
|
7 |
|
8 |
class MemoryManager():
|
|
|
10 |
|
11 |
inputSchema = {
|
12 |
"name": "MemoryManager",
|
13 |
+
"description": "Updates, retrieves, or deletes the memory for a user. Only store important information such as user preferences, error solutions, and other information that can help you improve your performance.",
|
14 |
"parameters": {
|
15 |
"type": "object",
|
16 |
"properties":{
|
|
|
44 |
return memory_list
|
45 |
|
46 |
def update_memories(self, memories):
|
47 |
+
os.makedirs("src/data", exist_ok=True)
|
48 |
+
# Save the memory to src/data/memory.json
|
49 |
with open("src/data/memory.json", "w") as f:
|
50 |
json.dump(memories, f, indent=4)
|
51 |
|
52 |
|
53 |
+
|
54 |
def run(self, **kwargs):
|
55 |
# save it to src/data/memory.json
|
56 |
action = kwargs.get("action")
|
src/tools/default_tools/test_cost/agent_creator_tool.py
CHANGED
@@ -150,8 +150,8 @@ class AgentCreator():
|
|
150 |
base_model=base_model,
|
151 |
system_prompt=system_prompt,
|
152 |
description=description,
|
153 |
-
|
154 |
-
|
155 |
)
|
156 |
except ValueError as e:
|
157 |
return {
|
|
|
150 |
base_model=base_model,
|
151 |
system_prompt=system_prompt,
|
152 |
description=description,
|
153 |
+
create_resource_cost=create_cost,
|
154 |
+
invoke_resource_cost=invoke_cost
|
155 |
)
|
156 |
except ValueError as e:
|
157 |
return {
|
src/tools/default_tools/tool_deletor.py
CHANGED
@@ -33,7 +33,7 @@ class ToolDeletor():
|
|
33 |
if not file_path.startswith("src/tools/user_tools/"):
|
34 |
return {
|
35 |
"status": "error",
|
36 |
-
"message": "File path must start with tools/",
|
37 |
"output": None
|
38 |
}
|
39 |
os = importlib.import_module("os")
|
|
|
33 |
if not file_path.startswith("src/tools/user_tools/"):
|
34 |
return {
|
35 |
"status": "error",
|
36 |
+
"message": "File path must start with src/tools/user_tools/",
|
37 |
"output": None
|
38 |
}
|
39 |
os = importlib.import_module("os")
|
src/tools/user_tools/get_website_tool.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
import importlib
|
|
|
|
|
|
|
2 |
|
3 |
__all__ = ['GetWebsiteTool']
|
4 |
|
@@ -8,7 +11,7 @@ class GetWebsiteTool():
|
|
8 |
|
9 |
inputSchema = {
|
10 |
"name": "GetWebsiteTool",
|
11 |
-
"description": "Returns the content of a website based on a query string.",
|
12 |
"parameters": {
|
13 |
"type": "object",
|
14 |
"properties": {
|
@@ -21,6 +24,57 @@ class GetWebsiteTool():
|
|
21 |
}
|
22 |
}
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def run(self, **kwargs):
|
25 |
headers = {
|
26 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0',
|
@@ -46,7 +100,7 @@ class GetWebsiteTool():
|
|
46 |
"message": "Missing required parameters: 'url'",
|
47 |
"output": None
|
48 |
}
|
49 |
-
|
50 |
output = None
|
51 |
requests = importlib.import_module("requests")
|
52 |
bs4 = importlib.import_module("bs4")
|
@@ -57,17 +111,16 @@ class GetWebsiteTool():
|
|
57 |
# Parse the content using BeautifulSoup
|
58 |
soup = BeautifulSoup(response.content, 'html.parser')
|
59 |
# Extract text from the parsed HTML
|
60 |
-
|
|
|
|
|
|
|
61 |
else:
|
62 |
return {
|
63 |
"status": "error",
|
64 |
"message": f"Failed to fetch content from {url}. Status code: {response.status_code}",
|
65 |
"output": None
|
66 |
}
|
67 |
-
|
68 |
-
# truncate the results to avoid excessive output
|
69 |
-
if len(output) > 1000:
|
70 |
-
output = output[:1000] + "... (truncated)"
|
71 |
|
72 |
return {
|
73 |
"status": "success",
|
|
|
1 |
import importlib
|
2 |
+
from collections import defaultdict
|
3 |
+
import re
|
4 |
+
import time
|
5 |
|
6 |
__all__ = ['GetWebsiteTool']
|
7 |
|
|
|
11 |
|
12 |
inputSchema = {
|
13 |
"name": "GetWebsiteTool",
|
14 |
+
"description": "Returns a summary of the content of a website based on a query string.",
|
15 |
"parameters": {
|
16 |
"type": "object",
|
17 |
"properties": {
|
|
|
24 |
}
|
25 |
}
|
26 |
|
27 |
+
def summarize_text(self, text):
|
28 |
+
# Clean the text more thoroughly
|
29 |
+
text = re.sub(r'\[[0-9]*\]', ' ', text)
|
30 |
+
text = re.sub(r'\s+', ' ', text)
|
31 |
+
text = re.sub(r'[^a-zA-Z0-9.\s]', '', text) # Remove special characters except periods
|
32 |
+
|
33 |
+
# Tokenize into sentences
|
34 |
+
sentences = re.split(r'(?<=[.!?])\s+', text)
|
35 |
+
sentences = [s.strip() for s in sentences if s]
|
36 |
+
|
37 |
+
# Calculate word frequencies
|
38 |
+
word_frequencies = defaultdict(int)
|
39 |
+
for sentence in sentences:
|
40 |
+
words = sentence.lower().split()
|
41 |
+
for word in words:
|
42 |
+
word_frequencies[word] += 1
|
43 |
+
|
44 |
+
# Normalize word frequencies
|
45 |
+
total_words = sum(word_frequencies.values())
|
46 |
+
if total_words > 0:
|
47 |
+
for word in word_frequencies:
|
48 |
+
word_frequencies[word] /= total_words
|
49 |
+
|
50 |
+
# Calculate sentence scores based on word frequencies, sentence length, and coherence
|
51 |
+
sentence_scores = {}
|
52 |
+
for i, sentence in enumerate(sentences):
|
53 |
+
score = 0
|
54 |
+
words = sentence.lower().split()
|
55 |
+
for word in words:
|
56 |
+
score += word_frequencies[word]
|
57 |
+
|
58 |
+
# Consider sentence length
|
59 |
+
sentence_length_factor = 1 - abs(len(words) - 15) / 15 # Prefer sentences around 15 words
|
60 |
+
score += sentence_length_factor * 0.1
|
61 |
+
|
62 |
+
# Add a coherence score
|
63 |
+
if i > 0 and sentences[i-1] in sentence_scores:
|
64 |
+
previous_sentence_words = sentences[i-1].lower().split()
|
65 |
+
common_words = set(words) & set(previous_sentence_words)
|
66 |
+
coherence_score = len(common_words) / len(words)
|
67 |
+
score += coherence_score * 0.1
|
68 |
+
|
69 |
+
sentence_scores[sentence] = score
|
70 |
+
|
71 |
+
# Get the top 3 sentences with the highest scores
|
72 |
+
ranked_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:3]
|
73 |
+
|
74 |
+
# Generate the summary
|
75 |
+
summary = ". ".join(ranked_sentences) + "."
|
76 |
+
return summary
|
77 |
+
|
78 |
def run(self, **kwargs):
|
79 |
headers = {
|
80 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0',
|
|
|
100 |
"message": "Missing required parameters: 'url'",
|
101 |
"output": None
|
102 |
}
|
103 |
+
|
104 |
output = None
|
105 |
requests = importlib.import_module("requests")
|
106 |
bs4 = importlib.import_module("bs4")
|
|
|
111 |
# Parse the content using BeautifulSoup
|
112 |
soup = BeautifulSoup(response.content, 'html.parser')
|
113 |
# Extract text from the parsed HTML
|
114 |
+
text = soup.get_text()
|
115 |
+
|
116 |
+
# Summarize the text
|
117 |
+
output = self.summarize_text(text)
|
118 |
else:
|
119 |
return {
|
120 |
"status": "error",
|
121 |
"message": f"Failed to fetch content from {url}. Status code: {response.status_code}",
|
122 |
"output": None
|
123 |
}
|
|
|
|
|
|
|
|
|
124 |
|
125 |
return {
|
126 |
"status": "success",
|
src/tools/user_tools/python_sandbox_tool.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import subprocess
|
3 |
+
import sys
|
4 |
+
import os
|
5 |
+
import tempfile
|
6 |
+
|
7 |
+
__all__ = ['PythonSandboxTool']
|
8 |
+
|
9 |
+
class PythonSandboxTool():
|
10 |
+
dependencies = []
|
11 |
+
|
12 |
+
inputSchema = {
|
13 |
+
"name": "PythonSandboxTool",
|
14 |
+
"description": "Executes Python code in a sandbox environment.",
|
15 |
+
"parameters": {
|
16 |
+
"type": "object",
|
17 |
+
"properties": {
|
18 |
+
"code": {
|
19 |
+
"type": "string",
|
20 |
+
"description": "The Python code to execute."
|
21 |
+
}
|
22 |
+
},
|
23 |
+
"required": ["code"]
|
24 |
+
}
|
25 |
+
}
|
26 |
+
|
27 |
+
def run(self, **kwargs):
|
28 |
+
code = kwargs.get("code")
|
29 |
+
if not code:
|
30 |
+
return {"status": "error", "message": "Missing required parameter: 'code'", "output": None}
|
31 |
+
|
32 |
+
# Create a temporary directory
|
33 |
+
with tempfile.TemporaryDirectory() as tmpdir:
|
34 |
+
# Create a temporary file inside the directory
|
35 |
+
with tempfile.NamedTemporaryFile(suffix='.py', mode='w', delete=False, dir=tmpdir) as temp_file:
|
36 |
+
temp_file.write(code)
|
37 |
+
temp_file_name = temp_file.name
|
38 |
+
|
39 |
+
# Construct the command to execute the Python code
|
40 |
+
command = [sys.executable, temp_file_name]
|
41 |
+
|
42 |
+
try:
|
43 |
+
# Execute the command in a subprocess
|
44 |
+
process = subprocess.run(
|
45 |
+
command,
|
46 |
+
capture_output=True,
|
47 |
+
text=True,
|
48 |
+
timeout=10, # Timeout after 10 seconds
|
49 |
+
check=False # Do not raise an exception on non-zero exit code
|
50 |
+
)
|
51 |
+
|
52 |
+
# Get the output and error messages
|
53 |
+
stdout = process.stdout
|
54 |
+
stderr = process.stderr
|
55 |
+
|
56 |
+
# Check the return code
|
57 |
+
return_code = process.returncode
|
58 |
+
|
59 |
+
# Prepare the result
|
60 |
+
result = {
|
61 |
+
"stdout": stdout,
|
62 |
+
"stderr": stderr,
|
63 |
+
"return_code": return_code
|
64 |
+
}
|
65 |
+
|
66 |
+
# Return the result
|
67 |
+
return {"status": "success", "message": "Python code executed successfully.", "output": result}
|
68 |
+
|
69 |
+
except subprocess.TimeoutExpired:
|
70 |
+
return {"status": "error", "message": "Python code execution timed out.", "output": None}
|
71 |
+
except Exception as e:
|
72 |
+
return {"status": "error", "message": f"Python code execution failed: {str(e)}", "output": None}
|
73 |
+
|
74 |
+
|
src/tools/user_tools/weather_tool.py
CHANGED
@@ -19,7 +19,7 @@ class WeatherApi():
|
|
19 |
},
|
20 |
"required": ["location"],
|
21 |
},
|
22 |
-
"
|
23 |
}
|
24 |
|
25 |
def run(self, **kwargs):
|
|
|
19 |
},
|
20 |
"required": ["location"],
|
21 |
},
|
22 |
+
"invoke_resource_cost": 0.1,
|
23 |
}
|
24 |
|
25 |
def run(self, **kwargs):
|