Commit 326479a · download model
1 Parent(s): 66d6d1f
app.py
CHANGED
@@ -19,10 +19,9 @@ class GaiaToolCallingAgent:
     def __init__(self, local_model=None):
         print("GaiaToolCallingAgent initialized.")
         self.tool_manager = ToolManager()
         self.name = "tool_agent"
         self.description = "A specialized agent that uses various tools to answer questions"
 
-        # Use local model if provided, or create a simpler one
         self.local_model = local_model
         if not self.local_model:
             try:
@@ -35,13 +34,10 @@ class GaiaToolCallingAgent:
                 self.local_model = None
 
     def run(self, query: str) -> str:
-        """Process a query and return a response using available tools."""
         print(f"Processing query: {query}")
         tools = self.tool_manager.get_tools()
 
-        # For each tool, try to get relevant information
         context_info = []
-
         for tool in tools:
             try:
                 if self._should_use_tool(tool, query):
@@ -52,10 +48,8 @@ class GaiaToolCallingAgent:
             except Exception as e:
                 print(f"Error using {tool.name}: {e}")
 
-        # Combine all context information
         full_context = "\n\n".join(context_info) if context_info else ""
 
-        # If we have context and a local model, generate a proper response
         if full_context and self.local_model:
             try:
                 prompt = f"""
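Note: run() reduces to gather-then-summarize — query every applicable tool, join the collected context, and only then let the local model phrase the answer. A minimal standalone sketch of that pattern; DummyTool and gather_context are hypothetical names, not part of this repo:

    # Gather context from every tool that opts in, tolerating per-tool failures.
    class DummyTool:
        def __init__(self, name, payload):
            self.name = name
            self.payload = payload

        def forward(self, query):
            return f"{self.name}: {self.payload} (for {query!r})"

    def gather_context(tools, query, should_use):
        context_info = []
        for tool in tools:
            try:
                if should_use(tool, query):
                    context_info.append(tool.forward(query))
            except Exception as e:
                print(f"Error using {tool.name}: {e}")
        return "\n\n".join(context_info)

    tools = [DummyTool("web_search", "result A"), DummyTool("wikipedia_search", "result B")]
    print(gather_context(tools, "what is GAIA?", lambda tool, query: True))

If the join comes back empty, run() skips the model call and falls through to a canned "couldn't find any relevant information" reply, as the next hunk shows.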
@@ -71,24 +65,18 @@ class GaiaToolCallingAgent:
                 return response
             except Exception as e:
                 print(f"Error generating response with local model: {e}")
-                # Fall back to returning just the context
                 return full_context
         else:
-            # No context or no model, return whatever we have
             if not full_context:
                 return "I couldn't find any relevant information to answer your question."
             return full_context
 
     def __call__(self, query: str) -> str:
-        """Make the agent callable so it can be used directly by CodeAgent."""
         print(f"Tool agent received query: {query}")
         return self.run(query)
 
     def _should_use_tool(self, tool: Tool, query: str) -> bool:
-        """Determine if a specific tool should be used for the query."""
         query_lower = query.lower()
-
-        # Tool-specific patterns
         patterns = {
             "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
             "web_content": ["content", "webpage", "website", "page"],
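Note: the patterns table is split across this hunk boundary; its remaining entries open the next hunk. The routing rule is plain keyword containment with a permissive default, as in this toy standalone version (trimmed table, hypothetical should_use_tool name):

    # Keyword router: a tool with no entry in the table is always tried.
    patterns = {
        "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
        "web_content": ["content", "webpage", "website", "page"],
    }

    def should_use_tool(tool_name, query):
        query_lower = query.lower()
        if tool_name not in patterns:
            return True
        return any(p in query_lower for p in patterns[tool_name])

    assert should_use_tool("web_search", "Who won the latest race?")
    assert not should_use_tool("web_content", "2 + 2 = ?")
    assert should_use_tool("unlisted_tool", "anything")

Because question words like "what" and "how" appear in almost every query, web_search effectively always fires; the default-True branch does the same for any tool missing from the table.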
@@ -96,77 +84,70 @@ class GaiaToolCallingAgent:
             "wikipedia_search": ["wikipedia", "wiki", "article"],
             "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"]
         }
-
-        # Use all tools if patterns dict doesn't have the tool name
         if tool.name not in patterns:
             return True
-
         return any(pattern in query_lower for pattern in patterns.get(tool.name, []))
 
+def download_model_if_needed(model_path, model_url):
+    if not os.path.exists(model_path):
+        print(f"Downloading model from {model_url}...")
+        os.makedirs(os.path.dirname(model_path), exist_ok=True)
+        with requests.get(model_url, stream=True) as response:
+            response.raise_for_status()
+            with open(model_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        print("Download complete.")
+
 def create_manager_agent() -> CodeAgent:
-    """Create and configure the main GAIA agent."""
-
     try:
-
-        from config import LOCAL_MODEL_CONFIG, USE_LLAMACPP, LLAMACPP_CONFIG
+        from config import USE_LLAMACPP, LLAMACPP_CONFIG
 
-        # Use llama-cpp-python model (no PyTorch dependency)
         if USE_LLAMACPP:
-            #
+            # Use TheBloke's model with auto-download
+            model_path = LLAMACPP_CONFIG.get("model_path") or "./models/llama-2-7b.Q4_0.gguf"
+            model_url = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf"
+
+            download_model_if_needed(model_path, model_url)
+
             model = LlamaCppModel(
-                model_path=LLAMACPP_CONFIG.get("model_path"),
-                model_url=LLAMACPP_CONFIG.get("model_url"),
+                model_path=model_path,
                 n_ctx=LLAMACPP_CONFIG.get("n_ctx", 2048),
                 n_gpu_layers=LLAMACPP_CONFIG.get("n_gpu_layers", 0),
-                max_tokens=LLAMACPP_CONFIG.get("max_tokens", 512),
                 temperature=LLAMACPP_CONFIG.get("temperature", 0.7)
             )
-            print(f"Using LlamaCpp model")
+            print(f"Using LlamaCpp model from {model_path}")
         else:
-            # Use a simpler stub model if needed
             from smolagents import StubModel
             model = StubModel()
             print("Using StubModel as fallback")
-
     except Exception as e:
         print(f"Error setting up model: {e}")
-        # Use a simplified configuration as fallback
         try:
-            # Simple fallback with default params
             model = LlamaCppModel()
             print("Using fallback LlamaCpp model configuration")
         except Exception as e2:
-            # Last resort fallback
            from smolagents import StubModel
            model = StubModel()
            print(f"Using StubModel due to error: {e2}")
 
-    # Initialize the managed tool-calling agent, sharing the model
     tool_agent = GaiaToolCallingAgent(local_model=model)
 
-    # Create the manager agent
     manager_agent = CodeAgent(
         model=model,
         tools=[],
         managed_agents=[tool_agent],
         additional_authorized_imports=[
-            "json",
-            "pandas",
-            "numpy",
-            "re",
-            "requests",
-            "bs4"
+            "json", "pandas", "numpy", "re", "requests", "bs4"
         ],
         planning_interval=3,
         verbosity_level=2,
         max_steps=10
     )
-
     print("Manager agent created with local model")
     return manager_agent
 
 def create_agent():
-    """Create the GAIA agent system."""
     try:
         print("Initializing GAIA agent system...")
         return create_manager_agent()
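Note: download_model_if_needed streams straight into its final path, so a connection drop leaves a partial file that the os.path.exists check will later mistake for a complete model. A hedged sketch of a temp-file-and-rename variant (download_model_atomic is a hypothetical name, not in this commit):

    import os
    import requests

    def download_model_atomic(model_path, model_url):
        # Download to a side file and rename only on success, so a crashed
        # run can never leave a truncated file at model_path.
        if os.path.exists(model_path):
            return
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        tmp_path = model_path + ".part"
        with requests.get(model_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(tmp_path, model_path)  # atomic rename on POSIX

huggingface_hub.hf_hub_download(repo_id="TheBloke/Llama-2-7B-GGUF", filename="llama-2-7b.Q4_0.gguf") would achieve much the same with resumable, cached downloads, at the cost of one more dependency.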
@@ -175,13 +156,7 @@ def create_agent():
         return None
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the GAIA agent on them, submits all answers,
-    and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
-
+    space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
@@ -193,7 +168,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Initialize Agent
     try:
         print("Initializing GAIA agent system...")
         agent = create_agent()
@@ -204,7 +178,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Error initializing agent: {e}")
         return f"Error initializing agent: {e}", None
 
-    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
@@ -221,7 +194,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
 
-    # 3. Run Agent on Questions
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
@@ -232,28 +204,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            # Run the agent and get the response
             response = agent.run(f"Answer this question concisely: {question_text}")
-
-            # Clean up the response if needed
             if isinstance(response, dict):
                 submitted_answer = response.get("answer", str(response))
             else:
                 submitted_answer = str(response)
-
-            # Add to submission payload
             answers_payload.append({
                 "task_id": task_id,
                 "submitted_answer": submitted_answer
             })
-
-            # Log the result
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text,
                 "Submitted Answer": submitted_answer
             })
-
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({
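Note: agent frameworks may hand back a dict, a string, or an arbitrary object, and the loop above flattens all of them to a string before building the payload. The same rule in isolation (normalize_answer is a hypothetical name):

    def normalize_answer(response):
        # Dicts may carry the answer under an "answer" key; everything else
        # is stringified as-is.
        if isinstance(response, dict):
            return response.get("answer", str(response))
        return str(response)

    assert normalize_answer({"answer": "42"}) == "42"
    assert normalize_answer(42) == "42"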
@@ -266,19 +230,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
+
     print(f"Submitting {len(answers_payload)} answers to API...")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
-
         status_message = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
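Note: for reference, the body POSTed to the submit endpoint is the three-field shape assembled above; an illustrative instance (all values made up):

    submission_data = {
        "username": "alice",
        "agent_code": "https://huggingface.co/spaces/alice/gaia-agent/tree/main",
        "answers": [
            {"task_id": "task-001", "submitted_answer": "42"},
        ],
    }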
@@ -293,29 +256,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Error during submission: {e}")
     return status_message, pd.DataFrame(results_log)
 
-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
     gr.Markdown("# GAIA Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        …
-        The agent uses a managed tool-calling architecture and the smolagents framework for reliable answers.
-        """
-    )
-
+    gr.Markdown("""
+    **Instructions:**
+    1. Log in to your Hugging Face account using the button below.
+    2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and see the score.
+    """)
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
     print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)
     demo.launch(debug=True, share=False)
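Note: run_button.click wires outputs but no inputs, yet run_and_submit_all takes a profile argument. Gradio fills parameters type-hinted as gr.OAuthProfile itself when a LoginButton is present, passing None when nobody is logged in. A minimal sketch of that wiring (greet is a hypothetical example; OAuth only resolves when hosted on a Space):

    import gradio as gr

    def greet(profile: gr.OAuthProfile | None):
        # Not an input component: Gradio injects the profile by type hint.
        if profile is None:
            return "Not logged in."
        return f"Hello, {profile.username}!"

    with gr.Blocks() as demo:
        gr.LoginButton()
        btn = gr.Button("Greet")
        out = gr.Textbox()
        btn.click(fn=greet, outputs=[out])  # no inputs needed for profile

    # demo.launch()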
config.py
CHANGED
@@ -12,12 +12,21 @@ USE_LOCAL_MODEL = True  # Set to False to use remote API model instead
 USE_LLAMACPP = True  # Set to True to use llama-cpp-python instead of transformers
 
 # Configuration for llama-cpp-python model
+# LLAMACPP_CONFIG = {
+#     "model_path": None,  # Will use a default small model if None
+#     # Using a smaller GGUF model to avoid download issues
+#     "model_url": "https://huggingface.co/eachadea/ggml-gridlocked-alpha-3b/resolve/main/ggml-gridlocked-3b-q4_0.bin",
+#     "n_ctx": 2048,
+#     "n_gpu_layers": 0,  # Use 0 for CPU-only
+#     "max_tokens": 1024,
+#     "temperature": 0.7
+# }
+
 LLAMACPP_CONFIG = {
-    "model_path": None,  # Will use a default small model if None
-    # Using a smaller GGUF model to avoid download issues
-    "model_url": "https://huggingface.co/eachadea/ggml-gridlocked-alpha-3b/resolve/main/ggml-gridlocked-3b-q4_0.bin",
+    "model_path": None,  # Use a default or provide a local path manually
+    "model_url": "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf",
     "n_ctx": 2048,
-    "n_gpu_layers": 0,  # Use 0 for CPU-only
+    "n_gpu_layers": 0,  # CPU-only, adjust as needed for GPU
     "max_tokens": 1024,
     "temperature": 0.7
 }
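Note: a sketch of how this config plausibly maps onto llama-cpp-python, assuming LlamaCppModel wraps llama_cpp.Llama: n_ctx and n_gpu_layers belong to the constructor, while max_tokens and temperature are per-call sampling options. The Q4_0 7B GGUF referenced by model_url is a roughly 4 GB download, so with "model_path": None the auto-download in app.py does real work on first boot.

    from llama_cpp import Llama

    from config import LLAMACPP_CONFIG

    llm = Llama(
        model_path="./models/llama-2-7b.Q4_0.gguf",  # assumed local path
        n_ctx=LLAMACPP_CONFIG["n_ctx"],
        n_gpu_layers=LLAMACPP_CONFIG["n_gpu_layers"],
    )
    out = llm(
        "Q: What does GAIA benchmark? A:",
        max_tokens=LLAMACPP_CONFIG["max_tokens"],
        temperature=LLAMACPP_CONFIG["temperature"],
    )
    print(out["choices"][0]["text"])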