Spaces:
Sleeping
Sleeping
Commit
·
c1db1fc
1
Parent(s):
e305927
- changes to get agent working
Browse files- app.py +480 -6
- requirements.txt +7 -1
app.py
CHANGED
@@ -6,17 +6,455 @@ import pandas as pd
|
|
6 |
import time
|
7 |
import json
|
8 |
from typing import Dict, List, Union, Optional
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# (Keep Constants as is)
|
11 |
# --- Constants ---
|
12 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# --- Basic Agent Definition ---
|
15 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
class BasicAgent:
|
17 |
def __init__(self):
|
18 |
print("BasicAgent initialized.")
|
19 |
# Initialize the Hugging Face API client
|
|
|
20 |
self.hf_api_url = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
|
21 |
self.hf_api_token = os.getenv("HF_API_TOKEN")
|
22 |
if not self.hf_api_token:
|
@@ -150,14 +588,50 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
150 |
|
151 |
api_url = DEFAULT_API_URL
|
152 |
questions_url = f"{api_url}/questions"
|
153 |
-
submit_url = f"{api_url}/submit"
|
154 |
-
|
155 |
-
# 1. Instantiate Agent ( modify this part to create your agent)
|
156 |
try:
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
except Exception as e:
|
159 |
-
print(f"Error instantiating
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
# In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
|
162 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
163 |
print(agent_code)
|
|
|
6 |
import time
|
7 |
import json
|
8 |
from typing import Dict, List, Union, Optional
|
9 |
+
import re
|
10 |
+
from bs4 import BeautifulSoup
|
11 |
+
from duckduckgo_search import DDGS
|
12 |
+
|
13 |
+
from smolagents import Tool, CodeAgent, InferenceClientModel
|
14 |
+
|
15 |
+
import random
|
16 |
+
from smolagents import CodeAgent, InferenceClientModel
|
17 |
+
|
18 |
+
# Import our custom tools from their modules
|
19 |
+
# from smolagents.tools import DuckDuckGoSearchTool, WeatherInfoTool, HubStatsTool
|
20 |
+
# from smolagents.tools import WebPageVisitTool, WebpageContentExtractorTool
|
21 |
+
|
22 |
+
from smolagents import CodeAgent, InferenceClientModel, load_tool
|
23 |
+
|
24 |
+
|
25 |
+
# Import necessary libraries
|
26 |
+
import random
|
27 |
+
from smolagents import CodeAgent, InferenceClientModel
|
28 |
+
|
29 |
+
# Import our custom tools from their modules
|
30 |
+
# from tools import DuckDuckGoSearchTool, WeatherInfoTool, HubStatsTool
|
31 |
+
# from retriever import load_guest_dataset
|
32 |
+
|
33 |
+
from langchain.docstore.document import Document
|
34 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
35 |
+
from langchain_community.retrievers import BM25Retriever
|
36 |
+
import functools
|
37 |
+
|
38 |
+
# Create a knowledge base for the agent
|
39 |
+
# Static Markdown-formatted knowledge base. It is split into chunks by
# create_knowledge_documents() and served through the retriever tool.
GAIA_KNOWLEDGE = """
### AI and Agent Concepts
- An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals.
- GAIA (General AI Assistant) is a framework for creating and evaluating AI assistants that can perform a wide range of tasks.
- The agent loop consists of perception, reasoning, and action.
- RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with generation capabilities of language models.
- An LLM (Large Language Model) is a neural network trained on vast amounts of text data to understand and generate human language.

### Agent Capabilities
- Tool use refers to an agent's ability to employ external tools like search engines, APIs, or specialized algorithms.
- An effective agent should be able to decompose complex problems into manageable parts.
- Chain-of-thought reasoning allows agents to break down problem-solving steps to improve accuracy.
- Agents should apply appropriate reasoning strategies based on the type of question (factual, analytical, etc.)
- Self-reflection helps agents identify and correct errors in their reasoning.

### Evaluation Criteria
- Agent responses should be accurate, relevant, and factually correct.
- Effective agents provide concise yet comprehensive answers.
- Agents should acknowledge limitations and uncertainties when appropriate.
- Good agents can follow multi-step instructions and fulfill all requirements.
- Reasoning transparency helps users understand how the agent arrived at its conclusions.
"""
|
61 |
|
62 |
# (Keep Constants as is)
|
63 |
# --- Constants ---
# Base URL of the scoring service (questions are fetched from and answers
# submitted to paths under it).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Endpoint that GaiaAgent.query_llm posts text-generation requests to.
LLAMA_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
# Access token read from the environment; None when HF_API_TOKEN is unset.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
# Authorization header sent with LLM requests; empty when no token is set.
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
MAX_RETRIES = 3  # attempts per LLM request before falling back
RETRY_DELAY = 2  # seconds
|
71 |
+
|
72 |
+
# Create knowledge base documents
|
73 |
+
def create_knowledge_documents(text=None, chunk_size=500, chunk_overlap=50):
    """Create documents from the knowledge base for retrieval.

    Args:
        text: Text to split into chunks. When omitted, the module-level
            ``GAIA_KNOWLEDGE`` string is used.
        chunk_size: Maximum chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the
            text splitter.

    Returns:
        A list with one ``Document`` per chunk, in splitter order.
    """
    source_text = GAIA_KNOWLEDGE if text is None else text
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Coarsest separators first: paragraphs, lines, sentences, words.
        separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
    )
    return [
        Document(page_content=chunk)
        for chunk in text_splitter.split_text(source_text)
    ]
|
82 |
+
|
83 |
# --- Basic Agent Definition ---
|
84 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
85 |
+
|
86 |
+
# --- Tools ---
|
87 |
+
class WebSearchTool(Tool):
    """Tool that runs a DuckDuckGo text search and formats the hits as text."""

    name = "web_search"
    description = "Search the web for information about a query using DuckDuckGo."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query.",
        }
    }
    output_type = "string"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Upper bound on the number of hits requested per search.
        self.max_results = 3

    def forward(self, query: str) -> str:
        """Search the web for ``query`` and return a numbered result list.

        Args:
            query: The search query.

        Returns:
            A block starting with ``"\\nWeb Search Results:\\n"`` followed by
            one numbered entry (title, body, ``Source: <url>``) per hit;
            ``"No web search results found."`` when there are no hits; or an
            ``"Error performing web search: ..."`` message when the search
            itself fails.

        Raises:
            TypeError: If ``query`` is not a string.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(query, str):
            raise TypeError("Query must be a string.")
        try:
            with DDGS() as ddgs:
                hits = list(ddgs.text(query, max_results=self.max_results))

            if not hits:
                return "No web search results found."

            # ``.get`` keeps one malformed hit from discarding the whole search.
            entries = [
                f"\n{i}. {hit.get('title', '')}\n {hit.get('body', '')}\n Source: {hit.get('href', '')}\n"
                for i, hit in enumerate(hits, 1)
            ]
            return "\nWeb Search Results:\n" + "".join(entries)
        except Exception as e:
            # Best effort: report the failure as text instead of raising, so
            # the caller can continue without web results.
            print(f"Error in web search: {str(e)}")
            return f"Error performing web search: {str(e)}"
|
120 |
+
|
121 |
+
|
122 |
+
class WebContentTool(Tool):
    """Tool that downloads a web page and returns its visible text."""

    name = "web_content"
    description = "Fetch and extract content from a specific webpage."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the webpage to fetch content from."
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return its text content, truncated to 2000 chars.

        Args:
            url: The URL of the webpage to fetch content from.

        Returns:
            ``"Content from <url>:\\n\\n<text>"`` on success, or an
            ``"Error fetching content from <url>: ..."`` message when the
            request or parsing fails.

        Raises:
            TypeError: If ``url`` is not a string.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(url, str):
            raise TypeError("URL must be a string.")
        try:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove script and style elements so only visible text remains.
            for element in soup(["script", "style"]):
                element.extract()

            text = soup.get_text(separator='\n')

            # Clean up text: trim each line, break lines only on runs of
            # whitespace (double spaces) and drop blanks. Splitting on a
            # single space would put every word on its own line.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = '\n'.join(chunk for chunk in chunks if chunk)

            # Truncate if too long to keep the downstream prompt small.
            if len(text) > 2000:
                text = text[:2000] + "... [content truncated]"

            return f"Content from {url}:\n\n{text}"
        except Exception as e:
            # Best effort: report the failure as text instead of raising.
            print(f"Error fetching web content: {str(e)}")
            return f"Error fetching content from {url}: {str(e)}"
|
164 |
+
|
165 |
+
|
166 |
+
class GaiaRetrieverTool(Tool):
    """Tool that looks up knowledge-base chunks with a BM25 retriever."""

    name = "gaia_retriever"
    description = "Semantic search for retrieving relevant information for GaiaAgent."
    inputs = {
        "query": {
            "type": "string",
            "description": "Query for semantic search."
        }
    }
    output_type = "string"

    def __init__(self, docs, **kwargs):
        """Build the retriever over ``docs``.

        Args:
            docs: Documents to index; each must expose ``page_content``.
            **kwargs: Forwarded to the ``Tool`` base class.
        """
        super().__init__(**kwargs)
        self.retriever = BM25Retriever.from_documents(docs, k=3)
        self.docs = docs  # Store docs for fallback

    def forward(self, query: str) -> str:
        """Return the knowledge-base chunks retrieved for ``query``.

        Args:
            query: Query for the retriever.

        Returns:
            A bulleted ``"\\nRetrieved Information:\\n"`` block; a bulleted
            block of the first (up to three) stored documents when the
            retriever returns nothing; or a fixed fallback sentence when
            retrieval raises.

        Raises:
            TypeError: If ``query`` is not a string.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(query, str):
            raise TypeError("Query must be a string.")
        try:
            docs = self.retriever.invoke(query)
            if not docs:
                # Fallback: return the first few stored chunks as general knowledge.
                return "\nNo specific information found. Here's some general knowledge:\n" + "".join(
                    f"\n- {doc.page_content}" for doc in self.docs[:3]
                )
            return "\nRetrieved Information:\n" + "".join(
                f"\n- {doc.page_content}" for doc in docs
            )
        except Exception as e:
            print(f"Error in retriever: {str(e)}")
            # Return a fallback response rather than raising.
            return "Unable to retrieve specific information. The agent will rely on its general knowledge."
|
198 |
+
|
199 |
+
# --- Agent ---
|
200 |
+
class GaiaAgent:
    """Question-answering agent that gathers context with tools, then asks an LLM.

    Calling the agent with a question selects tools heuristically, collects
    knowledge-base / web-search / web-page text, builds a prompt and sends it
    to the LLM endpoint. Canned rule-based answers are the fallback when no
    API token is configured or every request attempt fails.
    """

    # Substrings whose presence in a question suggests a web search is needed.
    _WEB_SEARCH_PATTERNS = (
        "current", "latest", "recent", "news", "update", "today",
        "statistics", "data", "facts", "information about",
        "what is happening", "how many", "where is", "when was",
    )

    # Whole-word matcher for topics covered by the local knowledge base.
    # Word boundaries stop short terms such as "ai" or "rag" from matching
    # inside unrelated words ("said", "paragraph").
    _KNOWLEDGE_TERMS_RE = re.compile(
        r"\b(?:agents?|gaia|llms?|ai|artificial intelligence|evaluations?|tools?|rag|retrieval)\b"
    )

    def __init__(self):
        print("GaiaAgent initialized.")
        # Create knowledge base documents
        self.knowledge_docs = create_knowledge_documents()

        # Create our tools
        self.retriever_tool = GaiaRetrieverTool(self.knowledge_docs)
        self.web_search_tool = WebSearchTool()
        self.web_content_tool = WebContentTool()

        # Initialize the Hugging Face model
        self.model = InferenceClientModel()

        # Set up LLM API access
        self.hf_api_url = LLAMA_API_URL
        self.headers = HEADERS

        # Cache of cleaned LLM responses, keyed by the exact prompt text.
        self.cache = {}

    def query_llm(self, prompt, question=None):
        """Send a prompt to the LLM API and return the cleaned response.

        Args:
            prompt: Full prompt text sent to the model; also the cache key.
            question: Optional raw user question. When given, it (rather than
                the prompt) is what the rule-based fallback is matched
                against, so wording in the prompt template cannot trigger a
                canned answer.

        Returns:
            The cleaned model answer; a fixed apology string when the API
            returns an unexpected payload; or a rule-based answer when no
            token is configured or all attempts fail.
        """
        # Check cache first
        if prompt in self.cache:
            print("Using cached response")
            return self.cache[prompt]

        fallback_input = prompt if question is None else question

        if not HF_API_TOKEN:
            # Fallback to rule-based approach if no API token
            return self.rule_based_answer(fallback_input)

        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
            },
        }

        for attempt in range(MAX_RETRIES):
            try:
                response = requests.post(self.hf_api_url, headers=self.headers, json=payload, timeout=30)
                response.raise_for_status()
                result = response.json()

                # Extract the generated text from the response
                if isinstance(result, list) and len(result) > 0:
                    generated_text = result[0].get("generated_text", "")
                    # Clean up the response to get just the answer
                    clean_response = self.clean_response(generated_text, prompt)
                    self.cache[prompt] = clean_response
                    return clean_response
                return "I couldn't generate a proper response."

            except Exception as e:
                print(f"Attempt {attempt+1}/{MAX_RETRIES} failed: {str(e)}")
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY)

        # Every attempt failed (or none was made): always return a string.
        return self.rule_based_answer(fallback_input)

    def clean_response(self, response, prompt):
        """Clean up the LLM response to extract the answer.

        Strips an echoed prompt, keeps only the text after each start marker
        found (case-insensitively) and cuts at the first end marker. The
        original letter case of the answer is preserved.

        Args:
            response: Raw generated text.
            prompt: Prompt that produced it; removed when echoed as a prefix.

        Returns:
            The extracted answer with surrounding whitespace removed.
        """
        # Remove the prompt from the beginning if it's included
        if response.startswith(prompt):
            response = response[len(prompt):]

        # Keep what follows each start marker; markers are applied in order.
        for marker in ("<answer>", "<response>", "Answer:", "Response:", "Assistant:"):
            found = re.search(re.escape(marker), response, flags=re.IGNORECASE)
            if found:
                response = response[found.end():].strip()

        # Drop closing tags / the start of a following conversational turn.
        for marker in ("</answer>", "</response>", "Human:", "User:"):
            found = re.search(re.escape(marker), response, flags=re.IGNORECASE)
            if found:
                response = response[:found.start()].strip()

        return response.strip()

    def rule_based_answer(self, question):
        """Fallback method using rule-based answers for common question types.

        Args:
            question: Text to match (case-insensitive substring checks).

        Returns:
            A canned answer for the first matching rule, or a generic default.
        """
        question_lower = question.lower()

        # Definition-style questions: answer from fixed definitions.
        if "what is" in question_lower or "define" in question_lower:
            if "agent" in question_lower:
                return "An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals."
            if "gaia" in question_lower:
                return "GAIA (General AI Assistant) is a framework for creating and evaluating AI assistants that can perform a wide range of tasks."
            if "llm" in question_lower or "large language model" in question_lower:
                return "A Large Language Model (LLM) is a neural network trained on vast amounts of text data to understand and generate human language."
            if "rag" in question_lower or "retrieval" in question_lower:
                return "RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with generation capabilities of language models."

        if "how to" in question_lower:
            return "To accomplish this task, you should first understand the requirements, then implement a solution step by step, and finally test your implementation."

        if "example" in question_lower:
            return "Here's an example implementation that demonstrates the concept in a practical manner."

        if "evaluate" in question_lower or "criteria" in question_lower:
            return "Evaluation criteria for agents typically include accuracy, relevance, factual correctness, conciseness, ability to follow instructions, and transparency in reasoning."

        # Default response for unmatched questions
        return "Based on my understanding, the answer involves analyzing the context carefully and applying the relevant principles to arrive at a solution."

    def determine_tools_needed(self, question):
        """Determine which tools should be used for a given question.

        Args:
            question: The user question.

        Returns:
            A dict with boolean flags ``use_web_search``,
            ``use_knowledge_retrieval`` and ``use_webpage_visit``.
        """
        question_lower = question.lower()

        needs_web_search = any(
            pattern in question_lower for pattern in self._WEB_SEARCH_PATTERNS
        )
        # Question appears to be about GAIA, agents, or AI concepts.
        needs_knowledge_retrieval = bool(self._KNOWLEDGE_TERMS_RE.search(question_lower))

        return {
            "use_web_search": needs_web_search,
            # Knowledge retrieval is also the fallback when no web search runs.
            "use_knowledge_retrieval": needs_knowledge_retrieval or not needs_web_search,
            "use_webpage_visit": "example" in question_lower or "details" in question_lower or "explain" in question_lower,
        }

    def format_prompt(self, question, knowledge_info="", web_info="", webpage_content=""):
        """Format the question into a proper prompt for the LLM.

        Args:
            question: The user question.
            knowledge_info: Text from the knowledge-base retriever, if any.
            web_info: Text from the web search, if any.
            webpage_content: Text extracted from a visited page, if any.

        Returns:
            The prompt text, ending in ``"Answer:"``. The gathered context
            and the instruction to use it are included only when at least
            one context piece is non-empty.
        """
        context = ""
        if knowledge_info:
            context += f"\nLocal Knowledge Base Information:\n{knowledge_info}\n"
        if web_info:
            context += f"\nWeb Search Results:\n{web_info}\n"
        if webpage_content:
            context += f"\nDetailed Web Content:\n{webpage_content}\n"

        preamble = (
            "You are an intelligent AI assistant specialized in answering questions "
            "about AI agents, GAIA (General AI Assistant), and related concepts.\n"
        )
        closing = (
            f"Question: {question}\n"
            "\n"
            "Provide a clear, concise, and accurate answer. Use reasoning steps when "
            "appropriate. If you're uncertain, acknowledge limitations.\n"
            "\n"
            "Answer:"
        )

        if context:
            return (
                preamble
                + "Use the following information to help answer the question accurately. "
                + "If the information doesn't contain what you need, use your general knowledge.\n"
                + "\n"
                + f"{context}\n"
                + "\n"
                + closing
            )
        return preamble + "\n" + closing

    def __call__(self, question: str) -> str:
        """Answer ``question`` using the selected tools and the LLM.

        Args:
            question: The user question.

        Returns:
            The LLM answer, or a rule-based answer when anything in the
            pipeline raises.
        """
        print(f"GaiaAgent received question (first 50 chars): {question[:50]}...")

        try:
            # Step 1: Determine which tools to use
            tool_selection = self.determine_tools_needed(question)

            # Step 2: Gather information from selected tools
            knowledge_info = ""
            web_info = ""
            webpage_content = ""

            if tool_selection["use_knowledge_retrieval"]:
                try:
                    knowledge_info = self.retriever_tool.forward(question)
                    print("Retrieved knowledge base information")
                except Exception as e:
                    print(f"Error retrieving knowledge base information: {e}")

            if tool_selection["use_web_search"]:
                try:
                    web_info = self.web_search_tool.forward(question)
                    print("Retrieved web search results")
                except Exception as e:
                    print(f"Error with web search: {e}")

            # If web search found URLs and we should visit them
            if tool_selection["use_webpage_visit"] and web_info and "http" in web_info.lower():
                # Only the first listed source is visited.
                url_match = re.search(r'Source: (https?://[^\s]+)', web_info)
                if url_match:
                    url = url_match.group(1)
                    try:
                        content_result = self.web_content_tool.forward(url)

                        # Only use if result seems valid: long enough and not
                        # the tool's own error message.
                        if (
                            content_result
                            and len(content_result) > 100
                            and not content_result.startswith("Error fetching content")
                        ):
                            webpage_content = content_result
                            print(f"Retrieved webpage content from {url}")
                        else:
                            print("Webpage content was too short or empty")
                    except Exception as e:
                        print(f"Error extracting webpage content: {e}")

            # Step 3: Format prompt with gathered information
            prompt = self.format_prompt(question, knowledge_info, web_info, webpage_content)

            # Step 4: Query the LLM; pass the raw question so the rule-based
            # fallback is matched against it rather than the prompt template.
            answer = self.query_llm(prompt, question)

            print(f"GaiaAgent returning answer (first 50 chars): {answer[:50]}...")
            return answer

        except Exception as e:
            print(f"Error in GaiaAgent: {e}")
            # Fallback to the rule-based method if anything goes wrong
            fallback_answer = self.rule_based_answer(question)
            print(f"GaiaAgent returning fallback answer: {fallback_answer[:50]}...")
            return fallback_answer
|
452 |
+
|
453 |
class BasicAgent:
|
454 |
def __init__(self):
|
455 |
print("BasicAgent initialized.")
|
456 |
# Initialize the Hugging Face API client
|
457 |
+
# https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
|
458 |
self.hf_api_url = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
|
459 |
self.hf_api_token = os.getenv("HF_API_TOKEN")
|
460 |
if not self.hf_api_token:
|
|
|
588 |
|
589 |
api_url = DEFAULT_API_URL
|
590 |
questions_url = f"{api_url}/questions"
|
591 |
+
submit_url = f"{api_url}/submit" # 1. Instantiate Agent ( modify this part to create your agent)
|
|
|
|
|
592 |
try:
|
593 |
+
print("Initializing GaiaAgent...")
|
594 |
+
agent = GaiaAgent()
|
595 |
+
|
596 |
+
# Initialize the Hugging Face model
|
597 |
+
model = InferenceClientModel()
|
598 |
+
|
599 |
+
# Initialize the web search tool
|
600 |
+
#search_tool = DuckDuckGoSearchTool()
|
601 |
+
|
602 |
+
# Initialize the weather tool
|
603 |
+
#weather_info_tool = WeatherInfoTool()
|
604 |
+
|
605 |
+
# Initialize the Hub stats tool
|
606 |
+
#hub_stats_tool = HubStatsTool()
|
607 |
+
|
608 |
+
# Load the guest dataset and initialize the guest info tool
|
609 |
+
guest_info_tool = load_guest_dataset()
|
610 |
+
|
611 |
+
# Initialize the Hugging Face model
|
612 |
+
model = InferenceClientModel()
|
613 |
+
|
614 |
+
# Load the DuckDuckGo search tool dynamically
|
615 |
+
search_tool = load_tool(repo_id="smol-ai/duckduckgo-search", trust_remote_code=True)
|
616 |
+
|
617 |
+
|
618 |
+
agent = CodeAgent(
|
619 |
+
tools=[guest_info_tool, search_tool],
|
620 |
+
model=model,
|
621 |
+
add_base_tools=True, # Add any additional base tools
|
622 |
+
planning_interval=3 # Enable planning every 3 steps
|
623 |
+
)
|
624 |
+
|
625 |
+
print("GaiaAgent initialization complete.")
|
626 |
except Exception as e:
|
627 |
+
print(f"Error instantiating GaiaAgent: {e}")
|
628 |
+
print("Falling back to BasicAgent...")
|
629 |
+
try:
|
630 |
+
agent = BasicAgent()
|
631 |
+
print("BasicAgent initialization complete.")
|
632 |
+
except Exception as e:
|
633 |
+
print(f"Error instantiating BasicAgent: {e}")
|
634 |
+
return f"Error initializing agents: {e}", None
|
635 |
# In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
|
636 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
637 |
print(agent_code)
|
requirements.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1 |
gradio
|
2 |
requests
|
3 |
-
pandas
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
gradio
|
2 |
requests
|
3 |
+
pandas
|
4 |
+
langchain
|
5 |
+
langchain-community
|
6 |
+
smolagents
|
7 |
+
gradio[oauth]
|
8 |
+
beautifulsoup4
|
9 |
+
duckduckgo-search
|