Spaces:
Sleeping
Sleeping
mchinea
committed on
Commit
·
afadec7
1
Parent(s):
fd14ef2
update tools and model
Browse files
- agent_smolagent.py +1 -25
- tools.py +1 -135
- tools_smolagent.py +2 -142
agent_smolagent.py
CHANGED
@@ -49,7 +49,7 @@ def get_system_prompt(prompt_file: Path = None):
|
|
49 |
|
50 |
def build_agent():
|
51 |
model_desp = OpenAIServerModel(
|
52 |
-
model_id="gpt-4o
|
53 |
api_base="https://api.openai.com/v1",
|
54 |
api_key=os.environ["OPENAI_API_KEY"],
|
55 |
)
|
@@ -80,11 +80,6 @@ def build_agent():
|
|
80 |
planning_interval=10,
|
81 |
name="Manager",
|
82 |
description="The manager of the team, responsible for overseeing and guiding the team's work.",
|
83 |
-
#final_answer_checks=[
|
84 |
-
# check_reasoning,
|
85 |
-
# ensure_formatting,
|
86 |
-
#],
|
87 |
-
#prompt_templates=get_system_prompt()
|
88 |
)
|
89 |
|
90 |
class MyGAIAAgent:
|
@@ -93,20 +88,6 @@ class MyGAIAAgent:
|
|
93 |
self.agent = build_agent()
|
94 |
self.verbose = verbose
|
95 |
def __call__(self, task: dict) -> str:
|
96 |
-
|
97 |
-
'''
|
98 |
-
# Wrap the question in a HumanMessage from langchain_core
|
99 |
-
|
100 |
-
messages = [HumanMessage(content=question)]
|
101 |
-
messages = self.graph.invoke({"messages": messages})
|
102 |
-
answer = messages['messages'][-1].content
|
103 |
-
|
104 |
-
user_input = {"messages": [("user", question)]}
|
105 |
-
answer1 = self.graph.invoke(user_input)["messages"][-1].content
|
106 |
-
print (answer)
|
107 |
-
#print (self._clean_answer(answer))
|
108 |
-
return self._clean_answer(answer)
|
109 |
-
'''
|
110 |
question = task["question"]
|
111 |
task_id = task["task_id"]
|
112 |
file_name = task.get("file_name")
|
@@ -198,11 +179,6 @@ if __name__ == "__main__":
|
|
198 |
question16 = "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer."
|
199 |
question17 = "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters."
|
200 |
question18 = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
|
201 |
-
'''
|
202 |
-
|
203 |
-
question4 = "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?"
|
204 |
-
question6 = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
|
205 |
-
'''
|
206 |
task = {
|
207 |
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
208 |
"question": question1,
|
|
|
49 |
|
50 |
def build_agent():
|
51 |
model_desp = OpenAIServerModel(
|
52 |
+
model_id="gpt-4o",
|
53 |
api_base="https://api.openai.com/v1",
|
54 |
api_key=os.environ["OPENAI_API_KEY"],
|
55 |
)
|
|
|
80 |
planning_interval=10,
|
81 |
name="Manager",
|
82 |
description="The manager of the team, responsible for overseeing and guiding the team's work.",
|
|
|
|
|
|
|
|
|
|
|
83 |
)
|
84 |
|
85 |
class MyGAIAAgent:
|
|
|
88 |
self.agent = build_agent()
|
89 |
self.verbose = verbose
|
90 |
def __call__(self, task: dict) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
question = task["question"]
|
92 |
task_id = task["task_id"]
|
93 |
file_name = task.get("file_name")
|
|
|
179 |
question16 = "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer."
|
180 |
question17 = "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters."
|
181 |
question18 = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
|
|
|
|
|
|
|
|
|
|
|
182 |
task = {
|
183 |
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
184 |
"question": question1,
|
tools.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
'''
|
2 |
import os
|
3 |
import random
|
4 |
import requests
|
@@ -267,133 +266,6 @@ def read_python_file(file_path: str) -> str:
|
|
267 |
raise type(err)(f"Could not parse python file > {err}")
|
268 |
|
269 |
|
270 |
-
@tool
|
271 |
-
def save_and_read_file(content: str, filename: str = None) -> str:
|
272 |
-
"""
|
273 |
-
Save content to a temporary file and return the path.
|
274 |
-
Useful for processing files from the GAIA API.
|
275 |
-
|
276 |
-
Args:
|
277 |
-
content: The content to save to the file
|
278 |
-
filename: Optional filename, will generate a random name if not provided
|
279 |
-
|
280 |
-
Returns:
|
281 |
-
Path to the saved file
|
282 |
-
"""
|
283 |
-
temp_dir = tempfile.gettempdir()
|
284 |
-
if filename is None:
|
285 |
-
temp_file = tempfile.NamedTemporaryFile(delete=False)
|
286 |
-
filepath = temp_file.name
|
287 |
-
else:
|
288 |
-
filepath = os.path.join(temp_dir, filename)
|
289 |
-
|
290 |
-
# Write content to the file
|
291 |
-
with open(filepath, 'w') as f:
|
292 |
-
f.write(content)
|
293 |
-
|
294 |
-
return f"File saved to {filepath}. You can read this file to process its contents."
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
def download_file_from_url(url: str, filename: str) -> str:
|
299 |
-
"""
|
300 |
-
Download a file from a URL and save it to a temporary location.
|
301 |
-
Args:
|
302 |
-
url: The URL to download from
|
303 |
-
filename: filename
|
304 |
-
Returns:
|
305 |
-
Path to the downloaded file
|
306 |
-
"""
|
307 |
-
try:
|
308 |
-
# Parse URL to get filename if not provided
|
309 |
-
if not filename:
|
310 |
-
path = urlparse(url).path
|
311 |
-
filename = os.path.basename(path)
|
312 |
-
if not filename:
|
313 |
-
# Generate a random name if we couldn't extract one
|
314 |
-
import uuid
|
315 |
-
|
316 |
-
filename = f"downloaded_{uuid.uuid4().hex[:8]}"
|
317 |
-
|
318 |
-
# Create temporary file
|
319 |
-
temp_dir = tempfile.gettempdir()
|
320 |
-
filepath = os.path.join(temp_dir, filename)
|
321 |
-
|
322 |
-
# Download the file
|
323 |
-
response = requests.get(url, stream=True)
|
324 |
-
response.raise_for_status()
|
325 |
-
|
326 |
-
# Save the file
|
327 |
-
with open(filepath, "wb") as f:
|
328 |
-
for chunk in response.iter_content(chunk_size=8192):
|
329 |
-
f.write(chunk)
|
330 |
-
|
331 |
-
return f"File downloaded to {filepath}. You can now process this file."
|
332 |
-
except Exception as e:
|
333 |
-
return f"Error downloading file: {str(e)}"
|
334 |
-
|
335 |
-
|
336 |
-
@tool
|
337 |
-
def extract_text_from_image(image_path: str) -> str:
|
338 |
-
"""
|
339 |
-
Extracts text from an image using pytesseract OCR.
|
340 |
-
|
341 |
-
Args:
|
342 |
-
image_path: Path to the image file.
|
343 |
-
|
344 |
-
Returns:
|
345 |
-
A string with the extracted text or an error message.
|
346 |
-
"""
|
347 |
-
try:
|
348 |
-
from PIL import Image
|
349 |
-
import pytesseract
|
350 |
-
|
351 |
-
# Load the image
|
352 |
-
image = Image.open(image_path)
|
353 |
-
|
354 |
-
# Perform OCR
|
355 |
-
text = pytesseract.image_to_string(image)
|
356 |
-
|
357 |
-
return f"Extracted text from image:\n\n{text.strip()}"
|
358 |
-
except ImportError:
|
359 |
-
return (
|
360 |
-
"Error: pytesseract or PIL is not installed. "
|
361 |
-
"Install them with 'pip install pytesseract pillow' and ensure Tesseract OCR is installed."
|
362 |
-
)
|
363 |
-
except FileNotFoundError:
|
364 |
-
return f"Error: File not found at '{image_path}'."
|
365 |
-
except Exception as e:
|
366 |
-
return f"Unexpected error during OCR: {str(e)}"
|
367 |
-
|
368 |
-
|
369 |
-
@tool
|
370 |
-
def transcribe_audio(audio_path: str) -> str:
|
371 |
-
"""
|
372 |
-
Transcribes speech from an audio file using Whisper (local).
|
373 |
-
|
374 |
-
Args:
|
375 |
-
audio_path: Path to the audio file (e.g., .mp3, .wav, .m4a).
|
376 |
-
|
377 |
-
Returns:
|
378 |
-
The transcribed text or an error message.
|
379 |
-
"""
|
380 |
-
try:
|
381 |
-
import whisper
|
382 |
-
|
383 |
-
if not os.path.exists(audio_path):
|
384 |
-
return f"Error: File not found at '{audio_path}'."
|
385 |
-
|
386 |
-
model = whisper.load_model("base") # You can use "small", "medium", "large"
|
387 |
-
result = model.transcribe(audio_path)
|
388 |
-
|
389 |
-
return result["text"].strip()
|
390 |
-
except ImportError:
|
391 |
-
return (
|
392 |
-
"Error: 'whisper' library is not installed. "
|
393 |
-
"Install it using 'pip install openai-whisper'."
|
394 |
-
)
|
395 |
-
except Exception as e:
|
396 |
-
return f"Error during transcription: {str(e)}"
|
397 |
|
398 |
level1_tools = [
|
399 |
multiply,
|
@@ -407,11 +279,5 @@ level1_tools = [
|
|
407 |
arvix_search,
|
408 |
convert_units,
|
409 |
query_table_data,
|
410 |
-
|
411 |
-
#save_and_read_file,
|
412 |
-
#read_python_file,
|
413 |
-
#extract_text_from_image,
|
414 |
-
#transcribe_audio
|
415 |
]
|
416 |
-
|
417 |
-
'''
|
|
|
|
|
1 |
import os
|
2 |
import random
|
3 |
import requests
|
|
|
266 |
raise type(err)(f"Could not parse python file > {err}")
|
267 |
|
268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
level1_tools = [
|
271 |
multiply,
|
|
|
279 |
arvix_search,
|
280 |
convert_units,
|
281 |
query_table_data,
|
282 |
+
read_python_file,
|
|
|
|
|
|
|
|
|
283 |
]
|
|
|
|
tools_smolagent.py
CHANGED
@@ -20,129 +20,7 @@ from PIL import Image
|
|
20 |
#import imageio
|
21 |
|
22 |
load_dotenv(override=True)
|
23 |
-
'''
|
24 |
-
@tool
|
25 |
-
def use_vision_model(question: str, images: List[Image.Image]) -> str:
|
26 |
-
"""
|
27 |
-
Use a Vision Model to answer a question about a set of images.
|
28 |
-
Always use this tool to ask questions about a set of images you have been provided.
|
29 |
-
This function uses an image-to-text AI model.
|
30 |
-
You can ask a question about a list of one image or a list of multiple images.
|
31 |
-
So, if you have multiple images that you want to ask the same question of, pass the entire list of images to the model.
|
32 |
-
Ensure your prompt is specific enough to retrieve the exact information you are looking for.
|
33 |
-
|
34 |
-
Args:
|
35 |
-
question: The question to ask about the images. Type: str
|
36 |
-
images: The list of images to as the question about. Type: List[PIL.Image.Image]
|
37 |
-
"""
|
38 |
-
image_model = create_vision_model()
|
39 |
-
|
40 |
-
content = [
|
41 |
-
{
|
42 |
-
"type": "text",
|
43 |
-
"text": question
|
44 |
-
}
|
45 |
-
]
|
46 |
-
print(f"Asking model a question about {len(images)} images")
|
47 |
-
for image in images:
|
48 |
-
content.append({
|
49 |
-
"type": "image",
|
50 |
-
"image": image # ✅ Directly the PIL Image, no wrapping
|
51 |
-
})
|
52 |
-
|
53 |
-
messages = [
|
54 |
-
{
|
55 |
-
"role": "user",
|
56 |
-
"content": content
|
57 |
-
}
|
58 |
-
]
|
59 |
-
|
60 |
-
output = image_model(messages).content
|
61 |
-
print(f'Model returned: {output}')
|
62 |
-
return output
|
63 |
-
|
64 |
-
|
65 |
-
@tool
|
66 |
-
def youtube_frames_to_images(url: str, sample_interval_frames: int = 24) -> List[Image.Image]:
|
67 |
-
"""
|
68 |
-
Reviews a YouTube video and returns a List of PIL Images (List[PIL.Image.Image]), which can then be reviewed by a vision model.
|
69 |
-
Only use this tool if you have been given a YouTube video that you need to analyze.
|
70 |
-
This will generate a list of images, and you can use the use_vision_model tool to analyze those images
|
71 |
-
Args:
|
72 |
-
url: The Youtube URL
|
73 |
-
sample_interval_frames: The sampling interval (default is 24 frames)
|
74 |
-
"""
|
75 |
-
with tempfile.TemporaryDirectory() as tmpdir:
|
76 |
-
# Download the video locally
|
77 |
-
ydl_opts = {
|
78 |
-
'format': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]/best',
|
79 |
-
'outtmpl': os.path.join(tmpdir, 'video.%(ext)s'),
|
80 |
-
'quiet': True,
|
81 |
-
'noplaylist': True,
|
82 |
-
'merge_output_format': 'mp4',
|
83 |
-
'force_ipv4': True, # Avoid IPv6 issues
|
84 |
-
}
|
85 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
86 |
-
info = ydl.extract_info(url, download=True)
|
87 |
-
|
88 |
-
# Find the downloaded file
|
89 |
-
video_path = None
|
90 |
-
for file in os.listdir(tmpdir):
|
91 |
-
if file.endswith('.mp4'):
|
92 |
-
video_path = os.path.join(tmpdir, file)
|
93 |
-
break
|
94 |
-
|
95 |
-
if not video_path:
|
96 |
-
raise RuntimeError("Failed to download video as mp4")
|
97 |
-
|
98 |
-
# ✅ Fix: Use `imageio.get_reader()` instead of `imopen()`
|
99 |
-
reader = imageio.get_reader(video_path) # Works for frame-by-frame iteration
|
100 |
-
# metadata = reader.get_meta_data()
|
101 |
-
# fps = metadata.get('fps')
|
102 |
-
|
103 |
-
# if fps is None:
|
104 |
-
# reader.close()
|
105 |
-
# raise RuntimeError("Unable to determine FPS from video metadata")
|
106 |
-
|
107 |
-
# frame_interval = int(fps * sample_interval_frames)
|
108 |
-
frame_interval = sample_interval_frames # Use the provided interval directly
|
109 |
-
images: List[Image.Image] = []
|
110 |
|
111 |
-
# ✅ Iterate over frames using `get_reader()`
|
112 |
-
for idx, frame in enumerate(reader):
|
113 |
-
print(f"Processing frame {idx}")
|
114 |
-
if idx % frame_interval == 0:
|
115 |
-
images.append(Image.fromarray(frame))
|
116 |
-
|
117 |
-
reader.close()
|
118 |
-
return images
|
119 |
-
|
120 |
-
@tool
|
121 |
-
def review_youtube_video(url: str, question: str) -> str:
|
122 |
-
"""
|
123 |
-
Reviews a YouTube video and answers a specific question about that video.
|
124 |
-
Args:
|
125 |
-
url (str): the URL to the YouTube video. Should be like this format: https://www.youtube.com/watch?v=9hE5-98ZeCg
|
126 |
-
question (str): The question you are asking about the video
|
127 |
-
"""
|
128 |
-
try:
|
129 |
-
client = genai.Client(api_key=os.getenv('GEMINI_KEY'))
|
130 |
-
model = 'gemini-2.0-flash-lite'
|
131 |
-
response = client.models.generate_content(
|
132 |
-
model=model,
|
133 |
-
contents=types.Content(
|
134 |
-
parts=[
|
135 |
-
types.Part(
|
136 |
-
file_data=types.FileData(file_uri=url)
|
137 |
-
),
|
138 |
-
types.Part(text=question)
|
139 |
-
]
|
140 |
-
)
|
141 |
-
)
|
142 |
-
return response.text
|
143 |
-
except Exception as e:
|
144 |
-
return f"Error asking {model} about video: {str(e)}"
|
145 |
-
'''
|
146 |
|
147 |
@tool
|
148 |
def read_file(filepath: str ) -> str:
|
@@ -155,9 +33,6 @@ def read_file(filepath: str ) -> str:
|
|
155 |
filepath (str): The path to the file to be read.
|
156 |
Returns:
|
157 |
str: Content of the file as a string.
|
158 |
-
|
159 |
-
Raises:
|
160 |
-
IOError: If there is an error opening or reading from the file.
|
161 |
"""
|
162 |
try:
|
163 |
with open(filepath, 'r', encoding='utf-8') as file:
|
@@ -194,7 +69,7 @@ def extract_text_from_image(image_path: str) -> str:
|
|
194 |
|
195 |
return f"Extracted text from image:\n\n{text}"
|
196 |
except ImportError:
|
197 |
-
return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract'
|
198 |
except Exception as e:
|
199 |
return f"Error extracting text from image: {str(e)}"
|
200 |
|
@@ -443,19 +318,4 @@ def convert_units(value: float, from_unit: str, to_unit: str) -> float:
|
|
443 |
if key not in conversions:
|
444 |
raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported.")
|
445 |
|
446 |
-
return conversions[key](value)
|
447 |
-
|
448 |
-
level1_tools = [
|
449 |
-
multiply,
|
450 |
-
add,
|
451 |
-
subtract,
|
452 |
-
divide,
|
453 |
-
modulus,
|
454 |
-
read_file,
|
455 |
-
extract_text_from_image,
|
456 |
-
analyze_csv_file,
|
457 |
-
analyze_excel_file,
|
458 |
-
youtube_transcribe,
|
459 |
-
transcribe_audio,
|
460 |
-
wikipedia_search
|
461 |
-
]
|
|
|
20 |
#import imageio
|
21 |
|
22 |
load_dotenv(override=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
@tool
|
26 |
def read_file(filepath: str ) -> str:
|
|
|
33 |
filepath (str): The path to the file to be read.
|
34 |
Returns:
|
35 |
str: Content of the file as a string.
|
|
|
|
|
|
|
36 |
"""
|
37 |
try:
|
38 |
with open(filepath, 'r', encoding='utf-8') as file:
|
|
|
69 |
|
70 |
return f"Extracted text from image:\n\n{text}"
|
71 |
except ImportError:
|
72 |
+
return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract'"
|
73 |
except Exception as e:
|
74 |
return f"Error extracting text from image: {str(e)}"
|
75 |
|
|
|
318 |
if key not in conversions:
|
319 |
raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported.")
|
320 |
|
321 |
+
return conversions[key](value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|