mchinea committed on
Commit afadec7 · 1 Parent(s): fd14ef2

update tools and model

Files changed (3)
  1. agent_smolagent.py +1 -25
  2. tools.py +1 -135
  3. tools_smolagent.py +2 -142
agent_smolagent.py CHANGED
@@ -49,7 +49,7 @@ def get_system_prompt(prompt_file: Path = None):
 
 def build_agent():
     model_desp = OpenAIServerModel(
-        model_id="gpt-4o-mini",
+        model_id="gpt-4o",
         api_base="https://api.openai.com/v1",
         api_key=os.environ["OPENAI_API_KEY"],
     )
@@ -80,11 +80,6 @@ def build_agent():
         planning_interval=10,
         name="Manager",
         description="The manager of the team, responsible for overseeing and guiding the team's work.",
-        #final_answer_checks=[
-        #    check_reasoning,
-        #    ensure_formatting,
-        #],
-        #prompt_templates=get_system_prompt()
     )
 
 class MyGAIAAgent:
@@ -93,20 +88,6 @@ class MyGAIAAgent:
         self.agent = build_agent()
         self.verbose = verbose
     def __call__(self, task: dict) -> str:
-
-        '''
-        # Wrap the question in a HumanMessage from langchain_core
-
-        messages = [HumanMessage(content=question)]
-        messages = self.graph.invoke({"messages": messages})
-        answer = messages['messages'][-1].content
-
-        user_input = {"messages": [("user", question)]}
-        answer1 = self.graph.invoke(user_input)["messages"][-1].content
-        print (answer)
-        #print (self._clean_answer(answer))
-        return self._clean_answer(answer)
-        '''
         question = task["question"]
         task_id = task["task_id"]
         file_name = task.get("file_name")
@@ -198,11 +179,6 @@ if __name__ == "__main__":
     question16 = "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer."
     question17 = "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters."
    question18 = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
-    '''
-
-    question4 = "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?"
-    question6 = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
-    '''
     task = {
        "task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
        "question": question1,
tools.py CHANGED
@@ -1,4 +1,3 @@
-'''
 import os
 import random
 import requests
@@ -267,133 +266,6 @@ def read_python_file(file_path: str) -> str:
         raise type(err)(f"Could not parse python file > {err}")
 
 
-@tool
-def save_and_read_file(content: str, filename: str = None) -> str:
-    """
-    Save content to a temporary file and return the path.
-    Useful for processing files from the GAIA API.
-
-    Args:
-        content: The content to save to the file
-        filename: Optional filename, will generate a random name if not provided
-
-    Returns:
-        Path to the saved file
-    """
-    temp_dir = tempfile.gettempdir()
-    if filename is None:
-        temp_file = tempfile.NamedTemporaryFile(delete=False)
-        filepath = temp_file.name
-    else:
-        filepath = os.path.join(temp_dir, filename)
-
-    # Write content to the file
-    with open(filepath, 'w') as f:
-        f.write(content)
-
-    return f"File saved to {filepath}. You can read this file to process its contents."
-
-
-
-def download_file_from_url(url: str, filename: str) -> str:
-    """
-    Download a file from a URL and save it to a temporary location.
-    Args:
-        url: The URL to download from
-        filename: filename
-    Returns:
-        Path to the downloaded file
-    """
-    try:
-        # Parse URL to get filename if not provided
-        if not filename:
-            path = urlparse(url).path
-            filename = os.path.basename(path)
-        if not filename:
-            # Generate a random name if we couldn't extract one
-            import uuid
-
-            filename = f"downloaded_{uuid.uuid4().hex[:8]}"
-
-        # Create temporary file
-        temp_dir = tempfile.gettempdir()
-        filepath = os.path.join(temp_dir, filename)
-
-        # Download the file
-        response = requests.get(url, stream=True)
-        response.raise_for_status()
-
-        # Save the file
-        with open(filepath, "wb") as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                f.write(chunk)
-
-        return f"File downloaded to {filepath}. You can now process this file."
-    except Exception as e:
-        return f"Error downloading file: {str(e)}"
-
-
-@tool
-def extract_text_from_image(image_path: str) -> str:
-    """
-    Extracts text from an image using pytesseract OCR.
-
-    Args:
-        image_path: Path to the image file.
-
-    Returns:
-        A string with the extracted text or an error message.
-    """
-    try:
-        from PIL import Image
-        import pytesseract
-
-        # Load the image
-        image = Image.open(image_path)
-
-        # Perform OCR
-        text = pytesseract.image_to_string(image)
-
-        return f"Extracted text from image:\n\n{text.strip()}"
-    except ImportError:
-        return (
-            "Error: pytesseract or PIL is not installed. "
-            "Install them with 'pip install pytesseract pillow' and ensure Tesseract OCR is installed."
-        )
-    except FileNotFoundError:
-        return f"Error: File not found at '{image_path}'."
-    except Exception as e:
-        return f"Unexpected error during OCR: {str(e)}"
-
-
-@tool
-def transcribe_audio(audio_path: str) -> str:
-    """
-    Transcribes speech from an audio file using Whisper (local).
-
-    Args:
-        audio_path: Path to the audio file (e.g., .mp3, .wav, .m4a).
-
-    Returns:
-        The transcribed text or an error message.
-    """
-    try:
-        import whisper
-
-        if not os.path.exists(audio_path):
-            return f"Error: File not found at '{audio_path}'."
-
-        model = whisper.load_model("base")  # You can use "small", "medium", "large"
-        result = model.transcribe(audio_path)
-
-        return result["text"].strip()
-    except ImportError:
-        return (
-            "Error: 'whisper' library is not installed. "
-            "Install it using 'pip install openai-whisper'."
-        )
-    except Exception as e:
-        return f"Error during transcription: {str(e)}"
 
 level1_tools = [
     multiply,
@@ -407,11 +279,5 @@ level1_tools = [
     arvix_search,
     convert_units,
     query_table_data,
-    #download_file_from_url,
-    #save_and_read_file,
-    #read_python_file,
-    #extract_text_from_image,
-    #transcribe_audio
+    read_python_file,
 ]
-
-'''
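
With the commented-out entries dropped and read_python_file added, level1_tools is the list tools.py now exports. A hypothetical wiring sketch follows; only the tool list, the model id, and the module name come from this commit, the CodeAgent call and the prompt are illustrative:

import os
from smolagents import CodeAgent, OpenAIServerModel
from tools import level1_tools

# Illustrative only: hand the exported level-1 tool list to a smolagents agent.
model = OpenAIServerModel(
    model_id="gpt-4o",
    api_base="https://api.openai.com/v1",
    api_key=os.environ["OPENAI_API_KEY"],
)
agent = CodeAgent(tools=level1_tools, model=model)
answer = agent.run("Read example.py and summarize what it does.")  # hypothetical prompt
print(answer)
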
 
 
tools_smolagent.py CHANGED
@@ -20,129 +20,7 @@ from PIL import Image
 #import imageio
 
 load_dotenv(override=True)
-'''
-@tool
-def use_vision_model(question: str, images: List[Image.Image]) -> str:
-    """
-    Use a Vision Model to answer a question about a set of images.
-    Always use this tool to ask questions about a set of images you have been provided.
-    This function uses an image-to-text AI model.
-    You can ask a question about a list of one image or a list of multiple images.
-    So, if you have multiple images that you want to ask the same question of, pass the entire list of images to the model.
-    Ensure your prompt is specific enough to retrieve the exact information you are looking for.
-
-    Args:
-        question: The question to ask about the images. Type: str
-        images: The list of images to ask the question about. Type: List[PIL.Image.Image]
-    """
-    image_model = create_vision_model()
-
-    content = [
-        {
-            "type": "text",
-            "text": question
-        }
-    ]
-    print(f"Asking model a question about {len(images)} images")
-    for image in images:
-        content.append({
-            "type": "image",
-            "image": image  # ✅ Directly the PIL Image, no wrapping
-        })
-
-    messages = [
-        {
-            "role": "user",
-            "content": content
-        }
-    ]
-
-    output = image_model(messages).content
-    print(f'Model returned: {output}')
-    return output
-
-
-@tool
-def youtube_frames_to_images(url: str, sample_interval_frames: int = 24) -> List[Image.Image]:
-    """
-    Reviews a YouTube video and returns a List of PIL Images (List[PIL.Image.Image]), which can then be reviewed by a vision model.
-    Only use this tool if you have been given a YouTube video that you need to analyze.
-    This will generate a list of images, and you can use the use_vision_model tool to analyze those images
-    Args:
-        url: The Youtube URL
-        sample_interval_frames: The sampling interval (default is 24 frames)
-    """
-    with tempfile.TemporaryDirectory() as tmpdir:
-        # Download the video locally
-        ydl_opts = {
-            'format': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]/best',
-            'outtmpl': os.path.join(tmpdir, 'video.%(ext)s'),
-            'quiet': True,
-            'noplaylist': True,
-            'merge_output_format': 'mp4',
-            'force_ipv4': True,  # Avoid IPv6 issues
-        }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-
-        # Find the downloaded file
-        video_path = None
-        for file in os.listdir(tmpdir):
-            if file.endswith('.mp4'):
-                video_path = os.path.join(tmpdir, file)
-                break
-
-        if not video_path:
-            raise RuntimeError("Failed to download video as mp4")
-
-        # ✅ Fix: Use `imageio.get_reader()` instead of `imopen()`
-        reader = imageio.get_reader(video_path)  # Works for frame-by-frame iteration
-        # metadata = reader.get_meta_data()
-        # fps = metadata.get('fps')
-
-        # if fps is None:
-        #     reader.close()
-        #     raise RuntimeError("Unable to determine FPS from video metadata")
-
-        # frame_interval = int(fps * sample_interval_frames)
-        frame_interval = sample_interval_frames  # Use the provided interval directly
-        images: List[Image.Image] = []
 
-        # ✅ Iterate over frames using `get_reader()`
-        for idx, frame in enumerate(reader):
-            print(f"Processing frame {idx}")
-            if idx % frame_interval == 0:
-                images.append(Image.fromarray(frame))
-
-        reader.close()
-        return images
-
-@tool
-def review_youtube_video(url: str, question: str) -> str:
-    """
-    Reviews a YouTube video and answers a specific question about that video.
-    Args:
-        url (str): the URL to the YouTube video. Should be like this format: https://www.youtube.com/watch?v=9hE5-98ZeCg
-        question (str): The question you are asking about the video
-    """
-    try:
-        client = genai.Client(api_key=os.getenv('GEMINI_KEY'))
-        model = 'gemini-2.0-flash-lite'
-        response = client.models.generate_content(
-            model=model,
-            contents=types.Content(
-                parts=[
-                    types.Part(
-                        file_data=types.FileData(file_uri=url)
-                    ),
-                    types.Part(text=question)
-                ]
-            )
-        )
-        return response.text
-    except Exception as e:
-        return f"Error asking {model} about video: {str(e)}"
-'''
 
 @tool
 def read_file(filepath: str ) -> str:
@@ -155,9 +33,6 @@ def read_file(filepath: str ) -> str:
         filepath (str): The path to the file to be read.
     Returns:
         str: Content of the file as a string.
-
-    Raises:
-        IOError: If there is an error opening or reading from the file.
     """
     try:
         with open(filepath, 'r', encoding='utf-8') as file:
@@ -194,7 +69,7 @@ def extract_text_from_image(image_path: str) -> str:
 
         return f"Extracted text from image:\n\n{text}"
     except ImportError:
-        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
+        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract'"
     except Exception as e:
         return f"Error extracting text from image: {str(e)}"
 
@@ -443,19 +318,4 @@ def convert_units(value: float, from_unit: str, to_unit: str) -> float:
     if key not in conversions:
         raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported.")
 
-    return conversions[key](value)
-
-level1_tools = [
-    multiply,
-    add,
-    subtract,
-    divide,
-    modulus,
-    read_file,
-    extract_text_from_image,
-    analyze_csv_file,
-    analyze_excel_file,
-    youtube_transcribe,
-    transcribe_audio,
-    wikipedia_search
-]
+    return conversions[key](value)
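
The convert_units tail above dispatches through a dictionary keyed by unit pairs. A stripped-down, hypothetical version of that pattern is sketched below; the real table in tools_smolagent.py covers more units, only the lookup-and-dispatch shape is taken from the diff:

def convert_units(value: float, from_unit: str, to_unit: str) -> float:
    # Hypothetical reduced conversion table; illustrates the pattern only.
    conversions = {
        ("km", "mi"): lambda v: v * 0.621371,
        ("mi", "km"): lambda v: v / 0.621371,
        ("kg", "lb"): lambda v: v * 2.20462,
    }
    key = (from_unit.lower(), to_unit.lower())
    if key not in conversions:
        raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported.")
    return conversions[key](value)

print(convert_units(5, "km", "mi"))  # 3.106855
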