Final_Assignment_Template / tools_smolagent.py
mchinea
update tools and model
afadec7
import tempfile
import requests
import os
#from time import sleep
from dotenv import load_dotenv
#from urllib.parse import urlparse
from typing import Optional, List
import yt_dlp
import wikipedia
from smolagents import tool
#from google.genai import types
from PIL import Image
#from google import genai
#from dotenv import load_dotenv
#from model_provider import create_react_model, create_vision_model
#import imageio
load_dotenv(override=True)
@tool
def read_file(filepath: str) -> str:
    """
    Read a text-based file and return its content as a string.
    Will only work for text-based files, such as .txt files or code files.
    Do not use for audio or visual files.

    Args:
        filepath: The path to the file to be read.

    Returns:
        Content of the file as a string, or a message describing why the
        file could not be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        # Return the message instead of printing it and falling through to an
        # implicit None, so the caller always receives a string.
        return f"File not found: {filepath}"
    except UnicodeDecodeError as e:
        # Binary / non-UTF-8 files raise a ValueError subclass, not an IOError.
        return f"Error reading file: not valid UTF-8 text ({e})"
    except IOError as e:
        return f"Error reading file: {str(e)}"
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so a missing OCR dependency is reported as a readable
        # message; only the imports are guarded by the ImportError handler.
        import pytesseract
        from PIL import Image
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract'"
    try:
        # The context manager closes the underlying file handle once OCR is done.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
    return f"Extracted text from image:\n\n{text}"
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Load a CSV file with pandas and report its size, column names and summary statistics.
    The file must already be saved locally; pass that location to this function.
    The download_file_from_url tool will save it by name to tempfile.gettempdir()

    Args:
        file_path: Path to the CSV file
        query: Question about the data (accepted but not used by the current implementation)

    Returns:
        Analysis result or error message
    """
    try:
        import pandas as pd

        frame = pd.read_csv(file_path)
        n_rows, n_cols = len(frame), len(frame.columns)
        column_list = ', '.join(frame.columns)
        # describe() summarises the numeric columns by default
        stats = str(frame.describe())
        return (
            f"CSV file loaded with {n_rows} rows and {n_cols} columns.\n"
            f"Columns: {column_list}\n\n"
            "Summary statistics:\n"
            f"{stats}"
        )
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and report its size, column names and summary statistics.
    The file must already be saved locally; pass that location to this function.
    The download_file_from_url tool will save it by name to tempfile.gettempdir()

    Args:
        file_path: Path to the Excel file
        query: Question about the data (accepted but not used by the current implementation)

    Returns:
        Analysis result or error message
    """
    try:
        import pandas as pd

        # Read the first sheet of the workbook
        df = pd.read_excel(file_path)
        result = f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        # Excel header cells may hold numbers or dates, which str.join rejects,
        # so every column label is converted to text first.
        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except ImportError:
        return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
import whisper
@tool
def youtube_transcribe(url: str) -> str:
    """
    Transcribes a YouTube video. Use when you need to process the audio from a YouTube video into Text.

    Args:
        url: Url of the YouTube video

    Returns:
        The transcribed text of the video's audio track.

    Raises:
        RuntimeError: If no converted .wav file is found after the download.
    """
    model_size: str = "base"
    with tempfile.TemporaryDirectory() as tmpdir:
        # Download the best audio stream and let ffmpeg convert it to wav
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': os.path.join(tmpdir, 'audio.%(ext)s'),
            'quiet': True,
            'noplaylist': True,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
                'preferredquality': '192',
            }],
            'force_ipv4': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Only the download side effect is needed; the metadata is not used.
            ydl.extract_info(url, download=True)
        audio_path = next(
            (os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith('.wav')),
            None,
        )
        if not audio_path:
            raise RuntimeError("Failed to find audio")
        # Load the model only once audio is available, so a failed download
        # does not pay the model-loading cost.
        model = whisper.load_model(model_size)
        # Transcribe while the temporary directory (and the wav file) still exists
        result = model.transcribe(audio_path)
        return result['text']
@tool
def transcribe_audio(audio_file_path: str) -> str:
    """
    Transcribes a local audio file into text. Use when you need to process audio data.
    DO NOT use this tool for YouTube video; use the youtube_transcribe tool to process audio data from YouTube.
    Use this tool when you have an audio file in .mp3, .wav, .aac, .ogg, .flac, .m4a, .alac or .wma

    Args:
        audio_file_path: Filepath to the audio file (str)

    Returns:
        The transcribed text.
    """
    # The Whisper "small" model is loaded on every call
    speech_model = whisper.load_model("small")
    transcription = speech_model.transcribe(audio_file_path)
    return transcription['text']
@tool
def wikipedia_search(query: str) -> dict:
    """
    Search Wikipedia for a given query and return the first 10 results with summaries.

    Args:
        query: The search term or topic.

    Returns:
        A dictionary with a 'wiki_results' key containing formatted Wikipedia summaries.
    """
    wikipedia.set_lang("en")
    try:
        titles = wikipedia.search(query, results=10)
    except Exception as e:
        return {"wiki_results": f"Error during Wikipedia search: {str(e)}"}

    if not titles:
        return {"wiki_results": f"No Wikipedia results found for '{query}'."}

    summaries = []
    for title in titles:
        try:
            # Titles come straight from the search results, so auto-suggest is
            # disabled to keep the library from substituting a different page.
            summary = wikipedia.summary(title, sentences=2, auto_suggest=False)
            summaries.append(f"## {title}\n{summary}")
        except wikipedia.exceptions.DisambiguationError as e:
            summaries.append(f"## {title}\nDisambiguation required. Example options: {e.options[:3]}")
        except wikipedia.exceptions.PageError:
            summaries.append(f"## {title}\nPage not found.")
        except Exception as e:
            # One failing page should not discard the summaries already collected.
            summaries.append(f"## {title}\nError fetching summary: {str(e)}")

    formatted = "\n\n---\n\n".join(summaries)
    return {"wiki_results": formatted}
# Mathematical tools
@tool
def multiply(a: float, b: float) -> float:
    """Return the product of two numbers.

    Args:
        a: first number
        b: second number

    Returns:
        Multiplication result
    """
    product = a * b
    return product
@tool
def add(a: float, b: float) -> float:
    """Return the sum of two numbers.

    Args:
        a: first number
        b: second number

    Returns:
        Addition result
    """
    total = a + b
    return total
@tool
def subtract(a: float, b: float) -> float:
    """Return the difference of two numbers (a minus b).

    Args:
        a: first number
        b: second number

    Returns:
        Subtraction result
    """
    difference = a - b
    return difference
@tool
def divide(a: float, b: float) -> float:
    """Return the quotient of two numbers (a divided by b).

    Args:
        a: first number
        b: second number

    Returns:
        Division result

    Raises:
        ValueError: If b is zero.
    """
    if b != 0:
        return a / b
    # A zero divisor is reported as ValueError
    raise ValueError("Cannot divide by zero.")
@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of a divided by b.

    Args:
        a: first number
        b: second number

    Returns:
        Modulus result

    Raises:
        ZeroDivisionError: If b is zero.
    """
    remainder = a % b
    return remainder
@tool
def convert_units(value: float, from_unit: str, to_unit: str) -> float:
    """
    Converts a value from one unit to another.
    Supported conversions, in both directions, are miles/kilometers,
    kilograms/pounds and celsius/fahrenheit. Unit names are case-insensitive
    and common abbreviations (mi, km, kg, lb, lbs, c, f) are accepted.

    Args:
        value: The numerical value to convert.
        from_unit: The original unit, e.g. 'miles', 'kg' or 'celsius'.
        to_unit: The target unit, e.g. 'kilometers', 'lb' or 'fahrenheit'.

    Returns:
        The converted value result.

    Raises:
        ValueError: If the requested pair of units is not supported.
    """
    # Map the abbreviations and singular forms advertised in the docstring
    # onto the canonical names used as keys of the conversion table.
    aliases = {
        "mile": "miles", "mi": "miles",
        "kilometer": "kilometers", "kilometre": "kilometers",
        "kilometres": "kilometers", "km": "kilometers",
        "kilogram": "kilograms", "kg": "kilograms", "kgs": "kilograms",
        "pound": "pounds", "lb": "pounds", "lbs": "pounds",
        "c": "celsius", "°c": "celsius",
        "f": "fahrenheit", "°f": "fahrenheit",
    }
    conversions = {
        ("miles", "kilometers"): lambda v: v * 1.60934,
        ("kilometers", "miles"): lambda v: v / 1.60934,
        ("kilograms", "pounds"): lambda v: v * 2.20462,
        ("pounds", "kilograms"): lambda v: v / 2.20462,
        ("celsius", "fahrenheit"): lambda v: (v * 9/5) + 32,
        ("fahrenheit", "celsius"): lambda v: (v - 32) * 5/9,
    }
    source = from_unit.strip().lower()
    target = to_unit.strip().lower()
    key = (aliases.get(source, source), aliases.get(target, target))
    if key not in conversions:
        raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported.")
    return conversions[key](value)