Final_Assignment_Template

Sleeping

Final_Assignment_Template / tools_smolagent.py

mchinea

update tools and model

afadec7 6 days ago

9.35 kB

	import tempfile
	import requests
	import os

	#from time import sleep
	from dotenv import load_dotenv
	#from urllib.parse import urlparse
	from typing import Optional, List
	import yt_dlp
	import wikipedia

	from smolagents import tool

	#from google.genai import types

	from PIL import Image
	#from google import genai
	#from dotenv import load_dotenv
	#from model_provider import create_react_model, create_vision_model
	#import imageio

	# Load environment variables from a .env file at import time.
	# NOTE(review): per python-dotenv, override=True lets values from .env
	# replace variables already present in the process environment - confirm
	# this precedence is intended for deployed environments.
	load_dotenv(override=True)


	@tool
	def read_file(filepath: str) -> str:
	    """
	    Used to read the content of a file. Returns the content as a string.
	    Will only work for text-based files, such as .txt files or code files.
	    Do not use for audio or visual files.

	    Args:
	        filepath (str): The path to the file to be read.
	    Returns:
	        str: Content of the file as a string, or a message describing why the file could not be read.
	    """
	    try:
	        with open(filepath, 'r', encoding='utf-8') as file:
	            content = file.read()
	    except FileNotFoundError:
	        message = f"File not found: {filepath}"
	    except UnicodeDecodeError as e:
	        # Raised for binary / non-UTF-8 files; it is a ValueError, so the
	        # OSError handler below would not catch it.
	        message = f"Error reading file: {filepath} is not valid UTF-8 text ({str(e)})"
	    except IOError as e:
	        message = f"Error reading file: {str(e)}"
	    else:
	        print(content)
	        return content

	    # Always hand a string back to the caller: previously the error paths
	    # only printed and fell through, returning None despite the str contract.
	    print(message)
	    return message


	@tool
	def extract_text_from_image(image_path: str) -> str:
	    """
	    Extract text from an image using pytesseract (if available).

	    Args:
	        image_path: Path to the image file

	    Returns:
	        Extracted text or error message
	    """
	    try:
	        # Imported lazily so a missing OCR dependency is reported as a
	        # tool result instead of breaking the import of this module.
	        import pytesseract
	        from PIL import Image

	        # The context manager closes the underlying file handle as soon as
	        # OCR is finished instead of leaving it to garbage collection.
	        with Image.open(image_path) as image:
	            text = pytesseract.image_to_string(image)

	        return f"Extracted text from image:\n\n{text}"
	    except ImportError:
	        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract'"
	    except Exception as e:
	        # Tool boundary: report any failure (bad path, unreadable image,
	        # missing tesseract binary) as text rather than raising.
	        return f"Error extracting text from image: {str(e)}"

	@tool
	def analyze_csv_file(file_path: str, query: str) -> str:
	    """
	    Load a CSV file with pandas and report its size, column names and summary statistics.
	    The file must already be saved locally; pass its location to this function.
	    The download_file_from_url tool will save it by name to tempfile.gettempdir()

	    Args:
	        file_path: Path to the CSV file
	        query: Question about the data (accepted but not used to tailor the report)

	    Returns:
	        Analysis result or error message
	    """
	    try:
	        import pandas as pd

	        frame = pd.read_csv(file_path)
	        row_count = len(frame)
	        column_count = len(frame.columns)

	        # Assemble the report piece by piece: header, column list, statistics.
	        report_parts = [
	            f"CSV file loaded with {row_count} rows and {column_count} columns.\n",
	            f"Columns: {', '.join(frame.columns)}\n\n",
	            "Summary statistics:\n",
	            str(frame.describe()),
	        ]
	        return "".join(report_parts)
	    except ImportError:
	        return "Error: pandas is not installed. Please install it with 'pip install pandas'."
	    except Exception as e:
	        # Tool boundary: any failure is returned as text rather than raised.
	        return f"Error analyzing CSV file: {str(e)}"

	@tool
	def analyze_excel_file(file_path: str, query: str) -> str:
	    """
	    Analyze an Excel file using pandas and report its size, column names and summary statistics.
	    To use this file you need to have saved it in a location and pass that location to the function.
	    The download_file_from_url tool will save it by name to tempfile.gettempdir()

	    Args:
	        file_path: Path to the Excel file
	        query: Question about the data (accepted but not used to tailor the report)

	    Returns:
	        Analysis result or error message
	    """
	    try:
	        import pandas as pd

	        # Read the Excel file (first sheet by default)
	        df = pd.read_excel(file_path)

	        # Excel header cells may be numbers or dates, so column labels are
	        # not guaranteed to be strings; convert before joining to avoid a
	        # TypeError that would turn a valid workbook into an error result.
	        column_names = ', '.join(map(str, df.columns))

	        result = f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
	        result += f"Columns: {column_names}\n\n"

	        # Add summary statistics
	        result += "Summary statistics:\n"
	        result += str(df.describe())

	        return result
	    except ImportError:
	        return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
	    except Exception as e:
	        # Tool boundary: any failure is returned as text rather than raised.
	        return f"Error analyzing Excel file: {str(e)}"

	import whisper

	@tool
	def youtube_transcribe(url: str) -> str:
	    """
	    Transcribes a YouTube video. Use when you need to process the audio from a YouTube video into Text.

	    Args:
	        url: Url of the YouTube video
	    Returns:
	        The transcribed text of the video's audio track.
	    Raises:
	        RuntimeError: If the download step produced no .wav file.
	    """
	    model_size = "base"
	    # Everything that touches the audio file must happen inside this block:
	    # the temporary directory (and the downloaded audio) is deleted on exit.
	    with tempfile.TemporaryDirectory() as tmpdir:
	        # Download the best audio stream and have FFmpeg convert it to WAV
	        ydl_opts = {
	            'format': 'bestaudio/best',
	            'outtmpl': os.path.join(tmpdir, 'audio.%(ext)s'),
	            'quiet': True,
	            'noplaylist': True,
	            'postprocessors': [{
	                'key': 'FFmpegExtractAudio',
	                'preferredcodec': 'wav',
	                'preferredquality': '192',
	            }],
	            'force_ipv4': True,
	        }
	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            # Only the download side effect is needed; the metadata is unused.
	            ydl.extract_info(url, download=True)

	        # The post-processor decides the final file name, so look it up.
	        audio_path = next(
	            (os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith('.wav')),
	            None,
	        )
	        if not audio_path:
	            raise RuntimeError("Failed to find audio")

	        # Load the model only once there is audio to transcribe, so a failed
	        # download does not pay for loading the Whisper weights.
	        model = whisper.load_model(model_size)
	        result = model.transcribe(audio_path)
	        return result['text']

	@tool
	def transcribe_audio(audio_file_path: str) -> str:
	    """
	    Transcribes a local audio file into text. Use when you need to process audio data.
	    DO NOT use this tool for YouTube video; use the youtube_transcribe tool to process audio data from YouTube.
	    Use this tool when you have an audio file in .mp3, .wav, .aac, .ogg, .flac, .m4a, .alac or .wma

	    Args:
	        audio_file_path: Filepath to the audio file (str)
	    Returns:
	        The transcribed text.
	    """
	    # The "small" Whisper checkpoint is loaded on every call.
	    whisper_model = whisper.load_model("small")
	    transcription = whisper_model.transcribe(audio_file_path)
	    return transcription['text']


	@tool
	def wikipedia_search(query: str) -> dict:
	    """
	    Search Wikipedia for a given query and return the first 10 results with summaries.

	    Args:
	        query: The search term or topic.
	    Returns:
	        A dictionary with a 'wiki_results' key containing formatted Wikipedia summaries.
	    """
	    wikipedia.set_lang("en")
	    try:
	        results = wikipedia.search(query, results=10)
	        summaries = []
	        for title in results:
	            try:
	                # Titles come straight from the search results, so they are
	                # already exact; auto_suggest would re-run a suggestion
	                # lookup that can swap the title for a different (or
	                # nonexistent) page.
	                summary = wikipedia.summary(title, sentences=2, auto_suggest=False)
	                summaries.append(f"## {title}\n{summary}")
	            except wikipedia.exceptions.DisambiguationError as e:
	                summaries.append(f"## {title}\nDisambiguation required. Example options: {e.options[:3]}")
	            except wikipedia.exceptions.PageError:
	                summaries.append(f"## {title}\nPage not found.")

	        formatted = "\n\n---\n\n".join(summaries)
	        return {"wiki_results": formatted}

	    except Exception as e:
	        # Tool boundary: network or API failures are reported as text in
	        # the same result shape instead of being raised.
	        return {"wiki_results": f"Error during Wikipedia search: {str(e)}"}


	#Mathematical tools
	@tool
	def multiply(a: float, b: float) -> float:
	    """Compute the product of two numbers.

	    Args:
	        a: first number
	        b: second number
	    Returns:
	        Multiplication result
	    """
	    product = a * b
	    return product


	@tool
	def add(a: float, b: float) -> float:
	    """Compute the sum of two numbers.

	    Args:
	        a: first number
	        b: second number
	    Returns:
	        Addition result
	    """
	    total = a + b
	    return total


	@tool
	def subtract(a: float, b: float) -> float:
	    """Compute the difference of two numbers (a minus b).

	    Args:
	        a: first number
	        b: second number
	    Returns:
	        Subtraction result
	    """
	    difference = a - b
	    return difference


	@tool
	def divide(a: float, b: float) -> float:
	    """Compute the quotient of two numbers (a divided by b).

	    Args:
	        a: first number
	        b: second number
	    Returns:
	        Division result
	    Raises:
	        ValueError: If b is zero.
	    """
	    if b != 0:
	        quotient = a / b
	        return quotient
	    # Zero divisor: surface a ValueError with a readable message.
	    raise ValueError("Cannot divide by zero.")


	@tool
	def modulus(a: int, b: int) -> int:
	    """Compute the remainder of dividing a by b.

	    Args:
	        a: first number
	        b: second number
	    Returns:
	        Modulus result
	    """
	    # No zero check here: a zero divisor propagates Python's own
	    # ZeroDivisionError from the % operator.
	    remainder = a % b
	    return remainder


	@tool
	def convert_units(value: float, from_unit: str, to_unit: str) -> float:
	    """
	    Converts a value from one unit to another.

	    Supported conversions (unit names are case-insensitive; singular forms
	    and the listed abbreviations are accepted):
	    - miles (mi) <-> kilometers (km)
	    - kilograms (kg) <-> pounds (lb, lbs)
	    - celsius (c) <-> fahrenheit (f)

	    Args:
	        value: The numerical value to convert.
	        from_unit: The original unit (e.g. 'miles', 'kg', 'celsius').
	        to_unit: The target unit (e.g. 'kilometers', 'lb', 'fahrenheit').

	    Returns:
	        The converted value result.

	    Raises:
	        ValueError: If the requested conversion is not supported.
	    """
	    # The docstring examples advertise short forms such as 'kg' and 'lb',
	    # so map them (and singular/British spellings) onto the canonical
	    # names used as keys in the conversion table.
	    aliases = {
	        "mile": "miles",
	        "mi": "miles",
	        "kilometer": "kilometers",
	        "kilometre": "kilometers",
	        "kilometres": "kilometers",
	        "km": "kilometers",
	        "kilogram": "kilograms",
	        "kg": "kilograms",
	        "kgs": "kilograms",
	        "pound": "pounds",
	        "lb": "pounds",
	        "lbs": "pounds",
	        "c": "celsius",
	        "f": "fahrenheit",
	    }

	    conversions = {
	        ("miles", "kilometers"): lambda v: v * 1.60934,
	        ("kilometers", "miles"): lambda v: v / 1.60934,
	        ("kilograms", "pounds"): lambda v: v * 2.20462,
	        ("pounds", "kilograms"): lambda v: v / 2.20462,
	        ("celsius", "fahrenheit"): lambda v: (v * 9/5) + 32,
	        ("fahrenheit", "celsius"): lambda v: (v - 32) * 5/9,
	    }

	    def _canonical(unit: str) -> str:
	        # Trim stray whitespace, ignore case, then resolve known aliases.
	        name = unit.strip().lower()
	        return aliases.get(name, name)

	    key = (_canonical(from_unit), _canonical(to_unit))
	    if key not in conversions:
	        raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported.")

	    return conversions[key](value)