# Web search and content tools
import re

import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from smolagents import Tool


class WebSearchTool(Tool):
    """Tool that runs a DuckDuckGo text search and formats the top hits.

    Failures during the search are reported as a returned error string rather
    than raised, so the calling agent always receives text it can act on.
    """

    name = "web_search"
    description = "Search the web for information about a query using DuckDuckGo."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query."
        }
    }
    output_type = "string"

    def __init__(self, *, max_results: int = 3, **kwargs):
        """Initialise the tool.

        Args:
            max_results: Maximum number of search hits to request and format.
                Defaults to 3, the value that used to be hard-coded.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        self.max_results = max_results

    def forward(self, query: str) -> str:
        """Search DuckDuckGo for *query* and return a numbered result list.

        Args:
            query: The search query.

        Returns:
            A formatted block with title, body and source URL for each hit,
            ``"No web search results found."`` when the search yields nothing,
            or an ``"Error performing web search: ..."`` message on failure.

        Raises:
            AssertionError: If *query* is not a string.
        """
        # Raised explicitly (instead of using `assert`) so the check still
        # runs under `python -O`; the exception type is kept for callers.
        if not isinstance(query, str):
            raise AssertionError("Query must be a string.")

        try:
            with DDGS() as ddgs:
                hits = list(ddgs.text(query, max_results=self.max_results))

            if not hits:
                return "No web search results found."

            entries = [
                f"\n{i}. {r['title']}\n {r['body']}\n Source: {r['href']}\n"
                for i, r in enumerate(hits, 1)
            ]
            return "\nWeb Search Results:\n" + "".join(entries)
        except Exception as e:
            # Tool boundary: report the failure as text instead of crashing
            # the agent run.
            print(f"Error in web search: {e}")
            return f"Error performing web search: {e}"


class WebContentTool(Tool):
    """Tool that downloads a webpage and returns its visible text.

    Script and style elements are dropped, whitespace is normalised, and the
    text is cut to ``max_chars`` characters. Failures are reported as a
    returned error string rather than raised.
    """

    name = "web_content"
    description = "Fetch and extract content from a specific webpage."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the webpage to fetch content from."
        }
    }
    output_type = "string"

    # Maximum number of characters of page text returned before truncation.
    max_chars = 2000
    # Timeout, in seconds, passed to the HTTP request.
    timeout = 10

    def forward(self, url: str) -> str:
        """Fetch *url* and return its cleaned-up text content.

        Args:
            url: The URL of the webpage to fetch content from.

        Returns:
            ``"Content from <url>:"`` followed by the extracted text
            (truncated to ``max_chars`` characters with a marker appended), or
            an ``"Error fetching content from <url>: ..."`` message on failure.

        Raises:
            AssertionError: If *url* is not a string.
        """
        # Raised explicitly (instead of using `assert`) so the check still
        # runs under `python -O`; the exception type is kept for callers.
        if not isinstance(url, str):
            raise AssertionError("URL must be a string.")

        try:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=self.timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            # Script and style bodies are not readable page content.
            for tag in soup(["script", "style"]):
                tag.extract()

            text = soup.get_text(separator='\n')
            lines = (line.strip() for line in text.splitlines())
            # Split on runs of two spaces so side-by-side fragments land on
            # separate lines while ordinary sentences stay intact; splitting
            # on a single space would put every word on its own line.
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = '\n'.join(chunk for chunk in chunks if chunk)

            if len(text) > self.max_chars:
                text = text[:self.max_chars] + "... \n[content truncated]"

            return f"Content from {url}:\n\n{text}"
        except Exception as e:
            # Tool boundary: report the failure as text instead of crashing
            # the agent run.
            print(f"Error fetching web content: {e}")
            return f"Error fetching content from {url}: {e}"