"""Web search and content tools (smolagents Tool implementations)."""
import re

import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from smolagents import Tool
class WebSearchTool(Tool):
    """Search the web via DuckDuckGo and return a formatted text summary."""

    name = "web_search"
    description = "Search the web for information about a query using DuckDuckGo."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query."
        }
    }
    output_type = "string"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Cap result count so the tool's output stays short for the agent.
        self.max_results = 3

    def forward(self, query: str) -> str:
        """Run the search and return a numbered, human-readable result list.

        Network or API failures are reported as an error *string* rather
        than raised, so an agent loop using this tool keeps running.

        Args:
            query: The text to search for.

        Returns:
            A formatted block of up to ``max_results`` results, or a
            status/error message string.

        Raises:
            TypeError: If ``query`` is not a string.
        """
        # `assert` is stripped under `python -O`; validate explicitly instead.
        if not isinstance(query, str):
            raise TypeError("Query must be a string.")
        try:
            with DDGS() as ddgs:
                hits = list(ddgs.text(query, max_results=self.max_results))
            if not hits:
                return "No web search results found."
            formatted = "\nWeb Search Results:\n"
            for i, hit in enumerate(hits, 1):
                formatted += f"\n{i}. {hit['title']}\n {hit['body']}\n Source: {hit['href']}\n"
            return formatted
        except Exception as e:
            print(f"Error in web search: {str(e)}")
            return f"Error performing web search: {str(e)}"
class WebContentTool(Tool):
    """Fetch a webpage and return its visible text content, truncated."""

    name = "web_content"
    description = "Fetch and extract content from a specific webpage."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the webpage to fetch content from."
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Download *url*, strip scripts/styles, and return cleaned text.

        Failures (bad URL, HTTP error, timeout) are returned as an error
        string rather than raised, so an agent loop keeps running.

        Args:
            url: The webpage address to fetch.

        Returns:
            The page's visible text (capped at 2000 characters) prefixed
            with the source URL, or an error message string.

        Raises:
            TypeError: If ``url`` is not a string.
        """
        # `assert` is stripped under `python -O`; validate explicitly instead.
        if not isinstance(url, str):
            raise TypeError("URL must be a string.")
        try:
            # A browser-like User-Agent avoids trivial bot blocking.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Remove non-visible elements before extracting text.
            for tag in soup(["script", "style"]):
                tag.extract()
            text = soup.get_text(separator='\n')
            # Collapse whitespace: strip each line, split on double spaces,
            # and drop empty fragments.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = '\n'.join(chunk for chunk in chunks if chunk)
            if len(text) > 2000:
                text = text[:2000] + "... [content truncated]"
            return f"Content from {url}:\n\n{text}"
        except Exception as e:
            print(f"Error fetching web content: {str(e)}")
            return f"Error fetching content from {url}: {str(e)}"