Spaces:
Running
Running
Kunal Pai
committed on
Commit
·
f1acfc4
1
Parent(s):
b315d85
Add tools for searching arXiv, web, and Wikipedia
Browse files- tools/arxiv_tool.py +73 -0
- tools/web_search_tool.py +65 -0
- tools/wikipedia_tool.py +102 -0
tools/arxiv_tool.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import importlib
|
2 |
+
|
3 |
+
__all__ = ['ArxivTool']
|
4 |
+
|
5 |
+
|
6 |
+
class ArxivTool():
    """Tool that queries the arXiv API for academic papers matching a query.

    Returns a dict with keys ``status`` ("success"/"error"), ``message``,
    and ``output`` (a list of paper dicts on success, ``None`` on error).
    """

    # Pinned third-party requirement; resolved lazily at run time via importlib.
    dependencies = ["arxiv==2.1.3"]

    # JSON-schema-style description of the tool's invocation contract.
    inputSchema = {
        "name": "ArxivTool",
        "description": "Searches arXiv for academic papers based on a query.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query for papers (e.g., 'superconductors gem5').",
                },
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of papers to retrieve. Default is 5.",
                    "default": 5
                }
            },
            "required": ["query"],
        }
    }

    def __init__(self):
        pass

    def run(self, **kwargs):
        """Search arXiv for ``query`` and return up to ``max_results`` papers."""
        search_term = kwargs.get("query")
        limit = kwargs.get("max_results", 5)

        # Guard clause: the schema marks 'query' as required.
        if not search_term:
            return {
                "status": "error",
                "message": "Missing required parameter: 'query'",
                "output": None
            }

        try:
            # Lazy import keeps the tool importable even when the optional
            # dependency is absent; failures surface as an error result below.
            arxiv = importlib.import_module("arxiv")
            client = arxiv.Client()

            request = arxiv.Search(
                query=search_term,
                max_results=limit,
            )

            found = [
                {
                    "title": item.title,
                    "authors": [author.name for author in item.authors],
                    "published": item.published.isoformat(),
                    "summary": item.summary.strip(),
                    "pdf_url": item.pdf_url,
                }
                for item in client.results(request)
            ]

            return {
                "status": "success",
                "message": f"Found {len(found)} paper(s) on arXiv",
                "output": found,
            }

        except Exception as e:
            # Any failure (missing package, network error, API error) is
            # reported uniformly as an error result rather than raised.
            return {
                "status": "error",
                "message": f"arXiv search failed: {str(e)}",
                "output": None,
            }
|
tools/web_search_tool.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import importlib
|
2 |
+
|
3 |
+
__all__ = ['WebSearchTool']
|
4 |
+
|
5 |
+
|
6 |
+
class WebSearchTool():
    """Tool that performs a site-restricted Google search.

    Returns a dict with keys ``status`` ("success"/"error"), ``message``,
    and ``output`` (a list of result URLs on success, ``None`` on error).
    """

    # Pinned third-party requirement; resolved lazily at run time via importlib.
    dependencies = ["googlesearch-python==1.3.0"]

    # JSON-schema-style description of the tool's invocation contract.
    inputSchema = {
        "name": "WebSearchTool",
        "description": "Searches a specific website for a given query using Google search.",
        "parameters": {
            "type": "object",
            "properties": {
                "website": {
                    "type": "string",
                    "description": "The website domain to search in (e.g., 'stackoverflow.com').",
                },
                "query": {
                    "type": "string",
                    "description": "The query string to search for on the website.",
                }
            },
            "required": ["website", "query"],
        }
    }

    def __init__(self):
        pass

    def run(self, **kwargs):
        """Run a ``site:<website> <query>`` Google search and collect result URLs."""
        print("Running web search")

        website = kwargs.get("website")
        query = kwargs.get("query")

        # Guard clause: both parameters are required by the schema.
        if not website or not query:
            return {
                "status": "error",
                "message": "Missing required parameters: 'website' and 'query'",
                "output": None
            }

        search_query = f"site:{website} {query}"
        results = []

        try:
            # FIX: import moved inside the try block so a missing dependency is
            # reported as an error result instead of an uncaught
            # ModuleNotFoundError (consistent with the other tools).
            googlesearch = importlib.import_module("googlesearch")

            for result in googlesearch.search(search_query, num_results=10):
                # Skip Google's internal pagination/search links.
                if "/search?num=" not in result:
                    results.append(result)

            return {
                "status": "success",
                "message": "Search completed successfully",
                "output": results,
            }

        except Exception as e:
            # Network errors, rate limiting, or a missing package all surface
            # uniformly as an error result rather than being raised.
            return {
                "status": "error",
                "message": f"Search failed: {str(e)}",
                "output": None,
            }
|
tools/wikipedia_tool.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import importlib
|
2 |
+
|
3 |
+
__all__ = ['WikipediaTool']
|
4 |
+
|
5 |
+
|
6 |
+
class WikipediaTool():
    """Tool that answers a topic/question with a short Wikipedia summary.

    Searches the MediaWiki API for the top-ranked article, fetches its HTML
    via the REST API, and returns a markdown snippet with the first five
    sentences plus a "Read more" link. Returns a dict with keys ``status``
    ("success"/"error"), ``message``, and ``output``.
    """

    # Pinned third-party requirements; resolved lazily at run time via importlib.
    dependencies = ["requests==2.32.3", "beautifulsoup4==4.13.3"]

    # JSON-schema-style description of the tool's invocation contract.
    inputSchema = {
        "name": "WikipediaTool",
        "description": "Searches Wikipedia for a given question and returns a short summary.",
        "parameters": {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": "The topic or question to search on Wikipedia.",
                }
            },
            "required": ["question"],
        }
    }

    def __init__(self):
        pass

    def run(self, **kwargs):
        """Look up ``question`` on Wikipedia and return a summary result dict."""
        question = kwargs.get("question")
        # Guard clause: the schema marks 'question' as required.
        if not question:
            return {
                "status": "error",
                "message": "Missing required parameter: 'question'",
                "output": None
            }

        print(f"Searching Wikipedia for: {question}")

        search_url = "https://en.wikipedia.org/w/api.php"
        search_params = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": question,
            "srlimit": 1,  # only the top-ranked article is used
        }

        try:
            # FIX: imports moved inside the try block so a missing dependency
            # is reported as an error result instead of an uncaught
            # ModuleNotFoundError (consistent with the other tools).
            requests = importlib.import_module("requests")
            bs4 = importlib.import_module("bs4")
            BeautifulSoup = bs4.BeautifulSoup

            # FIX: timeout added so a stalled connection cannot hang forever;
            # a timeout raises and is reported via the except branch below.
            response = requests.get(search_url, params=search_params, timeout=10)
            if response.status_code != 200:
                return {
                    "status": "error",
                    "message": "Wikipedia API request failed.",
                    "output": None,
                }

            data = response.json()
            search_results = data.get("query", {}).get("search", [])

            if not search_results:
                return {
                    "status": "error",
                    "message": "No results found on Wikipedia.",
                    "output": None,
                }

            top_result = search_results[0]["title"]
            page_url = f"https://en.wikipedia.org/wiki/{top_result.replace(' ', '_')}"
            print(f"Fetching full content from: {page_url}")

            # REST endpoint serves the rendered article HTML.
            html_url = f"https://en.wikipedia.org/api/rest_v1/page/html/{top_result.replace(' ', '_')}"
            html_response = requests.get(html_url, timeout=10)

            if html_response.status_code != 200:
                return {
                    "status": "error",
                    "message": "Failed to fetch article content.",
                    "output": None,
                }

            soup = BeautifulSoup(html_response.text, "html.parser")
            paragraphs = [p.get_text() for p in soup.find_all("p") if p.get_text()]
            full_text = " ".join(paragraphs)
            # FIX: rejoin with ". " (not " ") so the sentence-ending periods
            # consumed by split(". ") are restored in the summary.
            summary = ". ".join(full_text.split(". ")[:5])  # first 5 sentences

            output_text = f"**{top_result}**\n{summary}...\n[Read more]({page_url})"

            return {
                "status": "success",
                "message": "Wikipedia article summary retrieved successfully.",
                "output": output_text,
            }

        except Exception as e:
            # Network errors, JSON decode errors, timeouts, or a missing
            # package all surface uniformly as an error result.
            return {
                "status": "error",
                "message": f"Exception occurred: {str(e)}",
                "output": None,
            }
|