Kunal Pai committed
Commit f1acfc4 · 1 Parent(s): b315d85

Add tools for searching arXiv, web, and Wikipedia

tools/arxiv_tool.py ADDED
@@ -0,0 +1,73 @@
+ import importlib
+
+ __all__ = ['ArxivTool']
+
+
+ class ArxivTool:
+     dependencies = ["arxiv==2.1.3"]
+
+     inputSchema = {
+         "name": "ArxivTool",
+         "description": "Searches arXiv for academic papers based on a query.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "query": {
+                     "type": "string",
+                     "description": "Search query for papers (e.g., 'superconductors gem5').",
+                 },
+                 "max_results": {
+                     "type": "integer",
+                     "description": "Maximum number of papers to retrieve. Default is 5.",
+                     "default": 5
+                 }
+             },
+             "required": ["query"],
+         }
+     }
+
+     def __init__(self):
+         pass
+
+     def run(self, **kwargs):
+         query = kwargs.get("query")
+         max_results = kwargs.get("max_results", 5)
+
+         if not query:
+             return {
+                 "status": "error",
+                 "message": "Missing required parameter: 'query'",
+                 "output": None
+             }
+
+         try:
+             # Import lazily so the dependency is only needed when the tool runs.
+             arxiv = importlib.import_module("arxiv")
+             client = arxiv.Client()
+
+             search = arxiv.Search(
+                 query=query,
+                 max_results=max_results,
+             )
+
+             # Collect structured metadata for each matching paper.
+             papers = []
+             for result in client.results(search):
+                 papers.append({
+                     "title": result.title,
+                     "authors": [author.name for author in result.authors],
+                     "published": result.published.isoformat(),
+                     "summary": result.summary.strip(),
+                     "pdf_url": result.pdf_url,
+                 })
+
+             return {
+                 "status": "success",
+                 "message": f"Found {len(papers)} paper(s) on arXiv",
+                 "output": papers,
+             }
+
+         except Exception as e:
+             return {
+                 "status": "error",
+                 "message": f"arXiv search failed: {str(e)}",
+                 "output": None,
+             }
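
A minimal usage sketch of the new tool (assuming the `tools` package is importable and the pinned `arxiv` dependency is installed; the calling convention below is inferred from `run(**kwargs)` and is not itself part of this commit):

from tools.arxiv_tool import ArxivTool

tool = ArxivTool()
result = tool.run(query="superconductors gem5", max_results=3)
if result["status"] == "success":
    for paper in result["output"]:
        print(paper["title"], "->", paper["pdf_url"])
else:
    print(result["message"])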
tools/web_search_tool.py ADDED
@@ -0,0 +1,65 @@
+ import importlib
+
+ __all__ = ['WebSearchTool']
+
+
+ class WebSearchTool:
+     dependencies = ["googlesearch-python==1.3.0"]
+
+     inputSchema = {
+         "name": "WebSearchTool",
+         "description": "Searches a specific website for a given query using Google search.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "website": {
+                     "type": "string",
+                     "description": "The website domain to search in (e.g., 'stackoverflow.com').",
+                 },
+                 "query": {
+                     "type": "string",
+                     "description": "The query string to search for on the website.",
+                 }
+             },
+             "required": ["website", "query"],
+         }
+     }
+
+     def __init__(self):
+         pass
+
+     def run(self, **kwargs):
+         print("Running web search")
+
+         website = kwargs.get("website")
+         query = kwargs.get("query")
+
+         if not website or not query:
+             return {
+                 "status": "error",
+                 "message": "Missing required parameters: 'website' and 'query'",
+                 "output": None
+             }
+
+         # Restrict the search to the given domain with Google's site: operator.
+         search_query = f"site:{website} {query}"
+         results = []
+
+         try:
+             # Import lazily (and inside the try, so a missing package is
+             # reported as a tool error rather than an unhandled exception).
+             googlesearch = importlib.import_module("googlesearch")
+
+             # Skip Google's internal pagination links that the library can return.
+             for result in googlesearch.search(search_query, num_results=10):
+                 if "/search?num=" not in result:
+                     results.append(result)
+
+             return {
+                 "status": "success",
+                 "message": "Search completed successfully",
+                 "output": results,
+             }
+
+         except Exception as e:
+             return {
+                 "status": "error",
+                 "message": f"Search failed: {str(e)}",
+                 "output": None,
+             }
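
A minimal usage sketch (assuming `googlesearch-python` is installed; unauthenticated Google scraping may be rate-limited, so output can vary between runs):

from tools.web_search_tool import WebSearchTool

tool = WebSearchTool()
result = tool.run(website="stackoverflow.com", query="gem5 build error")
if result["status"] == "success":
    for url in result["output"]:
        print(url)
else:
    print(result["message"])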
tools/wikipedia_tool.py ADDED
@@ -0,0 +1,102 @@
+ import importlib
+
+ __all__ = ['WikipediaTool']
+
+
+ class WikipediaTool:
+     dependencies = ["requests==2.32.3", "beautifulsoup4==4.13.3"]
+
+     inputSchema = {
+         "name": "WikipediaTool",
+         "description": "Searches Wikipedia for a given question and returns a short summary.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "question": {
+                     "type": "string",
+                     "description": "The topic or question to search on Wikipedia.",
+                 }
+             },
+             "required": ["question"],
+         }
+     }
+
+     def __init__(self):
+         pass
+
+     def run(self, **kwargs):
+         question = kwargs.get("question")
+         if not question:
+             return {
+                 "status": "error",
+                 "message": "Missing required parameter: 'question'",
+                 "output": None
+             }
+
+         print(f"Searching Wikipedia for: {question}")
+
+         # Import lazily so the dependencies are only needed when the tool runs.
+         requests = importlib.import_module("requests")
+         bs4 = importlib.import_module("bs4")
+         BeautifulSoup = bs4.BeautifulSoup
+
+         # Use the MediaWiki search API to find the best-matching article title.
+         search_url = "https://en.wikipedia.org/w/api.php"
+         search_params = {
+             "action": "query",
+             "format": "json",
+             "list": "search",
+             "srsearch": question,
+             "srlimit": 1,
+         }
+
+         try:
+             response = requests.get(search_url, params=search_params)
+             if response.status_code != 200:
+                 return {
+                     "status": "error",
+                     "message": "Wikipedia API request failed.",
+                     "output": None,
+                 }
+
+             data = response.json()
+             search_results = data.get("query", {}).get("search", [])
+
+             if not search_results:
+                 return {
+                     "status": "error",
+                     "message": "No results found on Wikipedia.",
+                     "output": None,
+                 }
+
+             top_result = search_results[0]["title"]
+             page_url = f"https://en.wikipedia.org/wiki/{top_result.replace(' ', '_')}"
+             print(f"Fetching full content from: {page_url}")
+
+             # Fetch the rendered article HTML from the Wikipedia REST API.
+             html_url = f"https://en.wikipedia.org/api/rest_v1/page/html/{top_result.replace(' ', '_')}"
+             html_response = requests.get(html_url)
+
+             if html_response.status_code != 200:
+                 return {
+                     "status": "error",
+                     "message": "Failed to fetch article content.",
+                     "output": None,
+                 }
+
+             # Extract paragraph text and keep the first five sentences as the
+             # summary (joining on ". " so the sentence breaks are preserved).
+             soup = BeautifulSoup(html_response.text, "html.parser")
+             paragraphs = [p.get_text() for p in soup.find_all("p") if p.get_text().strip()]
+             full_text = " ".join(paragraphs)
+             summary = ". ".join(full_text.split(". ")[:5])
+
+             output_text = f"**{top_result}**\n{summary}...\n[Read more]({page_url})"
+
+             return {
+                 "status": "success",
+                 "message": "Wikipedia article summary retrieved successfully.",
+                 "output": output_text,
+             }
+
+         except Exception as e:
+             return {
+                 "status": "error",
+                 "message": f"Exception occurred: {str(e)}",
+                 "output": None,
+             }
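
A minimal usage sketch (assuming `requests` and `beautifulsoup4` are installed and network access to wikipedia.org is available):

from tools.wikipedia_tool import WikipediaTool

tool = WikipediaTool()
result = tool.run(question="superconductivity")
print(result["message"])
if result["status"] == "success":
    print(result["output"])  # bolded title, five-sentence summary, [Read more] link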