helloparthshah committed on
Commit
5d665be
·
1 Parent(s): e8fe06f

Updated get website tool

Browse files
src/manager/manager.py CHANGED
@@ -287,6 +287,9 @@ class GeminiManager:
287
  "role": "assistant",
288
  "content": full_text
289
  }]
 
 
 
290
  for candidate in chunk.candidates:
291
  if candidate.content and candidate.content.parts:
292
  function_call_requests.append({
@@ -305,6 +308,8 @@ class GeminiManager:
305
  messages = messages + function_call_requests
306
  yield messages
307
  except Exception as e:
 
 
308
  messages.append({
309
  "role": "assistant",
310
  "content": f"Error generating response: {str(e)}",
@@ -315,13 +320,13 @@ class GeminiManager:
315
  return messages
316
 
317
  # Check if any text was received
318
- if not full_text and len(function_calls) == 0:
 
319
  messages.append({
320
  "role": "assistant",
321
  "content": "No response from the model.",
322
  "metadata": {"title": "No response from the model."}
323
  })
324
- yield messages
325
 
326
  if function_calls and len(function_calls) > 0:
327
  for call in self.handle_tool_calls(function_calls):
 
287
  "role": "assistant",
288
  "content": full_text
289
  }]
290
+ else:
291
+ print("Empty chunk received")
292
+ print(chunk)
293
  for candidate in chunk.candidates:
294
  if candidate.content and candidate.content.parts:
295
  function_call_requests.append({
 
308
  messages = messages + function_call_requests
309
  yield messages
310
  except Exception as e:
311
+ print(messages)
312
+ print(chat_history)
313
  messages.append({
314
  "role": "assistant",
315
  "content": f"Error generating response: {str(e)}",
 
320
  return messages
321
 
322
  # Check if any text was received
323
+ if len(full_text.strip()) == 0 and len(function_calls) == 0:
324
+ print(response_stream)
325
  messages.append({
326
  "role": "assistant",
327
  "content": "No response from the model.",
328
  "metadata": {"title": "No response from the model."}
329
  })
 
330
 
331
  if function_calls and len(function_calls) > 0:
332
  for call in self.handle_tool_calls(function_calls):
src/tools/user_tools/get_website_tool.py CHANGED
@@ -3,15 +3,15 @@ from collections import defaultdict
3
  import re
4
  import time
5
 
6
- __all__ = ['GetWebsiteTool']
7
 
8
 
9
- class GetWebsiteTool():
10
  dependencies = ["requests", "beautifulsoup4==4.13.3"]
11
 
12
  inputSchema = {
13
- "name": "GetWebsiteTool",
14
- "description": "Returns a summary of the content of a website based on a query string.",
15
  "parameters": {
16
  "type": "object",
17
  "properties": {
@@ -19,6 +19,16 @@ class GetWebsiteTool():
19
  "type": "string",
20
  "description": "The URL of the website to fetch content from.",
21
  },
 
 
 
 
 
 
 
 
 
 
22
  },
23
  "required": ["url"],
24
  }
@@ -28,7 +38,7 @@ class GetWebsiteTool():
28
  # Clean the text more thoroughly
29
  text = re.sub(r'\[[0-9]*\]', ' ', text)
30
  text = re.sub(r'\s+', ' ', text)
31
- text = re.sub(r'[^a-zA-Z0-9.\s]', '', text) # Remove special characters except periods
32
 
33
  # Tokenize into sentences
34
  sentences = re.split(r'(?<=[.!?])\s+', text)
@@ -60,8 +70,8 @@ class GetWebsiteTool():
60
  score += sentence_length_factor * 0.1
61
 
62
  # Add a coherence score
63
- if i > 0 and sentences[i-1] in sentence_scores:
64
- previous_sentence_words = sentences[i-1].lower().split()
65
  common_words = set(words) & set(previous_sentence_words)
66
  coherence_score = len(common_words) / len(words)
67
  score += coherence_score * 0.1
@@ -90,9 +100,11 @@ class GetWebsiteTool():
90
  'Sec-Fetch-User': '?1',
91
  'Priority': 'u=0, i',
92
  }
93
- print("Running web search")
94
 
95
  url = kwargs.get("url")
 
 
96
 
97
  if not url:
98
  return {
@@ -107,26 +119,44 @@ class GetWebsiteTool():
107
  BeautifulSoup = bs4.BeautifulSoup
108
  try:
109
  response = requests.get(url, headers=headers, timeout=10)
110
- if response.status_code == 200:
111
- # Parse the content using BeautifulSoup
112
- soup = BeautifulSoup(response.content, 'html.parser')
 
 
 
 
 
 
 
 
113
  # Extract text from the parsed HTML
114
  text = soup.get_text()
115
 
 
116
  # Summarize the text
117
  output = self.summarize_text(text)
 
 
118
  else:
119
  return {
120
  "status": "error",
121
- "message": f"Failed to fetch content from {url}. Status code: {response.status_code}",
122
  "output": None
123
  }
124
 
 
125
  return {
126
  "status": "success",
127
  "message": "Search completed successfully",
128
  "output": output,
129
  }
 
 
 
 
 
 
130
  except Exception as e:
131
  return {
132
  "status": "error",
 
3
  import re
4
  import time
5
 
6
+ __all__ = ['GetWebsite']
7
 
8
 
9
+ class GetWebsite():
10
  dependencies = ["requests", "beautifulsoup4==4.13.3"]
11
 
12
  inputSchema = {
13
+ "name": "GetWebsite",
14
+ "description": "Returns the content of a website with enhanced error handling and output options.",
15
  "parameters": {
16
  "type": "object",
17
  "properties": {
 
19
  "type": "string",
20
  "description": "The URL of the website to fetch content from.",
21
  },
22
+ "output_type": {
23
+ "type": "string",
24
+ "enum": ["summary", "full_text"],
25
+ "description": "The type of output to return. 'summary' returns a summary of the text, 'full_text' returns the full text content.",
26
+ "default": "full_text"
27
+ },
28
+ "css_selector": {
29
+ "type": "string",
30
+ "description": "A CSS selector to extract specific content from the page.",
31
+ }
32
  },
33
  "required": ["url"],
34
  }
 
38
  # Clean the text more thoroughly
39
  text = re.sub(r'\[[0-9]*\]', ' ', text)
40
  text = re.sub(r'\s+', ' ', text)
41
+ text = re.sub(r'[^a-zA-Z0-9.\s]', '', text) # Remove special characters except periods
42
 
43
  # Tokenize into sentences
44
  sentences = re.split(r'(?<=[.!?])\s+', text)
 
70
  score += sentence_length_factor * 0.1
71
 
72
  # Add a coherence score
73
+ if i > 0 and sentences[i - 1] in sentence_scores:
74
+ previous_sentence_words = sentences[i - 1].lower().split()
75
  common_words = set(words) & set(previous_sentence_words)
76
  coherence_score = len(common_words) / len(words)
77
  score += coherence_score * 0.1
 
100
  'Sec-Fetch-User': '?1',
101
  'Priority': 'u=0, i',
102
  }
103
+ print("Running enhanced web scraper")
104
 
105
  url = kwargs.get("url")
106
+ output_type = kwargs.get("output_type", "summary")
107
+ css_selector = kwargs.get("css_selector")
108
 
109
  if not url:
110
  return {
 
119
  BeautifulSoup = bs4.BeautifulSoup
120
  try:
121
  response = requests.get(url, headers=headers, timeout=10)
122
+ response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
123
+ response.encoding = response.apparent_encoding # Handle encoding
124
+
125
+ # Parse the content using BeautifulSoup
126
+ soup = BeautifulSoup(response.content, 'html.parser')
127
+
128
+ if css_selector:
129
+ # Extract text from the selected elements
130
+ elements = soup.select(css_selector)
131
+ text = '\n'.join([element.get_text() for element in elements])
132
+ else:
133
  # Extract text from the parsed HTML
134
  text = soup.get_text()
135
 
136
+ if output_type == "summary":
137
  # Summarize the text
138
  output = self.summarize_text(text)
139
+ elif output_type == "full_text":
140
+ output = text
141
  else:
142
  return {
143
  "status": "error",
144
+ "message": f"Invalid output_type: {output_type}",
145
  "output": None
146
  }
147
 
148
+
149
  return {
150
  "status": "success",
151
  "message": "Search completed successfully",
152
  "output": output,
153
  }
154
+ except requests.exceptions.RequestException as e:
155
+ return {
156
+ "status": "error",
157
+ "message": f"Request failed: {str(e)}",
158
+ "output": None
159
+ }
160
  except Exception as e:
161
  return {
162
  "status": "error",