Nymbo committed
Commit 82f0069 · verified · 1 Parent(s): 5ca3b99

Update app.py

Files changed (1)
  1. app.py +11 -11
app.py CHANGED
@@ -26,7 +26,7 @@ from duckduckgo_search import DDGS
 
 def _http_get(url: str) -> requests.Response:
     """
-    (layman) Download the page politely with a short timeout and realistic headers.
+    Download the page politely with a short timeout and realistic headers.
     """
     headers = {
         "User-Agent": "Mozilla/5.0 (compatible; WebMCP/1.0; +https://example.com)",
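The body of `_http_get` continues beyond this hunk. A minimal sketch of the behaviour the docstring describes, with the timeout value and any extra headers assumed rather than copied from app.py:

```python
import requests

def _http_get_sketch(url: str) -> requests.Response:
    # Browser-like headers plus a short timeout, per the docstring above.
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; WebMCP/1.0; +https://example.com)",
        "Accept-Language": "en-US,en;q=0.9",  # assumed; not shown in the diff
    }
    resp = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
    resp.raise_for_status()  # assumed error handling; the real function may differ
    return resp
```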
@@ -38,7 +38,7 @@ def _http_get(url: str) -> requests.Response:
 
 def _normalize_whitespace(text: str) -> str:
     """
-    (layman) Squeeze extra spaces and blank lines to keep things compact.
+    Squeeze extra spaces and blank lines to keep things compact.
     """
     text = re.sub(r"[ \t\u00A0]+", " ", text)
     text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text.strip())
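With a closing `return text`, the two substitutions above behave roughly as follows (the demo string is illustrative only, not from app.py):

```python
import re

def _normalize_whitespace_sketch(text: str) -> str:
    # Collapse runs of spaces, tabs and non-breaking spaces into one space,
    # then squeeze three or more newlines down to a single blank line.
    text = re.sub(r"[ \t\u00A0]+", " ", text)
    text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text.strip())
    return text

print(_normalize_whitespace_sketch("a  \u00A0 b\n\n\n\nc"))  # -> "a b\n\nc"
```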
@@ -47,7 +47,7 @@ def _normalize_whitespace(text: str) -> str:
 
 def _truncate(text: str, max_chars: int) -> Tuple[str, bool]:
     """
-    (layman) Cut text if it gets too long; return the text and whether we trimmed.
+    Cut text if it gets too long; return the text and whether we trimmed.
     """
     if max_chars is None or max_chars <= 0 or len(text) <= max_chars:
         return text, False
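The cut itself falls outside this hunk; a plausible completion, with the exact cut expression assumed:

```python
from typing import Tuple

def _truncate_sketch(text: str, max_chars: int) -> Tuple[str, bool]:
    # Leave short text alone; a non-positive limit means "no limit".
    if max_chars is None or max_chars <= 0 or len(text) <= max_chars:
        return text, False
    # Cut at the limit and report that trimming happened
    # (the real cut point in app.py isn't visible here).
    return text[:max_chars].rstrip(), True
```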
@@ -56,7 +56,7 @@ def _truncate(text: str, max_chars: int) -> Tuple[str, bool]:
 
 def _shorten(text: str, limit: int) -> str:
     """
-    (layman) Hard cap a string with an ellipsis to keep tokens small.
+    Hard cap a string with an ellipsis to keep tokens small.
     """
     if limit <= 0 or len(text) <= limit:
         return text
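A sketch of the likely ellipsis behaviour; reserving one character for the ellipsis is an assumption, not something shown in the diff:

```python
def _shorten_sketch(text: str, limit: int) -> str:
    if limit <= 0 or len(text) <= limit:
        return text
    # Keep (limit - 1) characters and append an ellipsis (assumed behaviour).
    return text[: max(0, limit - 1)].rstrip() + "…"
```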
@@ -65,7 +65,7 @@ def _shorten(text: str, limit: int) -> str:
 
 def _domain_of(url: str) -> str:
     """
-    (layman) Show a friendly site name like "example.com".
+    Show a friendly site name like "example.com".
     """
     try:
         return urlparse(url).netloc or ""
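For reference, completing the `try` with an assumed fallback gives a tiny runnable version:

```python
from urllib.parse import urlparse

def _domain_of_sketch(url: str) -> str:
    # netloc is the host part of the URL, e.g. "example.com"; fall back to "".
    try:
        return urlparse(url).netloc or ""
    except Exception:  # assumed fallback; the except clause isn't in this hunk
        return ""

print(_domain_of_sketch("https://example.com/article?id=1"))  # -> example.com
```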
@@ -85,7 +85,7 @@ def _og(soup: BeautifulSoup, prop: str) -> str | None:
 
 def _extract_metadata(soup: BeautifulSoup, final_url: str) -> Dict[str, str]:
     """
-    (layman) Pull the useful bits: title, description, site name, canonical URL, language, etc.
+    Pull the useful bits: title, description, site name, canonical URL, language, etc.
     """
     meta: Dict[str, str] = {}
 
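Only the opening of `_extract_metadata` is in this hunk. A rough sketch of the kind of extraction the docstring describes, using BeautifulSoup; every selector and key name below is an assumption, not a copy of app.py:

```python
from typing import Dict
from bs4 import BeautifulSoup

def _extract_metadata_sketch(soup: BeautifulSoup, final_url: str) -> Dict[str, str]:
    meta: Dict[str, str] = {}

    # Title: prefer OpenGraph, fall back to the <title> tag.
    og_title = soup.find("meta", attrs={"property": "og:title"})
    if og_title and og_title.get("content"):
        meta["title"] = og_title["content"].strip()
    elif soup.title and soup.title.string:
        meta["title"] = soup.title.string.strip()

    # Description from the standard <meta name="description"> tag.
    desc = soup.find("meta", attrs={"name": "description"})
    if desc and desc.get("content"):
        meta["description"] = desc["content"].strip()

    # Canonical URL, falling back to the final (post-redirect) URL.
    canonical = soup.find("link", rel="canonical")
    meta["canonical"] = (canonical.get("href") if canonical else None) or final_url

    # Page language from <html lang="...">.
    html_tag = soup.find("html")
    if html_tag and html_tag.get("lang"):
        meta["lang"] = html_tag["lang"]

    return meta
```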
@@ -123,7 +123,7 @@ def _extract_metadata(soup: BeautifulSoup, final_url: str) -> Dict[str, str]:
 
 def _extract_main_text(html: str) -> Tuple[str, BeautifulSoup]:
     """
-    (layman) Use Readability to isolate the main article and turn it into clean text.
+    Use Readability to isolate the main article and turn it into clean text.
     Returns (clean_text, soup_of_readable_html).
     """
     # Simplified article HTML from Readability
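A sketch of the Readability step, assuming the readability-lxml package and BeautifulSoup for the text flattening; the clean-up details in app.py are not shown here:

```python
from typing import Tuple
from bs4 import BeautifulSoup
from readability import Document  # readability-lxml, assumed

def _extract_main_text_sketch(html: str) -> Tuple[str, BeautifulSoup]:
    # Simplified article HTML from Readability
    readable_html = Document(html).summary(html_partial=True)
    soup = BeautifulSoup(readable_html, "html.parser")
    # Flatten the readable fragment to plain text, one line per block.
    text = soup.get_text("\n", strip=True)
    return text, soup
```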
@@ -151,7 +151,7 @@ def _extract_main_text(html: str) -> Tuple[str, BeautifulSoup]:
 
 def _extract_links(readable_soup: BeautifulSoup, base_url: str, max_links: int) -> List[Tuple[str, str]]:
     """
-    (layman) Collect clean, unique, absolute links from the readable section only.
+    Collect clean, unique, absolute links from the readable section only.
     """
     seen = set()
     links: List[Tuple[str, str]] = []
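The anchor loop is outside the hunk; a plausible continuation using `urljoin`, where the (text, url) order inside each tuple and the filtering rules are assumptions:

```python
from typing import List, Tuple
from urllib.parse import urljoin
from bs4 import BeautifulSoup

def _extract_links_sketch(readable_soup: BeautifulSoup, base_url: str, max_links: int) -> List[Tuple[str, str]]:
    seen = set()
    links: List[Tuple[str, str]] = []
    for a in readable_soup.find_all("a", href=True):
        # Resolve relative hrefs against the page URL; keep only http(s), skip duplicates.
        url = urljoin(base_url, a["href"])
        if url.startswith(("http://", "https://")) and url not in seen:
            seen.add(url)
            links.append((a.get_text(strip=True), url))  # (text, url) order is assumed
        if len(links) >= max_links:
            break
    return links
```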
@@ -193,7 +193,7 @@ def _format_markdown(
     verbosity: str,
 ) -> str:
     """
-    (layman) Assemble a compact Markdown summary with optional sections.
+    Assemble a compact Markdown summary with optional sections.
     """
     lines: List[str] = []
 
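Only the tail of the signature is visible; purely to illustrate the "optional sections" idea, a skeleton with hypothetical parameters might look like:

```python
from typing import List

def _format_markdown_sketch(title: str, body: str, links_md: str, verbosity: str) -> str:
    # Always lead with the title; add heavier sections only at higher verbosity.
    lines: List[str] = [f"# {title}" if title else "# (untitled)"]
    if verbosity in ("Standard", "Full"):
        lines.append(body)
    if verbosity == "Full" and links_md:
        lines.append("## Links")
        lines.append(links_md)
    return "\n\n".join(lines)
```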
@@ -412,7 +412,7 @@ def Search_Concise( # <-- MCP tool #4 (Concise DDG)
 
 # --- Fetch tab (compact controllable extraction) ---
 fetch_interface = gr.Interface(
-    fn=Fetch_Webpage, # (layman) connect the function to the UI
+    fn=Fetch_Webpage, # connect the function to the UI
     inputs=[
         gr.Textbox(label="URL", placeholder="https://example.com/article"),
         gr.Dropdown(label="Verbosity", choices=["Brief", "Standard", "Full"], value="Standard"),
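The outputs and remaining options of `fetch_interface` are cut off by the hunk; a self-contained sketch of wiring a fetch function into `gr.Interface` in the same style (the placeholder function and the output component are assumptions):

```python
import gradio as gr

def Fetch_Webpage_sketch(url: str, verbosity: str) -> str:
    # Placeholder for the real extraction pipeline in app.py.
    return f"Would fetch {url} at verbosity '{verbosity}'."

fetch_demo = gr.Interface(
    fn=Fetch_Webpage_sketch,  # connect the function to the UI
    inputs=[
        gr.Textbox(label="URL", placeholder="https://example.com/article"),
        gr.Dropdown(label="Verbosity", choices=["Brief", "Standard", "Full"], value="Standard"),
    ],
    outputs=gr.Markdown(),  # assumed output component
)

if __name__ == "__main__":
    fetch_demo.launch()
```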
@@ -431,7 +431,7 @@ fetch_interface = gr.Interface(
 
 # --- Websearch tab (structured DDG via LangChain) ---
 websearch_interface = gr.Interface(
-    fn=Search_Structured, # (layman) connect the function to the UI
+    fn=Search_Structured, # connect the function to the UI
     inputs=[
         gr.Textbox(value="", label="Search query", placeholder="site:example.com interesting topic"),
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
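`Search_Structured` itself is not part of the diff; a rough sketch of a structured DuckDuckGo query with the `duckduckgo_search` package imported at the top of app.py (result-field names follow recent DDGS releases; the real tool may route through LangChain's wrapper instead):

```python
from typing import Dict, List
from duckduckgo_search import DDGS

def Search_Structured_sketch(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    # Each DDGS text hit is a dict with "title", "href" and "body" keys.
    with DDGS() as ddgs:
        hits = ddgs.text(query, max_results=max_results)
    return [
        {"title": h.get("title", ""), "url": h.get("href", ""), "snippet": h.get("body", "")}
        for h in hits
    ]
```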
 