Nymbo committed on
Commit
b923a7c
·
verified ·
1 Parent(s): a655b89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -20
app.py CHANGED
@@ -1,12 +1,13 @@
1
  # File: main/app.py
2
- # Purpose: One Space that offers two tools:
3
- # 1) Fetch: extract relevant page content (title, metadata, clean text, hyperlinks)
4
- # 2) Websearch: DuckDuckGo web search
 
5
  #
6
  # Notes:
7
- # - Launched with mcp_server=True so both functions are available as MCP tools.
8
- # - UI uses TabbedInterface so you can use each tool from its own tab.
9
- # - Inline comments explain each section in plain language.
10
 
11
  from __future__ import annotations
12
 
@@ -19,9 +20,12 @@ from bs4 import BeautifulSoup # (layman) for parsing HTML
19
  from readability import Document # (layman) to isolate main readable content
20
  from urllib.parse import urljoin, urldefrag, urlparse # (layman) to fix/clean URLs
21
 
22
- # DuckDuckGo via LangChain community tool
23
  from langchain_community.tools import DuckDuckGoSearchResults
24
 
 
 
 
25
 
26
  # ==============================
27
  # Fetch: HTTP + extraction utils
@@ -299,9 +303,9 @@ def extract_relevant( # <-- MCP tool #1
299
  return md or "No content could be extracted."
300
 
301
 
302
- # ==========================
303
- # Websearch: DuckDuckGo tool
304
- # ==========================
305
 
306
  def web_search( # <-- MCP tool #2
307
  input_query: str,
@@ -321,8 +325,73 @@ def web_search( # <-- MCP tool #2
321
  return results
322
 
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  # =====================
325
- # UI: two-tab interface
326
  # =====================
327
 
328
  # --- Fetch tab (compact controllable extraction) ---
@@ -344,7 +413,7 @@ fetch_interface = gr.Interface(
344
  theme="Nymbo/Nymbo_Theme",
345
  )
346
 
347
- # --- Websearch tab (DuckDuckGo) ---
348
  websearch_interface = gr.Interface(
349
  fn=web_search, # (layman) connect the function to the UI
350
  inputs=[
@@ -352,20 +421,43 @@ websearch_interface = gr.Interface(
352
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
353
  ],
354
  outputs=gr.JSON(label="Search results"),
355
- title="Websearch β€” DuckDuckGo",
356
- description="Search the web using DuckDuckGo; returns snippet, title, and link.",
357
  allow_flagging="never",
358
  theme="Nymbo/Nymbo_Theme",
359
  )
360
 
361
- # --- Combine both into a single app with tabs ---
362
- demo = gr.TabbedInterface(
363
- interface_list=[fetch_interface, websearch_interface],
364
- tab_names=["Fetch", "Websearch"],
365
- title="Web MCP β€” Fetch + Websearch",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  theme="Nymbo/Nymbo_Theme",
367
  )
368
 
369
- # Launch the UI and expose both functions as MCP tools in one server
 
 
 
 
 
 
370
  if __name__ == "__main__":
371
  demo.launch(mcp_server=True)
 
1
  # File: main/app.py
2
+ # Purpose: One Space that offers three tools in one MCP server:
3
+ # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
+ # 2) Websearch — structured DuckDuckGo results (JSON via LangChain wrapper)
5
+ # 3) DDG (Unstructured) — compact plain-text DuckDuckGo results for low token usage
6
  #
7
  # Notes:
8
+ # - Launched with mcp_server=True so all functions are exposed as MCP tools.
9
+ # - UI uses TabbedInterface: each tool has its own tab.
10
+ # - Inline comments describe each section in plain language.
11
 
12
  from __future__ import annotations
13
 
 
20
  from readability import Document # (layman) to isolate main readable content
21
  from urllib.parse import urljoin, urldefrag, urlparse # (layman) to fix/clean URLs
22
 
23
+ # Structured DDG search (LangChain wrapper)
24
  from langchain_community.tools import DuckDuckGoSearchResults
25
 
26
+ # Unstructured DDG search (lightweight direct client)
27
+ from duckduckgo_search import DDGS
28
+
29
 
30
  # ==============================
31
  # Fetch: HTTP + extraction utils
 
303
  return md or "No content could be extracted."
304
 
305
 
306
+ # ========================================
307
+ # Websearch (Structured): DuckDuckGo (JSON)
308
+ # ========================================
309
 
310
  def web_search( # <-- MCP tool #2
311
  input_query: str,
 
325
  return results
326
 
327
 
328
+ # ===================================================
329
+ # DDG (Unstructured): compact plain-text, low tokens
330
+ # ===================================================
331
+
332
def web_search_unstructured(  # <-- MCP tool #3
    input_query: str,
    max_results: int = 5,
    style: Literal["urls", "titles+urls", "titles+urls+snippets"] = "titles+urls",
    snippet_max_chars: int = 160,
) -> str:
    """
    (layman) A lightweight DDG search that returns a plain-text list.

    - Fewer tokens than JSON; great for quick scanning or piping into LLM prompts.
    - 'style' controls how much text we include per result.

    Args:
        input_query: Search terms. Empty or whitespace-only input returns "".
        max_results: Maximum number of results to request. Coerced to int;
            values below 1 return "" without running a search.
        style: "urls" prints only the URL, "titles+urls" prints
            "title — url", and any other value (normally
            "titles+urls+snippets") also adds the snippet on a second line.
        snippet_max_chars: Snippets longer than this are cut and end with "…".
            Coerced to int; 0 or a negative value disables truncation.

    Returns:
        One entry per result, joined with newlines. Results that have neither
        a title nor a URL are skipped.
    """
    if not input_query or not input_query.strip():
        return ""

    # (layman) UI sliders and MCP clients may hand us floats (e.g. 5.0), and
    # slicing a string with a float raises TypeError, so make them real ints.
    max_results = int(max_results)
    snippet_max_chars = int(snippet_max_chars)

    # (layman) Asking for zero (or fewer) results needs no network call
    if max_results < 1:
        return ""

    # (layman) Run the search using the lightweight DDG client
    with DDGS() as ddgs:
        results = list(ddgs.text(input_query, max_results=max_results))

    urls_only = style == "urls"
    # (layman) Anything that is not one of the two short styles gets snippets
    with_snippets = style not in ("urls", "titles+urls")

    lines: list[str] = []
    for r in results:
        # (layman) Normalize fields because DDG library keys can vary by version
        title = (r.get("title") or "").strip()
        url = (r.get("href") or r.get("link") or r.get("url") or "").strip()

        # (layman) First line of the entry: URL alone, "title — url", or
        # whichever of the two is available
        if urls_only:
            entry = url
        elif title and url:
            entry = f"{title} — {url}"
        else:
            entry = url or title

        if not entry:
            continue

        if with_snippets:
            snippet = (r.get("body") or r.get("snippet") or "").strip()
            # (layman) Truncate snippet to keep output tight
            if snippet_max_chars > 0 and len(snippet) > snippet_max_chars:
                snippet = snippet[:snippet_max_chars - 1].rstrip() + "…"
            if snippet:
                entry = f"{entry}\n {snippet}"

        lines.append(entry)

    # (layman) Join lines with newlines to form a compact text block
    return "\n".join(lines).strip()
391
+
392
+
393
  # =====================
394
+ # UI: three-tab interface
395
  # =====================
396
 
397
  # --- Fetch tab (compact controllable extraction) ---
 
413
  theme="Nymbo/Nymbo_Theme",
414
  )
415
 
416
+ # --- Websearch tab (structured JSON) ---
417
  websearch_interface = gr.Interface(
418
  fn=web_search, # (layman) connect the function to the UI
419
  inputs=[
 
421
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
422
  ],
423
  outputs=gr.JSON(label="Search results"),
424
+ title="Websearch β€” DuckDuckGo (JSON)",
425
+ description="Search the web using DuckDuckGo; returns snippet, title, and link as JSON.",
426
  allow_flagging="never",
427
  theme="Nymbo/Nymbo_Theme",
428
  )
429
 
430
# --- DDG (Unstructured) tab (plain text, low tokens) ---
# (layman) The four inputs below map, in order, onto the parameters of
# web_search_unstructured: query, max results, output style, snippet length.
unstructured_interface = gr.Interface(
    fn=web_search_unstructured,  # (layman) connect the function to the UI
    inputs=[
        gr.Textbox(value="", label="Search query", placeholder="concise keywords"),
        gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
        gr.Dropdown(
            label="Output style",
            choices=["urls", "titles+urls", "titles+urls+snippets"],
            value="titles+urls",
            info="Plain text list; choose how much detail to include.",
        ),
        gr.Slider(
            minimum=40, maximum=400, value=160, step=10,
            label="Snippet max chars",
            info="Truncate snippet length to keep token usage low.",
        ),
    ],
    # (layman) Read-only text box: the tool returns one plain-text block
    outputs=gr.Textbox(label="Results (plain text)", interactive=False),
    title="DDG — Unstructured (Compact)",
    description="Outputs a plain-text list (great for low-token prompts).",
    allow_flagging="never",
    theme="Nymbo/Nymbo_Theme",
)
454
 
455
# --- Combine all three into a single app with tabs ---
demo = gr.TabbedInterface(
    interface_list=[fetch_interface, websearch_interface, unstructured_interface],
    tab_names=["Fetch", "Websearch", "DDG (Unstructured)"],
    # (layman) TabbedInterface takes its own theme argument; pass the same
    # theme each tab uses so the combined app keeps the look it had before.
    theme="Nymbo/Nymbo_Theme",
)

# Launch the UI and expose all functions as MCP tools in one server
if __name__ == "__main__":
    demo.launch(mcp_server=True)