Nymbo committed on
Commit
0a0a050
·
verified ·
1 Parent(s): 575a45b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -100
app.py CHANGED
@@ -1,13 +1,10 @@
1
  # File: main/app.py
2
- # Purpose: One Space that offers three tools in one MCP server:
3
- # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
- # 2) Websearch — structured DuckDuckGo results (JSON via LangChain wrapper)
5
- # 3) DDG (Unstructured) — compact plain-text DuckDuckGo results for low token usage
6
  #
7
- # Notes:
8
- # - Launched with mcp_server=True so all functions are exposed as MCP tools.
9
- # - UI uses TabbedInterface: each tool has its own tab.
10
- # - Inline comments describe each section in plain language.
11
 
12
  from __future__ import annotations
13
 
@@ -20,10 +17,9 @@ from bs4 import BeautifulSoup # (layman) for parsing HTML
20
  from readability import Document # (layman) to isolate main readable content
21
  from urllib.parse import urljoin, urldefrag, urlparse # (layman) to fix/clean URLs
22
 
23
- # Structured DDG search (LangChain wrapper)
24
  from langchain_community.tools import DuckDuckGoSearchResults
25
-
26
- # Unstructured DDG search (lightweight direct client)
27
  from duckduckgo_search import DDGS
28
 
29
 
@@ -239,7 +235,7 @@ def _format_markdown(
239
  return "\n\n".join(lines).strip()
240
 
241
 
242
- def extract_relevant( # <-- MCP tool #1
243
  url: str,
244
  verbosity: str = "Standard",
245
  include_metadata: bool = True,
@@ -303,11 +299,11 @@ def extract_relevant( # <-- MCP tool #1
303
  return md or "No content could be extracted."
304
 
305
 
306
- # ========================================
307
- # Websearch (Structured): DuckDuckGo (JSON)
308
- # ========================================
309
 
310
- def web_search( # <-- MCP tool #2
311
  input_query: str,
312
  max_results: int = 5,
313
  ) -> List[Dict[Literal["snippet", "title", "link"], str]]:
@@ -325,69 +321,21 @@ def web_search( # <-- MCP tool #2
325
  return results
326
 
327
 
328
- # ===================================================
329
- # DDG (Unstructured): compact plain-text, low tokens
330
- # ===================================================
331
 
332
- def web_search_unstructured( # <-- MCP tool #3
333
- input_query: str,
334
- max_results: int = 5,
335
- style: Literal["urls", "titles+urls", "titles+urls+snippets"] = "titles+urls",
336
- snippet_max_chars: int = 160,
337
- ) -> str:
338
  """
339
- (layman) A lightweight DDG search that returns a plain-text list.
340
- - Fewer tokens than JSON; great for quick scanning or piping into LLM prompts.
341
- - 'style' controls how much text we include per line.
342
  """
343
- if not input_query or not input_query.strip():
344
- return ""
345
-
346
- # (layman) Run the search using the lightweight DDG client
347
  with DDGS() as ddgs:
348
- results = list(ddgs.text(input_query, max_results=max_results))
349
-
350
- # (layman) Normalize fields because DDG library keys can vary by version
351
- lines: List[str] = []
352
- for r in results:
353
- title = (r.get("title") or "").strip()
354
- url = (r.get("href") or r.get("link") or r.get("url") or "").strip()
355
- snippet = (r.get("body") or r.get("snippet") or "").strip()
356
-
357
- # (layman) Truncate snippet to keep output tight
358
- if snippet_max_chars and len(snippet) > snippet_max_chars:
359
- snippet = snippet[:snippet_max_chars - 1].rstrip() + "…"
360
-
361
- # (layman) Build each line according to the chosen style
362
- if style == "urls":
363
- if url:
364
- lines.append(url)
365
- elif style == "titles+urls":
366
- if title and url:
367
- lines.append(f"{title} — {url}")
368
- elif url:
369
- lines.append(url)
370
- elif title:
371
- lines.append(title)
372
- else: # titles+urls+snippets
373
- if title and url and snippet:
374
- lines.append(f"{title} — {url}\n {snippet}")
375
- elif title and url:
376
- lines.append(f"{title} — {url}")
377
- elif url:
378
- # (layman) If only URL is available, still show it
379
- if snippet:
380
- lines.append(f"{url}\n {snippet}")
381
- else:
382
- lines.append(url)
383
- elif title:
384
- if snippet:
385
- lines.append(f"{title}\n {snippet}")
386
- else:
387
- lines.append(title)
388
-
389
- # (layman) Join lines with newlines to form a compact text block
390
- return "\n".join(lines).strip()
391
 
392
 
393
  # =====================
@@ -413,7 +361,7 @@ fetch_interface = gr.Interface(
413
  theme="Nymbo/Nymbo_Theme",
414
  )
415
 
416
- # --- Websearch tab (structured JSON) ---
417
  websearch_interface = gr.Interface(
418
  fn=web_search, # (layman) connect the function to the UI
419
  inputs=[
@@ -421,41 +369,30 @@ websearch_interface = gr.Interface(
421
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
422
  ],
423
  outputs=gr.JSON(label="Search results"),
424
- title="Websearch — DuckDuckGo (JSON)",
425
- description="Search the web using DuckDuckGo; returns snippet, title, and link as JSON.",
426
  allow_flagging="never",
427
  theme="Nymbo/Nymbo_Theme",
428
  )
429
 
430
- # --- DDG (Unstructured) tab (plain text, low tokens) ---
431
  unstructured_interface = gr.Interface(
432
- fn=web_search_unstructured, # (layman) connect the function to the UI
433
- inputs=[
434
- gr.Textbox(value="", label="Search query", placeholder="concise keywords"),
435
- gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
436
- gr.Dropdown(
437
- label="Output style",
438
- choices=["urls", "titles+urls", "titles+urls+snippets"],
439
- value="titles+urls",
440
- info="Plain text list; choose how much detail to include."
441
- ),
442
- gr.Slider(
443
- minimum=40, maximum=400, value=160, step=10,
444
- label="Snippet max chars",
445
- info="Truncate snippet length to keep token usage low."
446
- ),
447
- ],
448
- outputs=gr.Textbox(label="Results (plain text)", interactive=False),
449
- title="DDG — Unstructured (Compact)",
450
- description="Outputs a plain-text list (great for low-token prompts).",
451
  allow_flagging="never",
452
  theme="Nymbo/Nymbo_Theme",
 
453
  )
454
 
455
- # --- Combine all three into a single app with tabs ---
456
  demo = gr.TabbedInterface(
457
  interface_list=[fetch_interface, websearch_interface, unstructured_interface],
458
- tab_names=["Fetch", "Websearch", "DDG (Unstructured)"],
 
 
459
  )
460
 
461
  # Launch the UI and expose all functions as MCP tools in one server
 
1
  # File: main/app.py
2
+ # Purpose: One Space that offers three tools/tabs:
3
+ # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
+ # 2) Websearch — structured DuckDuckGo search via LangChain tool (JSON)
5
+ # 3) Unstructured DDG — raw DuckDuckGo list[dict] rendered into a Textbox (matches your app)
6
  #
7
+ # Launched with mcp_server=True so all functions are available as MCP tools.
 
 
 
8
 
9
  from __future__ import annotations
10
 
 
17
  from readability import Document # (layman) to isolate main readable content
18
  from urllib.parse import urljoin, urldefrag, urlparse # (layman) to fix/clean URLs
19
 
20
+ # Structured search via LangChain community tool
21
  from langchain_community.tools import DuckDuckGoSearchResults
22
+ # Unstructured search using the native DDG client (matches your separate space)
 
23
  from duckduckgo_search import DDGS
24
 
25
 
 
235
  return "\n\n".join(lines).strip()
236
 
237
 
238
+ def extract_relevant( # <-- MCP tool #1 (Fetch)
239
  url: str,
240
  verbosity: str = "Standard",
241
  include_metadata: bool = True,
 
299
  return md or "No content could be extracted."
300
 
301
 
302
+ # ==========================
303
+ # Websearch: DuckDuckGo tool
304
+ # ==========================
305
 
306
+ def web_search( # <-- MCP tool #2 (Structured DDG)
307
  input_query: str,
308
  max_results: int = 5,
309
  ) -> List[Dict[Literal["snippet", "title", "link"], str]]:
 
321
  return results
322
 
323
 
324
+ # ========================================
325
+ # Unstructured DDG: raw list into Textbox
326
+ # ========================================
327
 
328
+ def ddg_unstructured( # <-- MCP tool #3 (Unstructured DDG)
329
+ query: str,
330
+ ) -> list[dict]:
 
 
 
331
  """
332
+ (layman) Native DDG client. Returns a plain list[dict] — exactly like your separate space.
 
 
333
  """
334
+ if not query or not query.strip():
335
+ return []
 
 
336
  with DDGS() as ddgs:
337
+ results = ddgs.text(query, max_results=5)
338
+ return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
 
341
  # =====================
 
361
  theme="Nymbo/Nymbo_Theme",
362
  )
363
 
364
+ # --- Websearch tab (structured DDG via LangChain) ---
365
  websearch_interface = gr.Interface(
366
  fn=web_search, # (layman) connect the function to the UI
367
  inputs=[
 
369
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
370
  ],
371
  outputs=gr.JSON(label="Search results"),
372
+ title="Websearch — DuckDuckGo (Structured)",
373
+ description="Search the web using DuckDuckGo; returns snippet, title, and link.",
374
  allow_flagging="never",
375
  theme="Nymbo/Nymbo_Theme",
376
  )
377
 
378
+ # --- Unstructured DDG tab (matches your separate app’s output) ---
379
  unstructured_interface = gr.Interface(
380
+ fn=ddg_unstructured, # (layman) raw DDG function
381
+ inputs=gr.Textbox(label="Enter Search Query"), # (layman) same single input
382
+ outputs=gr.Textbox(label="Results", interactive=False), # (layman) Textbox showing str(list[dict])
383
+ title="Unstructured DDG (Raw List)", # (layman) clear label
384
+ description="Returns the raw list of results (list[dict]) shown as text.", # (layman) behavior note
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  allow_flagging="never",
386
  theme="Nymbo/Nymbo_Theme",
387
+ submit_btn="Search", # (layman) match your original button label
388
  )
389
 
390
+ # --- Combine all into a single app with tabs ---
391
  demo = gr.TabbedInterface(
392
  interface_list=[fetch_interface, websearch_interface, unstructured_interface],
393
+ tab_names=["Fetch", "Websearch", "Unstructured DDG"],
394
+ title="Web MCP — Fetch + Websearch + Unstructured DDG",
395
+ theme="Nymbo/Nymbo_Theme",
396
  )
397
 
398
  # Launch the UI and expose all functions as MCP tools in one server