Spaces:

Nymbo
/

Web

Running

App Files Files Community

Nymbo committed 5 days ago

Commit

b923a7c

verified ·

1 Parent(s): a655b89

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -20

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 # File: main/app.py
-# Purpose: One Space that offers two tools:
-#          1) Fetch: extract relevant page content (title, metadata, clean text, hyperlinks)
-#          2) Websearch: DuckDuckGo web search
 #
 # Notes:
-# - Launched with mcp_server=True so both functions are available as MCP tools.
-# - UI uses TabbedInterface so you can use each tool from its own tab.
-# - Inline comments explain each section in plain language.
 from __future__ import annotations
@@ -19,9 +20,12 @@ from bs4 import BeautifulSoup                     # (layman) for parsing HTML
 from readability import Document                  # (layman) to isolate main readable content
 from urllib.parse import urljoin, urldefrag, urlparse  # (layman) to fix/clean URLs
-# DuckDuckGo via LangChain community tool
 from langchain_community.tools import DuckDuckGoSearchResults
 # ==============================
 # Fetch: HTTP + extraction utils
@@ -299,9 +303,9 @@ def extract_relevant(  # <-- MCP tool #1
     return md or "No content could be extracted."
-# ==========================
-# Websearch: DuckDuckGo tool
-# ==========================
 def web_search(  # <-- MCP tool #2
     input_query: str,
@@ -321,8 +325,73 @@ def web_search(  # <-- MCP tool #2
     return results
 # =====================
-# UI: two-tab interface
 # =====================
 # --- Fetch tab (compact controllable extraction) ---
@@ -344,7 +413,7 @@ fetch_interface = gr.Interface(
     theme="Nymbo/Nymbo_Theme",
 )
-# --- Websearch tab (DuckDuckGo) ---
 websearch_interface = gr.Interface(
     fn=web_search,  # (layman) connect the function to the UI
     inputs=[
@@ -352,20 +421,43 @@ websearch_interface = gr.Interface(
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
     ],
     outputs=gr.JSON(label="Search results"),
-    title="Websearch — DuckDuckGo",
-    description="Search the web using DuckDuckGo; returns snippet, title, and link.",
     allow_flagging="never",
     theme="Nymbo/Nymbo_Theme",
 )
-# --- Combine both into a single app with tabs ---
-demo = gr.TabbedInterface(
-    interface_list=[fetch_interface, websearch_interface],
-    tab_names=["Fetch", "Websearch"],
-    title="Web MCP — Fetch + Websearch",
     theme="Nymbo/Nymbo_Theme",
 )
-# Launch the UI and expose both functions as MCP tools in one server
 if __name__ == "__main__":
     demo.launch(mcp_server=True)

 # File: main/app.py
+# Purpose: One Space that offers three tools in one MCP server:
+#          1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
+#          2) Websearch — structured DuckDuckGo results (JSON via LangChain wrapper)
+#          3) DDG (Unstructured) — compact plain-text DuckDuckGo results for low token usage
 #
 # Notes:
+# - Launched with mcp_server=True so all functions are exposed as MCP tools.
+# - UI uses TabbedInterface: each tool has its own tab.
+# - Inline comments describe each section in plain language.
 from __future__ import annotations
 from readability import Document                  # (layman) to isolate main readable content
 from urllib.parse import urljoin, urldefrag, urlparse  # (layman) to fix/clean URLs
+# Structured DDG search (LangChain wrapper)
 from langchain_community.tools import DuckDuckGoSearchResults
+# Unstructured DDG search (lightweight direct client)
+from duckduckgo_search import DDGS
 # ==============================
 # Fetch: HTTP + extraction utils
     return md or "No content could be extracted."
+# ========================================
+# Websearch (Structured): DuckDuckGo (JSON)
+# ========================================
 def web_search(  # <-- MCP tool #2
     input_query: str,
     return results
+# ===================================================
+# DDG (Unstructured): compact plain-text, low tokens
+# ===================================================
def web_search_unstructured(  # <-- MCP tool #3
    input_query: str,
    max_results: int = 5,
    style: Literal["urls", "titles+urls", "titles+urls+snippets"] = "titles+urls",
    snippet_max_chars: int = 160,
) -> str:
    """
    (layman) A lightweight DDG search that returns a plain-text list.

    - Fewer tokens than JSON; great for quick scanning or piping into LLM prompts.
    - 'style' controls how much text we include per entry.

    Args:
        input_query: What to search for. Empty / whitespace-only input
            returns "" without touching the network.
        max_results: Upper bound on results requested from the DDG client.
            Coerced to int because UI sliders may deliver floats.
        style: "urls" (one URL per line), "titles+urls" ("title — url"),
            or "titles+urls+snippets" (same, plus an indented snippet line).
            Any other value is treated like "titles+urls+snippets".
        snippet_max_chars: Maximum snippet length (including the trailing
            "…"). Zero or a negative value disables truncation.

    Returns:
        The formatted entries joined by newlines, or "" when there is
        nothing to show.

    Note:
        Exceptions raised by the DDG client are not caught here.
    """
    if not input_query or not input_query.strip():
        return ""

    # (layman) Sliders can hand us floats (e.g. 160.0); slicing and the DDG
    # client both need real integers, so convert once up front.
    result_cap = None if max_results is None else int(max_results)
    char_cap = int(snippet_max_chars) if snippet_max_chars else 0

    # (layman) Anything that is not one of the two snippet-free styles gets
    # snippets — this keeps the original fall-through behaviour.
    include_snippets = style not in ("urls", "titles+urls")

    def _one_line(value) -> str:
        # (layman) Collapse newlines/tabs/double spaces so one result can
        # never spill over into something that looks like another entry.
        return " ".join((value or "").split())

    # (layman) Run the search using the lightweight DDG client
    with DDGS() as ddgs:
        results = list(ddgs.text(input_query, max_results=result_cap))

    lines: List[str] = []
    for r in results:
        # (layman) Normalize fields because DDG library keys can vary by version
        url = (r.get("href") or r.get("link") or r.get("url") or "").strip()

        if style == "urls":
            if url:
                lines.append(url)
            continue

        title = _one_line(r.get("title"))
        # (layman) "title — url" when both exist, otherwise whichever we have
        entry = " — ".join(part for part in (title, url) if part)
        if not entry:
            # (layman) A bare snippet with no title or URL is not useful
            continue

        if include_snippets:
            snippet = _one_line(r.get("body") or r.get("snippet"))
            # (layman) Truncate snippet to keep output tight
            if char_cap > 0 and len(snippet) > char_cap:
                snippet = snippet[: char_cap - 1].rstrip() + "…"
            if snippet:
                entry = f"{entry}\n  {snippet}"

        lines.append(entry)

    # (layman) Join entries with newlines to form a compact text block
    return "\n".join(lines)
 # =====================
+# UI: three-tab interface
 # =====================
 # --- Fetch tab (compact controllable extraction) ---
     theme="Nymbo/Nymbo_Theme",
 )
+# --- Websearch tab (structured JSON) ---
 websearch_interface = gr.Interface(
     fn=web_search,  # (layman) connect the function to the UI
     inputs=[
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
     ],
     outputs=gr.JSON(label="Search results"),
+    title="Websearch — DuckDuckGo (JSON)",
+    description="Search the web using DuckDuckGo; returns snippet, title, and link as JSON.",
     allow_flagging="never",
     theme="Nymbo/Nymbo_Theme",
 )
# --- DDG (Unstructured) tab: plain-text results that keep token counts low ---
unstructured_interface = gr.Interface(
    # (layman) the function this tab runs when you press Submit
    fn=web_search_unstructured,
    title="DDG — Unstructured (Compact)",
    description="Outputs a plain-text list (great for low-token prompts).",
    inputs=[
        # (layman) 1) what to search for
        gr.Textbox(label="Search query", value="", placeholder="concise keywords"),
        # (layman) 2) how many results to ask for
        gr.Slider(label="Max results", minimum=1, maximum=20, step=1, value=5),
        # (layman) 3) how much detail each entry carries
        gr.Dropdown(
            choices=["urls", "titles+urls", "titles+urls+snippets"],
            value="titles+urls",
            label="Output style",
            info="Plain text list; choose how much detail to include.",
        ),
        # (layman) 4) cap on snippet length
        gr.Slider(
            label="Snippet max chars",
            minimum=40,
            maximum=400,
            step=10,
            value=160,
            info="Truncate snippet length to keep token usage low.",
        ),
    ],
    # (layman) read-only text box that shows the formatted list
    outputs=gr.Textbox(label="Results (plain text)", interactive=False),
    theme="Nymbo/Nymbo_Theme",
    allow_flagging="never",
)
# --- Combine all three into a single app with tabs ---
# (layman) The theme is set here as well as on each tab: the previous two-tab
# version themed the combined app, and leaving it off here would presumably
# render the whole app with Gradio's default look — confirm against Gradio docs.
demo = gr.TabbedInterface(
    interface_list=[fetch_interface, websearch_interface, unstructured_interface],
    tab_names=["Fetch", "Websearch", "DDG (Unstructured)"],
    theme="Nymbo/Nymbo_Theme",
)

# Launch the UI and expose all functions as MCP tools in one server
if __name__ == "__main__":
    demo.launch(mcp_server=True)