Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
# File: main/app.py
|
2 |
# Purpose: One Space that offers five tools/tabs:
|
3 |
# 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
|
4 |
-
# 2) Websearch — structured DuckDuckGo search via LangChain tool (JSON)
|
5 |
-
# 3) Unstructured DDG — raw DuckDuckGo list[dict] rendered into a Textbox
|
6 |
-
# 4) DDG (Concise) — ultra-succinct DuckDuckGo search that emits JSONL with short keys to minimize tokens
|
7 |
# 5) Generate Sitemap — LIMITED: grouped internal/external links with an optional per-domain cap (and a .md download)
|
8 |
|
9 |
from __future__ import annotations
|
@@ -322,7 +322,7 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
|
|
322 |
# Websearch: DuckDuckGo tool
|
323 |
# ==========================
|
324 |
|
325 |
-
def Search_Structured( # <-- MCP tool #2 (Structured DDG)
|
326 |
input_query: str,
|
327 |
max_results: int = 5,
|
328 |
) -> List[Dict[Literal["snippet", "title", "link"], str]]:
|
@@ -345,7 +345,7 @@ def Search_Structured( # <-- MCP tool #2 (Structured DDG)
|
|
345 |
# Unstructured DDG: raw list into Textbox
|
346 |
# ========================================
|
347 |
|
348 |
-
def Search_Raw( # <-- MCP tool #3 (Unstructured DDG)
|
349 |
query: str,
|
350 |
) -> list[dict]:
|
351 |
"""
|
@@ -364,7 +364,7 @@ def Search_Raw( # <-- MCP tool #3 (Unstructured DDG)
|
|
364 |
# Concise DDG: ultra-succinct JSONL for tokens
|
365 |
# ============================================
|
366 |
|
367 |
-
def Search_Concise( # <-- MCP tool #4 (Concise DDG)
|
368 |
query: str,
|
369 |
max_results: int = 5,
|
370 |
include_snippets: bool = False,
|
@@ -568,6 +568,30 @@ fetch_interface = gr.Interface(
|
|
568 |
theme="Nymbo/Nymbo_Theme",
|
569 |
)
|
570 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
571 |
# --- Websearch tab (structured DDG via LangChain) ---
|
572 |
websearch_interface = gr.Interface(
|
573 |
fn=Search_Structured, # connect the function to the UI
|
@@ -602,30 +626,6 @@ unstructured_interface = gr.Interface(
|
|
602 |
submit_btn="Search",
|
603 |
)
|
604 |
|
605 |
-
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
606 |
-
concise_interface = gr.Interface(
|
607 |
-
fn=Search_Concise,
|
608 |
-
inputs=[
|
609 |
-
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
610 |
-
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
611 |
-
gr.Checkbox(value=False, label="Include snippets (adds tokens)"),
|
612 |
-
gr.Slider(minimum=20, maximum=200, value=80, step=5, label="Max snippet chars"),
|
613 |
-
gr.Checkbox(value=True, label="Dedupe by domain"),
|
614 |
-
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
615 |
-
],
|
616 |
-
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
617 |
-
title="DuckDuckGo Search (Concise)",
|
618 |
-
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
619 |
-
api_description=(
|
620 |
-
"Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
|
621 |
-
"t=title, u=url, optional s=snippet. Options control result count, "
|
622 |
-
"snippet inclusion and length, domain deduping, and title length."
|
623 |
-
),
|
624 |
-
allow_flagging="never",
|
625 |
-
theme="Nymbo/Nymbo_Theme",
|
626 |
-
submit_btn="Search",
|
627 |
-
)
|
628 |
-
|
629 |
# --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
|
630 |
sitemap_interface = gr.Interface(
|
631 |
fn=Generate_Sitemap,
|
@@ -656,12 +656,12 @@ sitemap_interface = gr.Interface(
|
|
656 |
|
657 |
# --- Combine all into a single app with tabs ---
|
658 |
demo = gr.TabbedInterface(
|
659 |
-
interface_list=[fetch_interface, websearch_interface, unstructured_interface, concise_interface, sitemap_interface],
|
660 |
tab_names=[
|
661 |
"Fetch Webpage",
|
|
|
662 |
"DuckDuckGo Search (Structured)",
|
663 |
"DuckDuckGo Search (Raw)",
|
664 |
-
"DuckDuckGo Search (Concise)",
|
665 |
"Generate Sitemap",
|
666 |
],
|
667 |
title="Web MCP — Fetch, Search, and Sitemaps with customizable output modes.",
|
|
|
1 |
# File: main/app.py
|
2 |
# Purpose: One Space that offers five tools/tabs:
|
3 |
# 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
|
4 |
+
# 2) DDG (Concise) — ultra-succinct DuckDuckGo search that emits JSONL with short keys to minimize tokens
|
5 |
+
# 3) Websearch — structured DuckDuckGo search via LangChain tool (JSON)
|
6 |
+
# 4) Unstructured DDG — raw DuckDuckGo list[dict] rendered into a Textbox
|
7 |
# 5) Generate Sitemap — LIMITED: grouped internal/external links with an optional per-domain cap (and a .md download)
|
8 |
|
9 |
from __future__ import annotations
|
|
|
322 |
# Websearch: DuckDuckGo tool
|
323 |
# ==========================
|
324 |
|
325 |
+
def Search_Structured( # <-- MCP tool #3 (Structured DDG)
|
326 |
input_query: str,
|
327 |
max_results: int = 5,
|
328 |
) -> List[Dict[Literal["snippet", "title", "link"], str]]:
|
|
|
345 |
# Unstructured DDG: raw list into Textbox
|
346 |
# ========================================
|
347 |
|
348 |
+
def Search_Raw( # <-- MCP tool #4 (Unstructured DDG)
|
349 |
query: str,
|
350 |
) -> list[dict]:
|
351 |
"""
|
|
|
364 |
# Concise DDG: ultra-succinct JSONL for tokens
|
365 |
# ============================================
|
366 |
|
367 |
+
def Search_Concise( # <-- MCP tool #2 (Concise DDG)
|
368 |
query: str,
|
369 |
max_results: int = 5,
|
370 |
include_snippets: bool = False,
|
|
|
568 |
theme="Nymbo/Nymbo_Theme",
|
569 |
)
|
570 |
|
571 |
+
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
572 |
+
concise_interface = gr.Interface(
|
573 |
+
fn=Search_Concise,
|
574 |
+
inputs=[
|
575 |
+
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
576 |
+
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
577 |
+
gr.Checkbox(value=False, label="Include snippets (adds tokens)"),
|
578 |
+
gr.Slider(minimum=20, maximum=200, value=80, step=5, label="Max snippet chars"),
|
579 |
+
gr.Checkbox(value=True, label="Dedupe by domain"),
|
580 |
+
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
581 |
+
],
|
582 |
+
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
583 |
+
title="DuckDuckGo Search (Concise)",
|
584 |
+
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
585 |
+
api_description=(
|
586 |
+
"Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
|
587 |
+
"t=title, u=url, optional s=snippet. Options control result count, "
|
588 |
+
"snippet inclusion and length, domain deduping, and title length."
|
589 |
+
),
|
590 |
+
allow_flagging="never",
|
591 |
+
theme="Nymbo/Nymbo_Theme",
|
592 |
+
submit_btn="Search",
|
593 |
+
)
|
594 |
+
|
595 |
# --- Websearch tab (structured DDG via LangChain) ---
|
596 |
websearch_interface = gr.Interface(
|
597 |
fn=Search_Structured, # connect the function to the UI
|
|
|
626 |
submit_btn="Search",
|
627 |
)
|
628 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
629 |
# --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
|
630 |
sitemap_interface = gr.Interface(
|
631 |
fn=Generate_Sitemap,
|
|
|
656 |
|
657 |
# --- Combine all into a single app with tabs ---
|
658 |
demo = gr.TabbedInterface(
|
659 |
+
interface_list=[fetch_interface, concise_interface, websearch_interface, unstructured_interface, sitemap_interface],
|
660 |
tab_names=[
|
661 |
"Fetch Webpage",
|
662 |
+
"DuckDuckGo Search (Concise)",
|
663 |
"DuckDuckGo Search (Structured)",
|
664 |
"DuckDuckGo Search (Raw)",
|
|
|
665 |
"Generate Sitemap",
|
666 |
],
|
667 |
title="Web MCP — Fetch, Search, and Sitemaps with customizable output modes.",
|