Final_Assignment_Template

Running

App Files Files Community

naman1102 committed 14 days ago

Commit

622f2bb

1 Parent(s): aeb4eba

No Brave

Browse files

Files changed (2) hide show

app.py +5 -6
tools.py +22 -173

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Annotated
 from langgraph.graph import Graph, StateGraph
 from typing_extensions import TypedDict
 from openai import OpenAI
-from tools import smart_search
 # -------------------------
 # Utility helpers
@@ -29,7 +29,6 @@ def merge_dicts(old: Dict, new: Dict) -> Dict:
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-BRAVE_KEY = os.getenv("BRAVE_API_KEY") or ""   # set in HF Space secrets
 # Remove logs directory creation since we're not storing logs anymore
@@ -97,10 +96,10 @@ class BasicAgent:
         return state
     def _perform_search(self, state: AgentState) -> AgentState:
-        try:
-            results = smart_search(state["search_query"], BRAVE_KEY, max_results=5)
-        except Exception as e:
-            results = [f"SEARCH_ERROR: {e}"]
         state["history"].append({"step": "search", "results": results})
         state["logs"]["search"] = {"query": state["search_query"], "results": results}
         state["current_step"] = "answer"

 from langgraph.graph import Graph, StateGraph
 from typing_extensions import TypedDict
 from openai import OpenAI
+from tools import simple_search
 # -------------------------
 # Utility helpers
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 # Remove logs directory creation since we're not storing logs anymore
         return state
     def _perform_search(self, state: AgentState) -> AgentState:
+        results = simple_search(state["search_query"], max_results=5)
+        print("\nSearch Results:")
+        for i, s in enumerate(results, 1):
+            print(f"[{i}] {s[:120]}…")
         state["history"].append({"step": "search", "results": results})
         state["logs"]["search"] = {"query": state["search_query"], "results": results}
         state["current_step"] = "answer"

tools.py CHANGED Viewed

@@ -1,179 +1,28 @@
 from typing import List
-from duckduckgo_search import DDGS
-import requests
-import os
-import json
-import time
-from urllib.parse import quote_plus
 import re
-# --- Simple Search Tool ---
-def tighten(question: str) -> str:
-    """Extract key terms from verbose questions to improve search results."""
-    print("\n=== Query Tightening ===")
-    print(f"Original query: {question}")
-    # Find quoted phrases
-    quoted = re.findall(r'"([^"]+)"', question)
-    print(f"Quoted phrases: {quoted}")
-    # Find capitalized terms
-    caps = re.findall(r'\b([A-Z0-9][\w-]{2,})', question)
-    print(f"Capitalized terms: {caps}")
-    # Combine and clean
-    short = " ".join(quoted + caps)
-    result = short or question
-    print(f"Tightened query: {result}")
-    return result
-def simple_search(query: str, max_results: int = 3) -> List[str]:
-    """Fallback to DuckDuckGo search."""
-    print("\n=== DuckDuckGo Search Debug ===")
-    print(f"Query: {query}")
-    print(f"Max Results: {max_results}")
-    try:
-        with DDGS() as ddgs:
-            raw = list(ddgs.text(query, max_results=max_results))
-        print(f"Retrieved {len(raw)} raw results")
-        out = []
-        for r in raw:
-            try:
-                result = f"{r.get('title','')} – {r.get('href') or r.get('link','')}"
-                out.append(result)
-                print(f"Processed result: {result[:100]}...")
-            except Exception as e:
-                print(f"Error processing result: {e}")
-                pass
-        print(f"\nFinal results count: {len(out)}")
-        if out:
-            print("\nFirst result preview:")
-            print(out[0][:200] + "..." if len(out[0]) > 200 else out[0])
-        return out
-    except Exception as e:
-        print(f"ERROR in DuckDuckGo search: {str(e)}")
-        raise
-# --- Jina Search Tool ---
-def jina_search_tool(query: str, api_key: str) -> List[str]:
-    """
-    Perform a web search using Jina AI's s.jina.ai endpoint and retrieve clean, LLM-friendly content.
-    """
-    api_endpoint = f"https://s.jina.ai/{query.replace(' ', '+')}"
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Accept": "application/json",
-        "User-Agent": "Mozilla/5.0"
-    }
-    try:
-        response = requests.get(api_endpoint, headers=headers, timeout=10)
-        if response.status_code == 200:
-            data = response.json()
-            contents = [item.get("content", "") for item in data.get("results", [])]
-            return contents
-        else:
-            print(f"Failed to fetch search results: Status code {response.status_code}")
-            return []
-    except Exception as e:
-        print(f"Error fetching search results: {e}")
-        return []
-def brave_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
-    """
-    Query Brave Web Search API and return a list of 'title – url\\nbody' strings.
-    """
-    print("\n=== Brave Search Debug ===")
-    print(f"Query: {query}")
-    print(f"Max Results: {max_results}")
-    if not api_key:
-        print("ERROR: BRAVE_API_KEY not set")
-        raise RuntimeError("BRAVE_API_KEY not set")
-    url = (
-        "https://api.search.brave.com/res/v1/web/search?"
-        f"q={quote_plus(query)}&count={max_results}"
-    )
-    print(f"URL: {url}")
-    hdrs = {
-        "X-Subscription-Token": api_key,
-        "Accept": "application/json",
-        "User-Agent": "Mozilla/5.0",
-    }
-    print("Headers:", {k: v[:10] + "..." if k == "X-Subscription-Token" else v for k, v in hdrs.items()})
-    try:
-        print("\nSending request to Brave API...")
-        r = requests.get(url, headers=hdrs, timeout=12)
-        print(f"Response Status: {r.status_code}")
-        if r.status_code == 429:
-            print("Rate limit hit, waiting 2 seconds and retrying...")
-            time.sleep(2)
-            r = requests.get(url, headers=hdrs, timeout=12)
-            print(f"Retry Response Status: {r.status_code}")
-        r.raise_for_status()
-        data = r.json().get("web", {}).get("results", [])
-        print("\nResponse Data Structure:")
-        print(f"Number of results: {len(data)}")
-        results = [
-            f"{d['title']} – {d['url']}\n{d['body']}"
-            for d in data
-        ][:max_results]
-        print(f"\nRetrieved {len(results)} results")
-        if results:
-            print("\nFirst result preview:")
-            print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])
-        if not results:
-            print("WARNING: No results found in the response")
-            print("Full response data:", json.dumps(r.json(), indent=2)[:1000])
-        return results
-    except requests.exceptions.Timeout:
-        print("ERROR: Request timed out after 12 seconds")
-        raise
-    except requests.exceptions.RequestException as e:
-        print(f"ERROR: Network/Request error: {str(e)}")
-        raise
-    except json.JSONDecodeError as e:
-        print(f"ERROR: Failed to parse JSON response: {str(e)}")
-        print(f"Raw response: {r.text[:500]}")
-        raise
-    except Exception as e:
-        print(f"ERROR: Unexpected error: {str(e)}")
-        raise
-def smart_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
     """
-    1) Brave (if key + quota OK) → 2) DuckDuckGo.
     """
-    print("\n=== Smart Search Debug ===")
-    print(f"Original query: {query}")
-    # Tighten the query before searching
-    query = tighten(query)
-    print(f"Tightened query: {query}")
-    print(f"Max Results: {max_results}")
-    try:
-        print("Attempting Brave search first...")
-        return brave_search(query, api_key, max_results)
-    except Exception as e:
-        print(f"Brave search failed: {e}")
-        print("Falling back to DuckDuckGo...")
-        return simple_search(query, max_results)

 from typing import List
+from duckduckgo_search import DDGS   # pip install -U duckduckgo-search
 import re
+# -------- helper to shorten very long GAIA questions (optional but helpful)
+def tighten(q: str) -> str:
+    """Condense a verbose question into a short keyword query.
+
+    Keeps every double-quoted phrase, followed by each token that starts
+    with an uppercase letter or digit and is at least three characters
+    long. Tokens already contained in a quoted phrase, and repeated
+    tokens, are dropped so the query is not padded with duplicates.
+    Returns ``q`` unchanged when nothing can be extracted.
+    """
+    quoted = list(dict.fromkeys(re.findall(r'"([^"]+)"', q)))
+    caps   = re.findall(r'\b([A-Z0-9][\w-]{2,})', q)
+    # Words inside quoted phrases are already part of the query; seed the
+    # "seen" set with them so the capitalised pass does not repeat them.
+    seen = {w.casefold() for phrase in quoted for w in re.findall(r'[\w-]+', phrase)}
+    terms = list(quoted)
+    for word in caps:
+        key = word.casefold()
+        if key not in seen:
+            seen.add(key)
+            terms.append(word)
+    return " ".join(terms) or q
+# -------- the only search function your agent will call
+def simple_search(query: str, max_results: int = 5) -> List[str]:
     """
+    Perform a DuckDuckGo search and return 'title – url' snippets.
+
+    The query is first condensed with tighten(). Each raw result is
+    expected to be a dict; the link is read from its "href" key, falling
+    back to "link". Results that are not dicts, or that carry neither a
+    title nor a link, are skipped instead of producing an empty snippet.
+
+    Any exception raised by DDGS (network failure, rate limiting, ...) is
+    not caught here and propagates to the caller.
     """
+    query = tighten(query)           # optional heuristic cleaner
+    with DDGS() as ddgs:             # context-manager is the recommended way 🐤
+        # `or []` guards against a client version that returns None
+        raw = list(ddgs.text(query, max_results=max_results) or [])
+    out = []
+    for r in raw:
+        # Explicit shape check instead of swallowing every exception.
+        if not isinstance(r, dict):
+            continue
+        title = r.get("title", "")
+        link  = r.get("href") or r.get("link", "")
+        if not (title or link):
+            continue
+        out.append(f"{title} – {link}")
+    return out