No Brave
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Annotated
|
|
10 |
from langgraph.graph import Graph, StateGraph
|
11 |
from typing_extensions import TypedDict
|
12 |
from openai import OpenAI
|
13 |
-
from tools import
|
14 |
|
15 |
# -------------------------
|
16 |
# Utility helpers
|
@@ -29,7 +29,6 @@ def merge_dicts(old: Dict, new: Dict) -> Dict:
|
|
29 |
|
30 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
31 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
32 |
-
BRAVE_KEY = os.getenv("BRAVE_API_KEY") or "" # set in HF Space secrets
|
33 |
|
34 |
# Remove logs directory creation since we're not storing logs anymore
|
35 |
|
@@ -97,10 +96,10 @@ class BasicAgent:
|
|
97 |
return state
|
98 |
|
99 |
def _perform_search(self, state: AgentState) -> AgentState:
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
state["history"].append({"step": "search", "results": results})
|
105 |
state["logs"]["search"] = {"query": state["search_query"], "results": results}
|
106 |
state["current_step"] = "answer"
|
|
|
10 |
from langgraph.graph import Graph, StateGraph
|
11 |
from typing_extensions import TypedDict
|
12 |
from openai import OpenAI
|
13 |
+
from tools import simple_search
|
14 |
|
15 |
# -------------------------
|
16 |
# Utility helpers
|
|
|
29 |
|
30 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
31 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
32 |
|
33 |
# Remove logs directory creation since we're not storing logs anymore
|
34 |
|
|
|
96 |
return state
|
97 |
|
98 |
def _perform_search(self, state: AgentState) -> AgentState:
|
99 |
+
results = simple_search(state["search_query"], max_results=5)
|
100 |
+
print("\nSearch Results:")
|
101 |
+
for i, s in enumerate(results, 1):
|
102 |
+
print(f"[{i}] {s[:120]}…")
|
103 |
state["history"].append({"step": "search", "results": results})
|
104 |
state["logs"]["search"] = {"query": state["search_query"], "results": results}
|
105 |
state["current_step"] = "answer"
|
tools.py
CHANGED
@@ -1,179 +1,28 @@
|
|
1 |
from typing import List
|
2 |
-
from duckduckgo_search import DDGS
|
3 |
-
import requests
|
4 |
-
import os
|
5 |
-
import json
|
6 |
-
import time
|
7 |
-
from urllib.parse import quote_plus
|
8 |
import re
|
9 |
|
10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
print("\n=== Query Tightening ===")
|
15 |
-
print(f"Original query: {question}")
|
16 |
-
|
17 |
-
# Find quoted phrases
|
18 |
-
quoted = re.findall(r'"([^"]+)"', question)
|
19 |
-
print(f"Quoted phrases: {quoted}")
|
20 |
-
|
21 |
-
# Find capitalized terms
|
22 |
-
caps = re.findall(r'\b([A-Z0-9][\w-]{2,})', question)
|
23 |
-
print(f"Capitalized terms: {caps}")
|
24 |
-
|
25 |
-
# Combine and clean
|
26 |
-
short = " ".join(quoted + caps)
|
27 |
-
result = short or question
|
28 |
-
print(f"Tightened query: {result}")
|
29 |
-
|
30 |
-
return result
|
31 |
-
|
32 |
-
def simple_search(query: str, max_results: int = 3) -> List[str]:
    """Fallback to DuckDuckGo search.

    Args:
        query: Free-text search query.
        max_results: Passed to ``DDGS.text`` as ``max_results``.

    Returns:
        A list of ``"title – url"`` strings, one per usable hit. An empty or
        whitespace-only query yields ``[]`` without touching the network.

    Raises:
        Exception: anything raised by the DuckDuckGo client is printed and
            re-raised unchanged.
    """
    print("\n=== DuckDuckGo Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    # A blank query cannot produce hits; skip the network round-trip.
    if not query or not query.strip():
        print("\nFinal results count: 0")
        return []

    try:
        with DDGS() as ddgs:
            raw = list(ddgs.text(query, max_results=max_results))
            print(f"Retrieved {len(raw)} raw results")

            out = []
            for r in raw:
                try:
                    result = f"{r.get('title','')} – {r.get('href') or r.get('link','')}"
                except AttributeError as e:
                    # Entry is not a mapping; report it and move on.
                    print(f"Error processing result: {e}")
                    continue
                out.append(result)
                print(f"Processed result: {result[:100]}...")

            print(f"\nFinal results count: {len(out)}")
            if out:
                print("\nFirst result preview:")
                print(out[0][:200] + "..." if len(out[0]) > 200 else out[0])

            return out
    except Exception as e:
        print(f"ERROR in DuckDuckGo search: {str(e)}")
        raise
|
62 |
-
|
63 |
-
# --- Jina Search Tool ---
|
64 |
-
|
65 |
-
def jina_search_tool(query: str, api_key: str) -> List[str]:
    """
    Perform a web search using Jina AI's s.jina.ai endpoint and retrieve clean, LLM-friendly content.

    Args:
        query: Free-text search query; it is URL-encoded before being placed
            in the request path.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.

    Returns:
        The ``content`` field of each returned item. Returns ``[]`` for a
        blank query, a non-200 status, or any error (which is printed).
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    # Nothing to search for: avoid a request that cannot succeed.
    if not query or not query.strip():
        return []

    # quote_plus keeps the original space -> '+' mapping while also escaping
    # characters such as '?', '#', '/' and '&' that would corrupt the path.
    api_endpoint = f"https://s.jina.ai/{quote_plus(query)}"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Accept": "application/json",
        "User-Agent": "Mozilla/5.0"
    }

    try:
        response = requests.get(api_endpoint, headers=headers, timeout=10)
        if response.status_code != 200:
            print(f"Failed to fetch search results: Status code {response.status_code}")
            return []

        data = response.json()
        # NOTE(review): the endpoint appears to wrap hits in "data" rather
        # than "results" — confirm against the Jina API docs. Both keys are
        # accepted so either response shape works.
        items = data.get("results") or data.get("data") or []
        if not isinstance(items, list):
            items = []
        return [item.get("content", "") for item in items]
    except Exception as e:
        # Best-effort tool: report the problem and return no results.
        print(f"Error fetching search results: {e}")
        return []
|
88 |
-
|
89 |
-
def brave_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
    """
    Query Brave Web Search API and return a list of 'title – url\\nbody' strings.

    Args:
        query: Search text; URL-encoded with ``quote_plus`` before sending.
        api_key: Brave token, sent in the ``X-Subscription-Token`` header.
        max_results: Sent as the ``count`` parameter and used to cap the
            returned list.

    Returns:
        Up to ``max_results`` formatted strings; may be empty.

    Raises:
        RuntimeError: if ``api_key`` is empty.
        requests.exceptions.RequestException: on timeout, network failure or
            an error status (after one retry on HTTP 429).
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    print("\n=== Brave Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    if not api_key:
        print("ERROR: BRAVE_API_KEY not set")
        raise RuntimeError("BRAVE_API_KEY not set")

    url = (
        "https://api.search.brave.com/res/v1/web/search?"
        f"q={quote_plus(query)}&count={max_results}"
    )
    print(f"URL: {url}")

    hdrs = {
        "X-Subscription-Token": api_key,
        "Accept": "application/json",
        "User-Agent": "Mozilla/5.0",
    }
    # Mask the token completely: even a prefix of a secret does not belong in logs.
    print("Headers:", {k: "***" if k == "X-Subscription-Token" else v for k, v in hdrs.items()})

    try:
        print("\nSending request to Brave API...")
        r = requests.get(url, headers=hdrs, timeout=12)
        print(f"Response Status: {r.status_code}")

        if r.status_code == 429:
            print("Rate limit hit, waiting 2 seconds and retrying...")
            time.sleep(2)
            r = requests.get(url, headers=hdrs, timeout=12)
            print(f"Retry Response Status: {r.status_code}")

        r.raise_for_status()
        payload = r.json()
        data = (payload.get("web") or {}).get("results") or []

        print("\nResponse Data Structure:")
        print(f"Number of results: {len(data)}")

        # NOTE(review): the snippet text is believed to be under
        # "description" — confirm against the Brave API docs. "body" is kept
        # as a fallback so the old response shape still works, and .get()
        # avoids a KeyError discarding the whole response.
        results = [
            f"{d.get('title', '')} – {d.get('url', '')}\n"
            f"{d.get('description') or d.get('body', '')}"
            for d in data
        ][:max_results]

        print(f"\nRetrieved {len(results)} results")
        if results:
            print("\nFirst result preview:")
            print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])

        if not results:
            print("WARNING: No results found in the response")
            print("Full response data:", json.dumps(payload, indent=2)[:1000])

        return results

    except requests.exceptions.Timeout:
        print("ERROR: Request timed out after 12 seconds")
        raise
    except json.JSONDecodeError as e:
        # Checked before RequestException so that a JSON error which also
        # subclasses RequestException still reaches this handler.
        print(f"ERROR: Failed to parse JSON response: {str(e)}")
        print(f"Raw response: {r.text[:500]}")
        raise
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Network/Request error: {str(e)}")
        raise
    except Exception as e:
        print(f"ERROR: Unexpected error: {str(e)}")
        raise
|
160 |
-
|
161 |
-
def smart_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
|
162 |
"""
|
163 |
-
|
164 |
"""
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
print(f"Brave search failed: {e}")
|
178 |
-
print("Falling back to DuckDuckGo...")
|
179 |
-
return simple_search(query, max_results)
|
|
|
1 |
from typing import List
|
2 |
+
from duckduckgo_search import DDGS # pip install -U duckduckgo-search
|
|
|
|
|
|
|
|
|
|
|
3 |
import re
|
4 |
|
5 |
+
# -------- helper to shorten very long GAIA questions (optional but helpful)
|
6 |
+
def tighten(q: str) -> str:
    """Condense a long question into a compact keyword query.

    Keeps every double-quoted phrase, plus every term outside the quotes that
    starts with an uppercase letter or digit and is at least three characters
    long. Each term appears once, in first-seen order.

    Args:
        q: The original question text.

    Returns:
        The space-joined terms, or ``q`` unchanged when nothing was extracted.
    """
    quoted = re.findall(r'"([^"]+)"', q)
    # Look for capitalised terms only outside the quoted phrases, so words
    # already covered by a phrase are not repeated in the query.
    unquoted = re.sub(r'"[^"]+"', " ", q)
    caps = re.findall(r'\b([A-Z0-9][\w-]{2,})', unquoted)
    # dict.fromkeys removes duplicates while preserving order.
    terms = list(dict.fromkeys(quoted + caps))
    return " ".join(terms) or q
|
11 |
|
12 |
+
# -------- the only search function your agent will call
|
13 |
+
def simple_search(query: str, max_results: int = 5) -> List[str]:
    """
    Perform a DuckDuckGo search and return 'title – url' snippets.

    Args:
        query: Free-text question or keywords; it is passed through
            ``tighten`` before being sent to the search backend.
        max_results: Passed to ``DDGS.text`` as ``max_results``.

    Returns:
        One ``"title – url"`` string per hit that has a title or a link. A
        blank query returns ``[]`` without touching the network.

    Raises:
        Exception: errors raised by the DuckDuckGo client propagate unchanged.
    """
    # A blank query cannot produce hits; skip the network round-trip.
    if not query or not query.strip():
        return []

    query = tighten(query)  # optional heuristic cleaner
    with DDGS() as ddgs:  # context manager closes the client afterwards
        raw = list(ddgs.text(query, max_results=max_results))

    out = []
    for r in raw:
        try:
            title = r.get("title", "")
            link = r.get("href") or r.get("link", "")
        except AttributeError:
            # Entry is not a mapping; skip it rather than hide every error.
            continue
        # Drop hits with neither title nor link instead of emitting " – ".
        if title or link:
            out.append(f"{title} – {link}")
    return out
|
|
|
|
|
|