brave
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Annotated
|
|
10 |
from langgraph.graph import Graph, StateGraph
|
11 |
from typing_extensions import TypedDict
|
12 |
from openai import OpenAI
|
13 |
-
from
|
14 |
|
15 |
# -------------------------
|
16 |
# Utility helpers
|
@@ -29,82 +29,10 @@ def merge_dicts(old: Dict, new: Dict) -> Dict:
|
|
29 |
|
30 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
31 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
32 |
-
|
33 |
|
34 |
# Remove logs directory creation since we're not storing logs anymore
|
35 |
|
36 |
-
# -------------------------
|
37 |
-
# Jina AI search tool (replaces DDG + Reader)
|
38 |
-
# -------------------------
|
39 |
-
|
40 |
-
def jina_search_tool(query: str, api_key: str, max_results: int = 5) -> List[str]:
    """Return up to *max_results* text snippets for *query* using s.jina.ai.

    Each snippet is formatted as ``"title – url\\ncontent"``. Missing fields
    in an individual hit are rendered as empty strings instead of aborting
    the whole search.

    Args:
        query: Free-text search query; it is URL-encoded before sending.
        api_key: Jina API key, sent as a bearer token.
        max_results: Maximum number of snippets to return.

    Returns:
        A list of formatted snippets (possibly empty).

    Raises:
        requests.exceptions.RequestException: on network/HTTP failure
            (including timeouts).
        json.JSONDecodeError: when the response body is not valid JSON.
    """
    print("\n=== Jina Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    url = f"https://s.jina.ai/?q={quote_plus(query)}"
    print(f"URL: {url}")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Accept": "application/json",
        "User-Agent": "Mozilla/5.0",
    }
    # Never write key material to the logs, not even a prefix.
    print("Headers:", {k: "***" if k == "Authorization" else v for k, v in headers.items()})

    try:
        print("\nSending request to Jina API...")
        r = requests.get(url, headers=headers, timeout=15)
        print(f"Response Status: {r.status_code}")

        r.raise_for_status()
        data = r.json()

        print("\nResponse Data Structure:")
        print(f"Keys in response: {list(data.keys())}")

        # NOTE(review): the JSON payload from s.jina.ai appears to carry its
        # hits under "data" rather than "results" - confirm against the API
        # docs. Both keys are accepted so either shape works.
        items = data.get("results") or data.get("data") or []
        if not isinstance(items, list):
            items = []
        print(f"Number of results: {len(items)}")

        results = [
            f"{item.get('title', '')} – {item.get('url', '')}\n{item.get('content', '')}"
            for item in items[:max_results]
            if isinstance(item, dict)
        ]

        print(f"\nRetrieved {len(results)} results")
        if results:
            print("\nFirst result preview:")
            print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])
        else:
            print("WARNING: No results found in the response")
            print("Full response data:", json.dumps(data, indent=2)[:1000])

        return results

    except requests.exceptions.Timeout:
        print("ERROR: Request timed out after 15 seconds")
        raise
    except json.JSONDecodeError as e:
        # Must precede RequestException: recent `requests` versions raise a
        # JSONDecodeError that is also a RequestException subclass.
        print(f"ERROR: Failed to parse JSON response: {str(e)}")
        print(f"Raw response: {r.text[:500]}")
        raise
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Network/Request error: {str(e)}")
        raise
    except Exception as e:
        print(f"ERROR: Unexpected error: {str(e)}")
        raise
|
97 |
-
|
98 |
-
# -------------------------
|
99 |
-
# Logging helper
|
100 |
-
# -------------------------
|
101 |
-
|
102 |
-
def log_to_file(task_id: str, question: str, log_data: Dict[str, Any]):
    """Write the logs for one question to a timestamped JSON file in LOGS_DIR.

    The file is named ``question_<task_id>_<YYYYmmdd_HHMMSS>.json`` and holds
    the task id, the question, the timestamp and *log_data*.

    Args:
        task_id: Identifier of the task; characters other than letters,
            digits, ``-`` and ``_`` are replaced by ``_`` in the file name so
            the id cannot escape LOGS_DIR.
        question: The question text, stored verbatim.
        log_data: JSON-serialisable mapping of log entries.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_id = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in str(task_id)
    )
    # The directory is not guaranteed to exist, so create it on demand.
    os.makedirs(LOGS_DIR, exist_ok=True)
    filename = os.path.join(LOGS_DIR, f"question_{safe_id}_{ts}.json")
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(
            {"task_id": task_id, "question": question, "timestamp": ts, "logs": log_data},
            f,
            indent=2,
        )
|
107 |
-
|
108 |
# -------------------------
|
109 |
# State definition
|
110 |
# -------------------------
|
@@ -127,8 +55,6 @@ class BasicAgent:
|
|
127 |
def __init__(self):
|
128 |
if not OPENAI_API_KEY:
|
129 |
raise EnvironmentError("OPENAI_API_KEY not set")
|
130 |
-
if not JINA_API_KEY:
|
131 |
-
raise EnvironmentError("JINA_API_KEY not set")
|
132 |
self.llm = OpenAI(api_key=OPENAI_API_KEY)
|
133 |
self.workflow = self._build_workflow()
|
134 |
|
@@ -172,7 +98,7 @@ class BasicAgent:
|
|
172 |
|
173 |
def _perform_search(self, state: AgentState) -> AgentState:
|
174 |
try:
|
175 |
-
results =
|
176 |
except Exception as e:
|
177 |
results = [f"SEARCH_ERROR: {e}"]
|
178 |
state["history"].append({"step": "search", "results": results})
|
|
|
10 |
from langgraph.graph import Graph, StateGraph
|
11 |
from typing_extensions import TypedDict
|
12 |
from openai import OpenAI
|
13 |
+
from tools import smart_search
|
14 |
|
15 |
# -------------------------
|
16 |
# Utility helpers
|
|
|
29 |
|
30 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
31 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
32 |
+
BRAVE_KEY = os.getenv("BRAVE_API_KEY") or "" # set in HF Space secrets
|
33 |
|
34 |
# Remove logs directory creation since we're not storing logs anymore
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# -------------------------
|
37 |
# State definition
|
38 |
# -------------------------
|
|
|
55 |
def __init__(self):
|
56 |
if not OPENAI_API_KEY:
|
57 |
raise EnvironmentError("OPENAI_API_KEY not set")
|
|
|
|
|
58 |
self.llm = OpenAI(api_key=OPENAI_API_KEY)
|
59 |
self.workflow = self._build_workflow()
|
60 |
|
|
|
98 |
|
99 |
def _perform_search(self, state: AgentState) -> AgentState:
|
100 |
try:
|
101 |
+
results = smart_search(state["search_query"], BRAVE_KEY, max_results=5)
|
102 |
except Exception as e:
|
103 |
results = [f"SEARCH_ERROR: {e}"]
|
104 |
state["history"].append({"step": "search", "results": results})
|
tools.py
CHANGED
@@ -2,27 +2,63 @@ from typing import List
|
|
2 |
from duckduckgo_search import DDGS
|
3 |
import requests
|
4 |
import os
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# --- Simple Search Tool ---
|
7 |
|
8 |
-
def
|
9 |
-
"""
|
10 |
-
|
11 |
-
""
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# --- Jina Search Tool ---
|
28 |
|
@@ -49,3 +85,95 @@ def jina_search_tool(query: str, api_key: str) -> List[str]:
|
|
49 |
except Exception as e:
|
50 |
print(f"Error fetching search results: {e}")
|
51 |
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from duckduckgo_search import DDGS
|
3 |
import requests
|
4 |
import os
|
5 |
+
import json
|
6 |
+
import time
|
7 |
+
from urllib.parse import quote_plus
|
8 |
+
import re
|
9 |
|
10 |
# --- Simple Search Tool ---
|
11 |
|
12 |
+
def tighten(question: str) -> str:
    """Extract key terms from a verbose question to improve search results.

    Double-quoted phrases come first, followed by tokens that start with an
    uppercase letter or digit and are at least three characters long. Every
    term appears once, and a token that already occurs inside a quoted phrase
    is not repeated.

    Args:
        question: The original, possibly verbose, question.

    Returns:
        The space-joined key terms, or *question* unchanged when no term
        could be extracted.
    """
    print("\n=== Query Tightening ===")
    print(f"Original query: {question}")

    # Find quoted phrases
    quoted = re.findall(r'"([^"]+)"', question)
    print(f"Quoted phrases: {quoted}")

    # Find capitalized terms
    caps = re.findall(r'\b([A-Z0-9][\w-]{2,})', question)
    print(f"Capitalized terms: {caps}")

    # Words already covered by a quoted phrase must not be emitted again.
    covered = {word for phrase in quoted for word in re.findall(r'[\w-]+', phrase)}

    terms = []
    seen = set()
    for phrase in quoted:
        phrase = phrase.strip()
        if phrase and phrase not in seen:
            seen.add(phrase)
            terms.append(phrase)
    for term in caps:
        if term in seen or term in covered:
            continue
        seen.add(term)
        terms.append(term)

    # Fall back to the full question when nothing useful was extracted.
    result = " ".join(terms) or question
    print(f"Tightened query: {result}")

    return result
|
31 |
|
32 |
+
def simple_search(query: str, max_results: int = 3) -> List[str]:
    """Fallback search via DuckDuckGo.

    Each hit is formatted as ``"title – url\\nbody"`` so the output has the
    same shape as the Brave results; the snippet line is omitted when the hit
    carries no body text.

    Args:
        query: Search query.
        max_results: Maximum number of hits to request.

    Returns:
        A list of formatted result strings (possibly empty).

    Raises:
        Exception: whatever the DuckDuckGo client raises is logged and
            re-raised unchanged.
    """
    print("\n=== DuckDuckGo Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    # Only the network call is guarded; formatting below cannot hide errors.
    try:
        with DDGS() as ddgs:
            raw = list(ddgs.text(query, max_results=max_results))
    except Exception as e:
        print(f"ERROR in DuckDuckGo search: {str(e)}")
        raise
    print(f"Retrieved {len(raw)} raw results")

    out = []
    for r in raw:
        if not isinstance(r, dict):
            # Skip a malformed hit explicitly instead of swallowing an error.
            print(f"Error processing result: unexpected type {type(r).__name__}")
            continue
        result = f"{r.get('title', '')} – {r.get('href') or r.get('link', '')}"
        snippet = r.get("body") or ""
        if snippet:
            result = f"{result}\n{snippet}"
        out.append(result)
        print(f"Processed result: {result[:100]}...")

    print(f"\nFinal results count: {len(out)}")
    if out:
        print("\nFirst result preview:")
        print(out[0][:200] + "..." if len(out[0]) > 200 else out[0])

    return out
|
62 |
|
63 |
# --- Jina Search Tool ---
|
64 |
|
|
|
85 |
except Exception as e:
|
86 |
print(f"Error fetching search results: {e}")
|
87 |
return []
|
88 |
+
|
89 |
+
def brave_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
    """Query the Brave Web Search API.

    Each hit is formatted as ``"title – url\\ndescription"``. Brave web
    results expose their snippet under ``description``; fields missing from
    a hit are rendered as empty strings.

    Args:
        query: Search query; it is URL-encoded before sending.
        api_key: Brave subscription token.
        max_results: Maximum number of results to request and return.

    Returns:
        A list of formatted result strings (possibly empty).

    Raises:
        RuntimeError: if *api_key* is empty.
        requests.exceptions.RequestException: on network/HTTP failure,
            including a 429 that persists after the single retry.
        json.JSONDecodeError: when the response body is not valid JSON.
    """
    print("\n=== Brave Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    if not api_key:
        print("ERROR: BRAVE_API_KEY not set")
        raise RuntimeError("BRAVE_API_KEY not set")

    url = (
        "https://api.search.brave.com/res/v1/web/search?"
        f"q={quote_plus(query)}&count={max_results}"
    )
    print(f"URL: {url}")

    hdrs = {
        "X-Subscription-Token": api_key,
        "Accept": "application/json",
        "User-Agent": "Mozilla/5.0",
    }
    # Never write key material to the logs, not even a prefix.
    print("Headers:", {k: "***" if k == "X-Subscription-Token" else v for k, v in hdrs.items()})

    try:
        print("\nSending request to Brave API...")
        r = requests.get(url, headers=hdrs, timeout=12)
        print(f"Response Status: {r.status_code}")

        if r.status_code == 429:
            print("Rate limit hit, waiting 2 seconds and retrying...")
            time.sleep(2)
            r = requests.get(url, headers=hdrs, timeout=12)
            print(f"Retry Response Status: {r.status_code}")

        r.raise_for_status()
        payload = r.json()  # parsed once, reused for the diagnostics below
        data = (payload.get("web") or {}).get("results") or []

        print("\nResponse Data Structure:")
        print(f"Number of results: {len(data)}")

        results = [
            f"{d.get('title', '')} – {d.get('url', '')}\n{d.get('description', '')}"
            for d in data[:max_results]
        ]

        print(f"\nRetrieved {len(results)} results")
        if results:
            print("\nFirst result preview:")
            print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])
        else:
            print("WARNING: No results found in the response")
            print("Full response data:", json.dumps(payload, indent=2)[:1000])

        return results

    except requests.exceptions.Timeout:
        print("ERROR: Request timed out after 12 seconds")
        raise
    except json.JSONDecodeError as e:
        # Must precede RequestException: recent `requests` versions raise a
        # JSONDecodeError that is also a RequestException subclass.
        print(f"ERROR: Failed to parse JSON response: {str(e)}")
        print(f"Raw response: {r.text[:500]}")
        raise
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Network/Request error: {str(e)}")
        raise
    except Exception as e:
        print(f"ERROR: Unexpected error: {str(e)}")
        raise
|
160 |
+
|
161 |
+
def smart_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
    """Search with Brave first and fall back to DuckDuckGo.

    The query is first condensed with ``tighten``. Brave is tried only when
    an API key is supplied. DuckDuckGo is used when Brave is skipped, raises,
    or returns no results.

    Args:
        query: The original (possibly verbose) question.
        api_key: Brave subscription token; an empty value skips Brave.
        max_results: Maximum number of results to return.

    Returns:
        A list of formatted result strings from whichever backend answered.

    Raises:
        Exception: errors from the DuckDuckGo fallback propagate to the
            caller.
    """
    print("\n=== Smart Search Debug ===")
    print(f"Original query: {query}")

    # Tighten the query before searching
    query = tighten(query)
    print(f"Tightened query: {query}")
    print(f"Max Results: {max_results}")

    if api_key:
        try:
            print("Attempting Brave search first...")
            results = brave_search(query, api_key, max_results)
        except Exception as e:
            # Deliberately best-effort: any Brave failure triggers the fallback.
            print(f"Brave search failed: {e}")
        else:
            if results:
                return results
            print("Brave search returned no results")
    else:
        print("BRAVE_API_KEY not set, skipping Brave search")

    print("Falling back to DuckDuckGo...")
    return simple_search(query, max_results)
|