naman1102 committed on
Commit
622f2bb
·
1 Parent(s): aeb4eba
Files changed (2) hide show
  1. app.py +5 -6
  2. tools.py +22 -173
app.py CHANGED
@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Annotated
10
  from langgraph.graph import Graph, StateGraph
11
  from typing_extensions import TypedDict
12
  from openai import OpenAI
13
- from tools import smart_search
14
 
15
  # -------------------------
16
  # Utility helpers
@@ -29,7 +29,6 @@ def merge_dicts(old: Dict, new: Dict) -> Dict:
29
 
30
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
32
- BRAVE_KEY = os.getenv("BRAVE_API_KEY") or "" # set in HF Space secrets
33
 
34
  # Remove logs directory creation since we're not storing logs anymore
35
 
@@ -97,10 +96,10 @@ class BasicAgent:
97
  return state
98
 
99
  def _perform_search(self, state: AgentState) -> AgentState:
100
- try:
101
- results = smart_search(state["search_query"], BRAVE_KEY, max_results=5)
102
- except Exception as e:
103
- results = [f"SEARCH_ERROR: {e}"]
104
  state["history"].append({"step": "search", "results": results})
105
  state["logs"]["search"] = {"query": state["search_query"], "results": results}
106
  state["current_step"] = "answer"
 
10
  from langgraph.graph import Graph, StateGraph
11
  from typing_extensions import TypedDict
12
  from openai import OpenAI
13
+ from tools import simple_search
14
 
15
  # -------------------------
16
  # Utility helpers
 
29
 
30
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
32
 
33
  # Remove logs directory creation since we're not storing logs anymore
34
 
 
96
  return state
97
 
98
  def _perform_search(self, state: AgentState) -> AgentState:
99
+ results = simple_search(state["search_query"], max_results=5)
100
+ print("\nSearch Results:")
101
+ for i, s in enumerate(results, 1):
102
+ print(f"[{i}] {s[:120]}")
103
  state["history"].append({"step": "search", "results": results})
104
  state["logs"]["search"] = {"query": state["search_query"], "results": results}
105
  state["current_step"] = "answer"
tools.py CHANGED
@@ -1,179 +1,28 @@
1
  from typing import List
2
- from duckduckgo_search import DDGS
3
- import requests
4
- import os
5
- import json
6
- import time
7
- from urllib.parse import quote_plus
8
  import re
9
 
10
- # --- Simple Search Tool ---
 
 
 
 
 
11
 
12
- def tighten(question: str) -> str:
13
- """Extract key terms from verbose questions to improve search results."""
14
- print("\n=== Query Tightening ===")
15
- print(f"Original query: {question}")
16
-
17
- # Find quoted phrases
18
- quoted = re.findall(r'"([^"]+)"', question)
19
- print(f"Quoted phrases: {quoted}")
20
-
21
- # Find capitalized terms
22
- caps = re.findall(r'\b([A-Z0-9][\w-]{2,})', question)
23
- print(f"Capitalized terms: {caps}")
24
-
25
- # Combine and clean
26
- short = " ".join(quoted + caps)
27
- result = short or question
28
- print(f"Tightened query: {result}")
29
-
30
- return result
31
-
32
- def simple_search(query: str, max_results: int = 3) -> List[str]:
33
- """Fallback to DuckDuckGo search."""
34
- print("\n=== DuckDuckGo Search Debug ===")
35
- print(f"Query: {query}")
36
- print(f"Max Results: {max_results}")
37
-
38
- try:
39
- with DDGS() as ddgs:
40
- raw = list(ddgs.text(query, max_results=max_results))
41
- print(f"Retrieved {len(raw)} raw results")
42
-
43
- out = []
44
- for r in raw:
45
- try:
46
- result = f"{r.get('title','')} – {r.get('href') or r.get('link','')}"
47
- out.append(result)
48
- print(f"Processed result: {result[:100]}...")
49
- except Exception as e:
50
- print(f"Error processing result: {e}")
51
- pass
52
-
53
- print(f"\nFinal results count: {len(out)}")
54
- if out:
55
- print("\nFirst result preview:")
56
- print(out[0][:200] + "..." if len(out[0]) > 200 else out[0])
57
-
58
- return out
59
- except Exception as e:
60
- print(f"ERROR in DuckDuckGo search: {str(e)}")
61
- raise
62
-
63
- # --- Jina Search Tool ---
64
-
65
- def jina_search_tool(query: str, api_key: str) -> List[str]:
66
- """
67
- Perform a web search using Jina AI's s.jina.ai endpoint and retrieve clean, LLM-friendly content.
68
- """
69
- api_endpoint = f"https://s.jina.ai/{query.replace(' ', '+')}"
70
- headers = {
71
- "Authorization": f"Bearer {api_key}",
72
- "Accept": "application/json",
73
- "User-Agent": "Mozilla/5.0"
74
- }
75
-
76
- try:
77
- response = requests.get(api_endpoint, headers=headers, timeout=10)
78
- if response.status_code == 200:
79
- data = response.json()
80
- contents = [item.get("content", "") for item in data.get("results", [])]
81
- return contents
82
- else:
83
- print(f"Failed to fetch search results: Status code {response.status_code}")
84
- return []
85
- except Exception as e:
86
- print(f"Error fetching search results: {e}")
87
- return []
88
-
89
- def brave_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
90
- """
91
- Query Brave Web Search API and return a list of 'title – url\\nbody' strings.
92
- """
93
- print("\n=== Brave Search Debug ===")
94
- print(f"Query: {query}")
95
- print(f"Max Results: {max_results}")
96
-
97
- if not api_key:
98
- print("ERROR: BRAVE_API_KEY not set")
99
- raise RuntimeError("BRAVE_API_KEY not set")
100
-
101
- url = (
102
- "https://api.search.brave.com/res/v1/web/search?"
103
- f"q={quote_plus(query)}&count={max_results}"
104
- )
105
- print(f"URL: {url}")
106
-
107
- hdrs = {
108
- "X-Subscription-Token": api_key,
109
- "Accept": "application/json",
110
- "User-Agent": "Mozilla/5.0",
111
- }
112
- print("Headers:", {k: v[:10] + "..." if k == "X-Subscription-Token" else v for k, v in hdrs.items()})
113
-
114
- try:
115
- print("\nSending request to Brave API...")
116
- r = requests.get(url, headers=hdrs, timeout=12)
117
- print(f"Response Status: {r.status_code}")
118
-
119
- if r.status_code == 429:
120
- print("Rate limit hit, waiting 2 seconds and retrying...")
121
- time.sleep(2)
122
- r = requests.get(url, headers=hdrs, timeout=12)
123
- print(f"Retry Response Status: {r.status_code}")
124
-
125
- r.raise_for_status()
126
- data = r.json().get("web", {}).get("results", [])
127
-
128
- print("\nResponse Data Structure:")
129
- print(f"Number of results: {len(data)}")
130
-
131
- results = [
132
- f"{d['title']} – {d['url']}\n{d['body']}"
133
- for d in data
134
- ][:max_results]
135
-
136
- print(f"\nRetrieved {len(results)} results")
137
- if results:
138
- print("\nFirst result preview:")
139
- print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])
140
-
141
- if not results:
142
- print("WARNING: No results found in the response")
143
- print("Full response data:", json.dumps(r.json(), indent=2)[:1000])
144
-
145
- return results
146
-
147
- except requests.exceptions.Timeout:
148
- print("ERROR: Request timed out after 12 seconds")
149
- raise
150
- except requests.exceptions.RequestException as e:
151
- print(f"ERROR: Network/Request error: {str(e)}")
152
- raise
153
- except json.JSONDecodeError as e:
154
- print(f"ERROR: Failed to parse JSON response: {str(e)}")
155
- print(f"Raw response: {r.text[:500]}")
156
- raise
157
- except Exception as e:
158
- print(f"ERROR: Unexpected error: {str(e)}")
159
- raise
160
-
161
- def smart_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
162
  """
163
- 1) Brave (if key + quota OK) 2) DuckDuckGo.
164
  """
165
- print("\n=== Smart Search Debug ===")
166
- print(f"Original query: {query}")
167
-
168
- # Tighten the query before searching
169
- query = tighten(query)
170
- print(f"Tightened query: {query}")
171
- print(f"Max Results: {max_results}")
172
-
173
- try:
174
- print("Attempting Brave search first...")
175
- return brave_search(query, api_key, max_results)
176
- except Exception as e:
177
- print(f"Brave search failed: {e}")
178
- print("Falling back to DuckDuckGo...")
179
- return simple_search(query, max_results)
 
1
  from typing import List
2
+ from duckduckgo_search import DDGS # pip install -U duckduckgo-search
 
 
 
 
 
3
  import re
4
 
5
+ # -------- helper to shorten very long GAIA questions (optional but helpful)
6
def tighten(q: str) -> str:
    """
    Condense a verbose question into a short keyword query.

    Double-quoted phrases are kept first, followed by tokens that start
    with an uppercase letter or a digit and are at least three characters
    long. Every term is emitted once: a token that already appears inside
    a kept quoted phrase, or that was already emitted, is not repeated.

    Args:
        q: The original (possibly very long) question.

    Returns:
        The space-joined key terms, or ``q`` unchanged when no quoted
        phrase or capitalised/numeric token is found.
    """
    cap_pattern = r'\b([A-Z0-9][\w-]{2,})'
    quoted = re.findall(r'"([^"]+)"', q)
    caps = re.findall(cap_pattern, q)

    seen = set()
    terms = []
    for phrase in quoted:
        if phrase in seen:
            continue
        seen.add(phrase)
        terms.append(phrase)
        # Tokens inside a kept phrase are already part of the query, so
        # mark them as seen to avoid emitting them a second time below.
        seen.update(re.findall(cap_pattern, phrase))

    for word in caps:
        if word not in seen:
            seen.add(word)
            terms.append(word)

    short = " ".join(terms)
    # Fall back to the full question when the heuristic extracts nothing.
    return short or q
11
 
12
+ # -------- the only search function your agent will call
13
def simple_search(query: str, max_results: int = 5) -> List[str]:
    """
    Perform a DuckDuckGo text search and return 'title – link' strings.

    The query is first condensed with ``tighten`` and then passed to
    ``DDGS.text``. Each hit is rendered as ``"<title> – <link>"``, where
    the link is taken from the hit's ``href`` key, falling back to
    ``link``. Hits that do not expose ``.get`` are skipped.

    Args:
        query: The search query or full question text.
        max_results: Maximum number of hits to request from DuckDuckGo.

    Returns:
        A list with one formatted string per usable hit (possibly empty).

    Raises:
        Any exception raised by ``DDGS`` itself is not caught here and
        propagates to the caller.
    """
    query = tighten(query)  # optional heuristic cleaner
    with DDGS() as ddgs:  # context manager releases the client on exit
        raw = list(ddgs.text(query, max_results=max_results))

    out = []
    for r in raw:
        try:
            title = r.get("title", "")
            link = r.get("href") or r.get("link", "")
        except AttributeError:
            # Not a dict-like hit; skip it rather than abort the search.
            continue
        out.append(f"{title} – {link}")
    return out