naman1102 committed on
Commit
aeb4eba
·
1 Parent(s): 039628a
Files changed (2) hide show
  1. app.py +3 -77
  2. tools.py +145 -17
app.py CHANGED
@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Annotated
10
  from langgraph.graph import Graph, StateGraph
11
  from typing_extensions import TypedDict
12
  from openai import OpenAI
13
- from urllib.parse import quote_plus
14
 
15
  # -------------------------
16
  # Utility helpers
@@ -29,82 +29,10 @@ def merge_dicts(old: Dict, new: Dict) -> Dict:
29
 
30
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
32
- JINA_API_KEY = os.getenv("JINA_API_KEY")
33
 
34
  # Remove logs directory creation since we're not storing logs anymore
35
 
36
- # -------------------------
37
- # Jina AI search tool (replaces DDG + Reader)
38
- # -------------------------
39
-
40
- def jina_search_tool(query: str, api_key: str, max_results: int = 5) -> List[str]:
41
- """Return *max_results* clean markdown snippets for *query* using s.jina.ai."""
42
- print("\n=== Jina Search Debug ===")
43
- print(f"Query: {query}")
44
- print(f"Max Results: {max_results}")
45
-
46
- url = f"https://s.jina.ai/?q={quote_plus(query)}"
47
- print(f"URL: {url}")
48
-
49
- headers = {
50
- "Authorization": f"Bearer {api_key}",
51
- "Accept": "application/json",
52
- "User-Agent": "Mozilla/5.0"
53
- }
54
- print("Headers:", {k: v[:10] + "..." if k == "Authorization" else v for k, v in headers.items()})
55
-
56
- try:
57
- print("\nSending request to Jina API...")
58
- r = requests.get(url, headers=headers, timeout=15)
59
- print(f"Response Status: {r.status_code}")
60
-
61
- r.raise_for_status()
62
- data = r.json()
63
-
64
- print("\nResponse Data Structure:")
65
- print(f"Keys in response: {list(data.keys())}")
66
- print(f"Number of results: {len(data.get('results', []))}")
67
-
68
- results = [
69
- f"{item['title']} – {item['url']}\n{item['content']}"
70
- for item in data.get("results", [])[:max_results]
71
- ]
72
-
73
- print(f"\nRetrieved {len(results)} results")
74
- if results:
75
- print("\nFirst result preview:")
76
- print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])
77
-
78
- if not results:
79
- print("WARNING: No results found in the response")
80
- print("Full response data:", json.dumps(data, indent=2)[:1000])
81
-
82
- return results
83
-
84
- except requests.exceptions.Timeout:
85
- print("ERROR: Request timed out after 15 seconds")
86
- raise
87
- except requests.exceptions.RequestException as e:
88
- print(f"ERROR: Network/Request error: {str(e)}")
89
- raise
90
- except json.JSONDecodeError as e:
91
- print(f"ERROR: Failed to parse JSON response: {str(e)}")
92
- print(f"Raw response: {r.text[:500]}")
93
- raise
94
- except Exception as e:
95
- print(f"ERROR: Unexpected error: {str(e)}")
96
- raise
97
-
98
- # -------------------------
99
- # Logging helper
100
- # -------------------------
101
-
102
- def log_to_file(task_id: str, question: str, log_data: Dict[str, Any]):
103
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
104
- filename = os.path.join(LOGS_DIR, f"question_{task_id}_{ts}.json")
105
- with open(filename, "w", encoding="utf-8") as f:
106
- json.dump({"task_id": task_id, "question": question, "timestamp": ts, "logs": log_data}, f, indent=2)
107
-
108
  # -------------------------
109
  # State definition
110
  # -------------------------
@@ -127,8 +55,6 @@ class BasicAgent:
127
  def __init__(self):
128
  if not OPENAI_API_KEY:
129
  raise EnvironmentError("OPENAI_API_KEY not set")
130
- if not JINA_API_KEY:
131
- raise EnvironmentError("JINA_API_KEY not set")
132
  self.llm = OpenAI(api_key=OPENAI_API_KEY)
133
  self.workflow = self._build_workflow()
134
 
@@ -172,7 +98,7 @@ class BasicAgent:
172
 
173
  def _perform_search(self, state: AgentState) -> AgentState:
174
  try:
175
- results = jina_search_tool(state["search_query"], JINA_API_KEY)
176
  except Exception as e:
177
  results = [f"SEARCH_ERROR: {e}"]
178
  state["history"].append({"step": "search", "results": results})
 
10
  from langgraph.graph import Graph, StateGraph
11
  from typing_extensions import TypedDict
12
  from openai import OpenAI
13
+ from tools import smart_search
14
 
15
  # -------------------------
16
  # Utility helpers
 
29
 
30
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
32
+ BRAVE_KEY = os.getenv("BRAVE_API_KEY") or "" # set in HF Space secrets
33
 
34
  # Remove logs directory creation since we're not storing logs anymore
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # -------------------------
37
  # State definition
38
  # -------------------------
 
55
  def __init__(self):
56
  if not OPENAI_API_KEY:
57
  raise EnvironmentError("OPENAI_API_KEY not set")
 
 
58
  self.llm = OpenAI(api_key=OPENAI_API_KEY)
59
  self.workflow = self._build_workflow()
60
 
 
98
 
99
  def _perform_search(self, state: AgentState) -> AgentState:
100
  try:
101
+ results = smart_search(state["search_query"], BRAVE_KEY, max_results=5)
102
  except Exception as e:
103
  results = [f"SEARCH_ERROR: {e}"]
104
  state["history"].append({"step": "search", "results": results})
tools.py CHANGED
@@ -2,27 +2,63 @@ from typing import List
2
  from duckduckgo_search import DDGS
3
  import requests
4
  import os
 
 
 
 
5
 
6
  # --- Simple Search Tool ---
7
 
8
- def simple_search(query: str, max_results: int = 3) -> List[str]:
9
- """
10
- Perform a DuckDuckGo search and return a list of strings summarizing the top results.
11
- """
12
- with DDGS() as ddgs:
13
- raw_results = list(ddgs.text(query, max_results=max_results))
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- results = []
16
- for r in raw_results:
17
- try:
18
- title = r.get("title", "")
19
- link = r.get("href") or r.get("link", "")
20
- summary = f"{title} - {link}"
21
- results.append(summary)
22
- except Exception as e:
23
- print("Skipping malformed search result:", r, "Error:", e)
24
-
25
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # --- Jina Search Tool ---
28
 
@@ -49,3 +85,95 @@ def jina_search_tool(query: str, api_key: str) -> List[str]:
49
  except Exception as e:
50
  print(f"Error fetching search results: {e}")
51
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from duckduckgo_search import DDGS
3
  import requests
4
  import os
5
+ import json
6
+ import time
7
+ from urllib.parse import quote_plus
8
+ import re
9
 
10
  # --- Simple Search Tool ---
11
 
12
def tighten(question: str) -> str:
    """Extract key terms from verbose questions to improve search results.

    Double-quoted phrases are collected first, followed by tokens of three
    or more characters that start with an uppercase letter or a digit and
    appear *outside* those quoted phrases. Terms are de-duplicated in
    first-seen order so the resulting query never repeats itself.

    Args:
        question: The free-text question to condense.

    Returns:
        The space-joined key terms, or ``question`` unchanged when neither a
        quoted phrase nor a capitalized term is found.
    """
    print("\n=== Query Tightening ===")
    print(f"Original query: {question}")

    # Find quoted phrases
    quoted = re.findall(r'"([^"]+)"', question)
    print(f"Quoted phrases: {quoted}")

    # Find capitalized terms, skipping text already captured as a quoted
    # phrase so its words are not appended a second time.
    unquoted = re.sub(r'"[^"]+"', " ", question)
    caps = re.findall(r'\b([A-Z0-9][\w-]{2,})', unquoted)
    print(f"Capitalized terms: {caps}")

    # Combine, dropping repeats while keeping first-seen order.
    terms = list(dict.fromkeys(quoted + caps))
    short = " ".join(terms)
    result = short or question
    print(f"Tightened query: {result}")

    return result
31
 
32
def simple_search(query: str, max_results: int = 3) -> List[str]:
    """Run a DuckDuckGo text search and format each hit as 'title – link'.

    Used as the fallback search backend. Progress is printed for debugging.
    Hits that cannot be formatted are reported and skipped; any failure of
    the search itself is printed and re-raised to the caller.
    """
    print("\n=== DuckDuckGo Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    try:
        with DDGS() as client:
            hits = list(client.text(query, max_results=max_results))
            print(f"Retrieved {len(hits)} raw results")

        formatted = []
        for hit in hits:
            try:
                title = hit.get('title', '')
                link = hit.get('href') or hit.get('link', '')
                line = f"{title} – {link}"
                formatted.append(line)
                print(f"Processed result: {line[:100]}...")
            except Exception as err:
                # A malformed hit is reported and skipped, not fatal.
                print(f"Error processing result: {err}")

        print(f"\nFinal results count: {len(formatted)}")
        if formatted:
            print("\nFirst result preview:")
            preview = formatted[0]
            if len(preview) > 200:
                preview = preview[:200] + "..."
            print(preview)

        return formatted
    except Exception as err:
        print(f"ERROR in DuckDuckGo search: {str(err)}")
        raise
62
 
63
  # --- Jina Search Tool ---
64
 
 
85
  except Exception as e:
86
  print(f"Error fetching search results: {e}")
87
  return []
88
+
89
def brave_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
    """Query the Brave Web Search API and format the hits as strings.

    Each returned string has the form 'title – url\\ndescription'. On an
    HTTP 429 response the request is retried once after a 2 second pause.

    Args:
        query: Search terms; URL-encoded before being sent.
        api_key: Brave subscription token, sent as ``X-Subscription-Token``.
        max_results: Maximum number of results requested and returned.

    Returns:
        Up to ``max_results`` formatted result strings (possibly empty).

    Raises:
        RuntimeError: If ``api_key`` is empty.
        requests.exceptions.RequestException: On timeout, network failure
            or a non-success HTTP status.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    print("\n=== Brave Search Debug ===")
    print(f"Query: {query}")
    print(f"Max Results: {max_results}")

    if not api_key:
        print("ERROR: BRAVE_API_KEY not set")
        raise RuntimeError("BRAVE_API_KEY not set")

    url = (
        "https://api.search.brave.com/res/v1/web/search?"
        f"q={quote_plus(query)}&count={max_results}"
    )
    print(f"URL: {url}")

    hdrs = {
        "X-Subscription-Token": api_key,
        "Accept": "application/json",
        "User-Agent": "Mozilla/5.0",
    }
    # Never echo any part of the subscription token into the logs.
    print("Headers:", {k: "***" if k == "X-Subscription-Token" else v for k, v in hdrs.items()})

    try:
        print("\nSending request to Brave API...")
        r = requests.get(url, headers=hdrs, timeout=12)
        print(f"Response Status: {r.status_code}")

        if r.status_code == 429:
            print("Rate limit hit, waiting 2 seconds and retrying...")
            time.sleep(2)
            r = requests.get(url, headers=hdrs, timeout=12)
            print(f"Retry Response Status: {r.status_code}")

        r.raise_for_status()
        payload = r.json()  # parse once; reused for the debug dump below
        # Tolerate a missing or null "web"/"results" section.
        data = (payload.get("web") or {}).get("results") or []

        print("\nResponse Data Structure:")
        print(f"Number of results: {len(data)}")

        # Brave returns the snippet under "description" (there is no "body"
        # key); .get() keeps one incomplete hit from discarding the rest.
        results = [
            f"{d.get('title', '')} – {d.get('url', '')}\n{d.get('description', '')}"
            for d in data[:max_results]
        ]

        print(f"\nRetrieved {len(results)} results")
        if results:
            print("\nFirst result preview:")
            print(results[0][:200] + "..." if len(results[0]) > 200 else results[0])

        if not results:
            print("WARNING: No results found in the response")
            print("Full response data:", json.dumps(payload, indent=2)[:1000])

        return results

    except requests.exceptions.Timeout:
        print("ERROR: Request timed out after 12 seconds")
        raise
    except json.JSONDecodeError as e:
        # Checked before RequestException: newer requests versions raise a
        # JSON error that is also a RequestException, which would otherwise
        # skip this raw-body dump.
        print(f"ERROR: Failed to parse JSON response: {str(e)}")
        print(f"Raw response: {r.text[:500]}")
        raise
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Network/Request error: {str(e)}")
        raise
    except Exception as e:
        print(f"ERROR: Unexpected error: {str(e)}")
        raise
160
+
161
def smart_search(query: str, api_key: str, max_results: int = 5) -> List[str]:
    """Search with Brave first and fall back to DuckDuckGo on any failure.

    The query is condensed with ``tighten`` before either backend is
    called. Any exception raised by the Brave attempt is printed and
    triggers the DuckDuckGo fallback, whose own errors propagate.
    """
    print("\n=== Smart Search Debug ===")
    print(f"Original query: {query}")

    # Both backends receive the condensed form of the query.
    tightened = tighten(query)
    print(f"Tightened query: {tightened}")
    print(f"Max Results: {max_results}")

    try:
        print("Attempting Brave search first...")
        hits = brave_search(tightened, api_key, max_results)
    except Exception as err:
        print(f"Brave search failed: {err}")
        print("Falling back to DuckDuckGo...")
        hits = simple_search(tightened, max_results)
    return hits