"""DuckDuckGo text-search helpers with retry and rate-limit handling."""

import re
import time
from typing import Callable, List, Tuple

from duckduckgo_search import DDGS  # pip install -U duckduckgo-search

# Patterns used by tighten(); compiled once at import time.
_QUOTED_RE = re.compile(r'"([^"]+)"')
_KEYWORD_RE = re.compile(r'\b([A-Z0-9][\w-]{2,})')

# (title, link, snippet) fallbacks used when a result lacks a field.
_NO_FALLBACKS: Tuple[str, str, str] = ("", "", "")
_PLACEHOLDERS: Tuple[str, str, str] = ("No title", "No link", "No description")

# Retry policy of simple_search().
_MAX_ATTEMPTS = 4
_RATE_LIMIT_DELAY_SEC = 20  # seconds to wait after a rate-limit error
_BACKOFF_BASE_SEC = 30      # first delay of the exponential backoff


# -------- helper to shorten very long GAIA questions (optional but helpful)
def tighten(q: str) -> str:
    """Condense a long question into a short keyword query.

    Collects every double-quoted phrase, followed by every token of three or
    more characters that starts with an uppercase ASCII letter or a digit,
    and joins them with single spaces.

    Parameters
    ----------
    q : str
        The original question.

    Returns
    -------
    str
        The condensed query, or *q* unchanged when nothing was extracted.
    """
    quoted = _QUOTED_RE.findall(q)
    keywords = _KEYWORD_RE.findall(q)
    short = " ".join(quoted + keywords)
    return short or q


def _format_result(result, fallbacks: Tuple[str, str, str] = _NO_FALLBACKS) -> str:
    """Render one search result as a title/link line followed by the snippet.

    The link is read from ``href`` (then ``link``) and the snippet from
    ``body`` (then ``snippet``); a missing or empty field is replaced by the
    matching entry of *fallbacks* (title, link, snippet).
    """
    no_title, no_link, no_snippet = fallbacks
    title = result.get("title") or no_title
    link = result.get("href") or result.get("link") or no_link
    snippet = result.get("body") or result.get("snippet") or no_snippet
    return f"{title} – {link}\n{snippet}"


def _fetch(
    query: str,
    max_results: int,
    fallbacks: Tuple[str, str, str] = _NO_FALLBACKS,
) -> List[str]:
    """Run a single DuckDuckGo text search and format its results.

    Exceptions raised by the search are deliberately NOT caught here, so the
    retry loops in the callers can see them and choose a delay.
    """
    with DDGS() as ddgs:
        raw = list(ddgs.text(query, max_results=max_results))
    # Best effort: skip any entry that does not offer a dict-style ``get``
    # instead of hiding every possible error behind a blanket ``except``.
    return [_format_result(r, fallbacks) for r in raw if hasattr(r, "get")]


def _raw_search(query: str, max_results: int = 5) -> List[str]:
    """Internal function that performs the actual DuckDuckGo search.

    Returns one formatted string per result; missing fields are rendered as
    empty strings. Search errors propagate to the caller.
    """
    return _fetch(query, max_results)


def retry_ddg(
    query: str,
    max_results: int = 5,
    attempts: int = 4,
    delay_sec: int = 10,
    search_fn: Callable[[str, int], List[str]] = _raw_search,
) -> List[str]:
    """
    Retry DuckDuckGo search up to *attempts* times, waiting *delay_sec*
    seconds between attempts if no results were returned or an exception
    was raised. No wait happens after the final attempt.

    Parameters
    ----------
    query : str
        Search query.
    max_results : int, default 5
        Number of results to return.
    attempts : int, default 4
        Maximum number of attempts before giving up. With a value below 1
        no search is performed and an empty list is returned.
    delay_sec : int, default 10
        Seconds to sleep between attempts.
    search_fn : Callable
        A function with signature (query: str, max_results: int) -> List[str].
        Defaults to _raw_search.

    Returns
    -------
    List[str]
        List of result strings; may be empty if every attempt failed.
    """
    last_err = None

    for attempt in range(1, attempts + 1):
        try:
            results = search_fn(query, max_results)
        except Exception as exc:  # boundary: any search failure is retried
            last_err = exc  # keep the last error for the final summary
            status = f"Attempt {attempt}/{attempts} failed: {exc}."
            retry_note = f" Retrying in {delay_sec}s…"
        else:
            if results:  # success
                return results
            status = f"Attempt {attempt}/{attempts}: no results"
            retry_note = f", retrying in {delay_sec}s…"

        if attempt < attempts:
            print(status + retry_note)
            time.sleep(delay_sec)
        else:
            # Final attempt: nothing follows, so do not announce a retry.
            print(status)

    # All attempts failed or returned empty
    if last_err is not None:
        print(f"All {attempts} attempts failed. Last exception: {last_err}")
    else:
        print(f"All {attempts} attempts returned empty results.")
    return []


def _is_rate_limited(error_msg: str) -> bool:
    """Return True when an error message looks like a rate-limit response."""
    return "Ratelimit" in error_msg or "202" in error_msg


# -------- the only search function your agent will call
def simple_search(query: str, max_results: int = 5) -> List[str]:
    """
    Perform a web search using DuckDuckGo and return formatted results.

    The query is validated once, then searched up to four times. After a
    failed attempt the function waits before retrying: 20 seconds when the
    error message looks like a rate limit, otherwise an exponential backoff
    of 30s, 60s, 120s. An attempt that merely returns no results is retried
    immediately. The function never raises for search errors.

    Parameters
    ----------
    query : str
        Search query; surrounding whitespace is ignored. ``None``, an empty
        query or a query shorter than 2 characters yields an empty list
        without any search being performed.
    max_results : int, default 5
        Number of results to request.

    Returns
    -------
    List[str]
        One formatted string per result, with "No title" / "No link" /
        "No description" standing in for missing fields; empty when
        validation fails or every attempt failed or came back empty.
    """
    query = (query or "").strip()
    if not query:
        print("Error: Empty search query provided")
        return []
    if len(query) < 2:
        # Rejected up front (the original code notes that DuckDuckGo requires
        # at least 2 characters); retrying could never change the outcome.
        print("Warning: Query too short")
        return []

    for attempt in range(_MAX_ATTEMPTS):
        try:
            results = _fetch(query, max_results, _PLACEHOLDERS)
        except Exception as exc:  # boundary: report the failure, then retry
            error_msg = str(exc)
            print(f"Attempt {attempt + 1}/{_MAX_ATTEMPTS} failed: {error_msg}")

            if attempt == _MAX_ATTEMPTS - 1:
                # Out of attempts: give up without a pointless final sleep.
                print(f"All {_MAX_ATTEMPTS} attempts failed. Last exception: {error_msg}")
                return []

            if _is_rate_limited(error_msg):
                print(f"Rate limit detected. Waiting {_RATE_LIMIT_DELAY_SEC} seconds...")
                time.sleep(_RATE_LIMIT_DELAY_SEC)
            else:
                # For other errors, use exponential backoff
                delay = _BACKOFF_BASE_SEC * (2 ** attempt)
                print(f"Retrying in {delay}s...")
                time.sleep(delay)
            continue

        if results:
            return results
        print(f"Attempt {attempt + 1}/{_MAX_ATTEMPTS}: No results found")

    return []