"""
Base class for all research tools
"""
|
from abc import ABC, abstractmethod |
|
from typing import Dict, Any, Optional |
|
import time |
|
import re |
|
from datetime import datetime |
|
|
|
|
|
class BaseTool(ABC):
    """Abstract base class for all research tools.

    Concrete tools implement :meth:`search`. This base class supplies the
    shared plumbing around it: a simple delay-based rate limiter, heuristic
    quality scoring of result text, light metadata extraction and a
    consistent error-message format.
    """

    # Weight of each individual metric in the "overall" quality score.
    _QUALITY_WEIGHTS = {"recency": 0.2, "authority": 0.3, "specificity": 0.3, "relevance": 0.2}

    # Baseline authority per source label; unknown labels fall back to
    # _DEFAULT_AUTHORITY.
    _AUTHORITY_BY_SOURCE = {
        'arxiv': 0.9,
        'sec': 0.95,
        'github': 0.7,
        'wikipedia': 0.8,
        'web': 0.5,
    }
    _DEFAULT_AUTHORITY = 0.5

    # Substrings whose presence in the text nudges the authority score up.
    _CREDIBILITY_MARKERS = ('study', 'research', 'university', 'published', 'peer-reviewed', 'official')

    def __init__(self, name: str, description: str, rate_limit_delay: float = 1.0):
        """Initialise the tool.

        Args:
            name: Display name of the tool; used in error responses.
            description: Human-readable description of the tool.
            rate_limit_delay: Minimum number of seconds enforced between
                two calls to :meth:`rate_limit`. Defaults to 1.0.
        """
        self.name = name
        self.description = description
        # Wall-clock timestamp (time.time()) of the last rate-limited call;
        # 0 means "never", so the first call does not wait.
        self.last_request_time = 0
        self.rate_limit_delay = rate_limit_delay

    @abstractmethod
    def search(self, query: str, **kwargs) -> str:
        """Main search method - must be implemented by subclasses."""
        pass

    def rate_limit(self):
        """Block until at least ``rate_limit_delay`` seconds have passed
        since the previous call, then record the current time.

        Simple rate limiting to be respectful to APIs.
        """
        elapsed = time.time() - self.last_request_time
        if elapsed < self.rate_limit_delay:
            time.sleep(self.rate_limit_delay - elapsed)
        # Stamp after sleeping so the next interval starts from now.
        self.last_request_time = time.time()

    def score_research_quality(self, research_result: str, source: str = "web") -> Dict[str, float]:
        """Score research text on several heuristic quality indicators.

        Args:
            research_result: The text to evaluate.
            source: Label of the originating source (e.g. ``"arxiv"``,
                ``"web"``); used for the authority baseline.

        Returns:
            A dict with the keys ``recency``, ``authority``, ``specificity``,
            ``relevance`` and ``overall``. ``overall`` is the weighted sum of
            the other four; every value lies in ``[0, 1]``.
        """
        quality_score = {
            "recency": self._check_recency(research_result),
            "authority": self._check_authority(research_result, source),
            "specificity": self._check_specificity(research_result),
            "relevance": self._check_relevance(research_result),
            "overall": 0.0,
        }
        quality_score["overall"] = sum(
            quality_score[metric] * weight
            for metric, weight in self._QUALITY_WEIGHTS.items()
        )
        return quality_score

    def _check_recency(self, text: str) -> float:
        """Score how recent the text is, based on the latest 20xx year found.

        The score drops by 0.1 per year of age relative to the current year
        and is clamped to ``[0, 1]``. Text that is empty or mentions no
        20xx year gets a neutral 0.3.
        """
        if not text:
            return 0.3

        years = re.findall(r'\b(20\d{2})\b', text)
        if not years:
            return 0.3

        latest_year = max(int(year) for year in years)
        age_in_years = datetime.now().year - latest_year
        # Clamp on both sides: a year in the future (forecasts, typos) gives
        # a negative age and must not push the score above 1.0.
        return min(1.0, max(0.0, 1 - age_in_years / 10))

    def _check_authority(self, text: str, source: str) -> float:
        """Score source authority plus credibility markers in the text.

        Starts from a per-source baseline (case-insensitive lookup, 0.5 for
        unknown or missing sources) and adds 0.05 for each distinct
        credibility marker found in the text, up to a boost of 0.3. The
        result is capped at 1.0.
        """
        # Tolerate a missing source label instead of failing on .lower().
        source_key = (source or "").lower()
        score = self._AUTHORITY_BY_SOURCE.get(source_key, self._DEFAULT_AUTHORITY)

        if text:
            lowered = text.lower()  # lower-case once, not once per marker
            marker_count = sum(1 for marker in self._CREDIBILITY_MARKERS if marker in lowered)
            score += min(0.3, marker_count * 0.05)

        return min(1.0, score)

    def _check_specificity(self, text: str) -> float:
        """Score the density of concrete, quantitative statements.

        Each number found contributes 0.02 and each precision word
        (exactly, precisely, specifically, measured, calculated) contributes
        0.1. The result is clamped to ``[0.1, 1.0]``; empty text scores 0.1.
        """
        if not text:
            return 0.1

        numbers = len(re.findall(r'\b\d+(?:\.\d+)?%?\b', text))
        specific_terms = len(
            re.findall(r'\b(?:exactly|precisely|specifically|measured|calculated)\b', text, re.IGNORECASE)
        )

        specificity = min(1.0, (numbers * 0.02) + (specific_terms * 0.1))
        return max(0.1, specificity)

    def _check_relevance(self, text: str) -> float:
        """Check relevance to query (simplified implementation).

        Currently a placeholder that returns a constant 0.7 regardless of
        the text.
        """
        return 0.7

    def should_use_for_query(self, query: str) -> bool:
        """Determine if this tool should be used for the given query.

        The base implementation accepts every query.
        """
        return True

    def extract_key_info(self, text: str) -> Dict[str, Any]:
        """Extract simple metadata from research results.

        Returns:
            An empty dict for empty text; otherwise a dict with ``length``
            (character count) and the boolean flags ``has_numbers``,
            ``has_dates`` (a 20xx year is present) and ``has_urls``
            (an ``http://`` or ``https://`` prefix is present).
        """
        if not text:
            return {}

        return {
            'length': len(text),
            'has_numbers': bool(re.search(r'\d+', text)),
            'has_dates': bool(re.search(r'\b20\d{2}\b', text)),
            'has_urls': bool(re.search(r'http[s]?://', text)),
        }

    def format_error_response(self, query: str, error: str) -> str:
        """Format a consistent error response.

        The error is converted to a string and cut to its first 100
        characters; a trailing ``...`` is always appended.
        """
        return f"**{self.name} Research for: {query}**\n\nResearch temporarily unavailable: {str(error)[:100]}..."