File size: 4,901 Bytes
ce0bf87 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
"""
Base class for all research tools
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional
import time
import re
from datetime import datetime
class BaseTool(ABC):
"""Base class for all research tools"""
def __init__(self, name: str, description: str):
self.name = name
self.description = description
self.last_request_time = 0
self.rate_limit_delay = 1.0 # seconds between requests
@abstractmethod
def search(self, query: str, **kwargs) -> str:
"""Main search method - must be implemented by subclasses"""
pass
def rate_limit(self):
"""Simple rate limiting to be respectful to APIs"""
current_time = time.time()
time_since_last = current_time - self.last_request_time
if time_since_last < self.rate_limit_delay:
time.sleep(self.rate_limit_delay - time_since_last)
self.last_request_time = time.time()
def score_research_quality(self, research_result: str, source: str = "web") -> Dict[str, float]:
"""Score research based on multiple quality indicators"""
quality_score = {
"recency": self._check_recency(research_result),
"authority": self._check_authority(research_result, source),
"specificity": self._check_specificity(research_result),
"relevance": self._check_relevance(research_result),
"overall": 0.0
}
# Weighted overall score
weights = {"recency": 0.2, "authority": 0.3, "specificity": 0.3, "relevance": 0.2}
quality_score["overall"] = sum(quality_score[metric] * weight for metric, weight in weights.items())
return quality_score
def _check_recency(self, text: str) -> float:
"""Check for recent dates and current information"""
if not text:
return 0.3
# Look for years
years = re.findall(r'\b(20\d{2})\b', text)
if years:
latest_year = max(int(year) for year in years)
current_year = datetime.now().year
recency = max(0, 1 - (current_year - latest_year) / 10) # Decay over 10 years
return recency
return 0.3 # Default for no date found
def _check_authority(self, text: str, source: str) -> float:
"""Check source authority and credibility indicators"""
authority_indicators = {
'arxiv': 0.9,
'sec': 0.95,
'github': 0.7,
'wikipedia': 0.8,
'web': 0.5
}
base_score = authority_indicators.get(source.lower(), 0.5)
# Look for credibility markers in text
if text:
credibility_markers = ['study', 'research', 'university', 'published', 'peer-reviewed', 'official']
marker_count = sum(1 for marker in credibility_markers if marker in text.lower())
credibility_boost = min(0.3, marker_count * 0.05)
base_score += credibility_boost
return min(1.0, base_score)
def _check_specificity(self, text: str) -> float:
"""Check for specific data points and quantitative information"""
if not text:
return 0.1
# Count numbers, percentages, specific metrics
numbers = len(re.findall(r'\b\d+(?:\.\d+)?%?\b', text))
specific_terms = len(re.findall(r'\b(?:exactly|precisely|specifically|measured|calculated)\b', text, re.IGNORECASE))
specificity = min(1.0, (numbers * 0.02) + (specific_terms * 0.1))
return max(0.1, specificity) # Minimum baseline
def _check_relevance(self, text: str) -> float:
"""Check relevance to query (simplified implementation)"""
# This would ideally use the original query for comparison
# For now, return a baseline that could be enhanced
return 0.7 # Placeholder - could be enhanced with query matching
def should_use_for_query(self, query: str) -> bool:
"""Determine if this tool should be used for the given query"""
# Default implementation - override in subclasses for smart routing
return True
def extract_key_info(self, text: str) -> Dict[str, Any]:
"""Extract key information from research results"""
if not text:
return {}
return {
'length': len(text),
'has_numbers': bool(re.search(r'\d+', text)),
'has_dates': bool(re.search(r'\b20\d{2}\b', text)),
'has_urls': bool(re.search(r'http[s]?://', text))
}
def format_error_response(self, query: str, error: str) -> str:
"""Format a consistent error response"""
return f"**{self.name} Research for: {query}**\n\nResearch temporarily unavailable: {str(error)[:100]}..." |