Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
Web Search Functionality Verification for GAIA Enhanced Agent | |
This script comprehensively tests the web search capabilities of the deployment-ready | |
GAIA Enhanced Agent to ensure it's ready for GAIA benchmark evaluation. | |
Tests include: | |
1. Environment configuration verification | |
2. Exa API connectivity and authentication | |
3. AGNO tools initialization and web search tool availability | |
4. End-to-end web search workflow testing | |
5. Integration with the enhanced unified AGNO agent | |
""" | |
import os | |
import sys | |
import logging | |
import traceback | |
from pathlib import Path | |
from typing import Dict, Any, List | |
# Configure logging for the whole script: INFO and above, each record rendered
# as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every function and method in this file.
logger = logging.getLogger(__name__)
def load_env_file(env_path='.env'):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. A leading ``export `` is dropped, whitespace around the key and
    value is stripped, and one pair of matching surrounding quotes is removed
    from the value. Existing environment variables are overwritten.

    Args:
        env_path: Path of the file to read. Defaults to ``.env`` in the
            current working directory.

    Returns:
        None. A missing file (or a path that is not a regular file) is
        silently ignored.
    """
    env_file = Path(env_path)
    # is_file() rather than exists(): a directory named ".env" must not make
    # the subsequent open() raise.
    if not env_file.is_file():
        return

    # utf-8-sig decodes plain UTF-8 and also swallows a leading BOM, which
    # would otherwise become part of the first key.
    with env_file.open('r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            # Accept shell-style "export KEY=value" lines.
            if line.startswith('export '):
                line = line[len('export '):].lstrip()

            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()

            # Drop one pair of matching quotes so KEY="abc" yields abc.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]

            # An empty name cannot be stored in the environment; skip it
            # instead of raising.
            if key:
                os.environ[key] = value


# Load environment variables
load_env_file()
class WebSearchFunctionalityTester:
    """Comprehensive tester for web search functionality in GAIA Enhanced Agent.

    Every ``test_*`` method stores its outcome in ``self.test_results`` under
    a fixed key, as a dict holding at least ``status`` (``PASS``, ``PARTIAL``,
    ``FAIL`` or ``SKIP``). Exceptions are caught inside each test; unexpected
    ones are additionally appended to ``self.errors`` as messages.
    """

    # Placeholder value that counts as "API key not configured".
    _PLACEHOLDER_KEY = 'your_api_key_here'

    def __init__(self):
        """Initialize the web search functionality tester."""
        # Maps test name -> result dict produced by the matching test_* method.
        self.test_results: Dict[str, Dict[str, Any]] = {}
        # Human-readable messages for unexpected exceptions raised by tests.
        self.errors: List[str] = []

    @classmethod
    def _is_configured(cls, value) -> bool:
        """Return True when *value* is a non-empty, non-placeholder API key."""
        return bool(value) and value != cls._PLACEHOLDER_KEY

    @staticmethod
    def _summarize_tool_statuses(statuses):
        """Collapse per-tool status strings into ``(overall_status, details)``.

        Failure statuses have the form ``"Failed: <reason>"``, so they are
        matched by prefix rather than by equality.
        """
        if "Available" in statuses:
            return "PASS", "At least one web search tool available"
        if any(status.startswith("Failed") for status in statuses):
            return "PARTIAL", "Some web search tools failed to initialize"
        return "SKIP", "No web search tools configured"

    def _probe_tool(self, tool_cls, label: str, env_key: str) -> str:
        """Try to construct *tool_cls* with the API key stored in *env_key*.

        Returns:
            ``"Available"``, ``"Failed: <error>"`` or ``"Skipped (no API key)"``.
        """
        api_key = os.getenv(env_key)
        if not self._is_configured(api_key):
            logger.warning(f"β οΈ {env_key} not configured, skipping {label} initialization")
            return "Skipped (no API key)"
        try:
            tool_cls(api_key=api_key)
        except Exception as e:
            logger.warning(f"β οΈ AGNO {label} initialization failed: {e}")
            return f"Failed: {e}"
        logger.info(f"β AGNO {label} initialized successfully")
        return "Available"

    def run_all_tests(self) -> Dict[str, Any]:
        """Run all web search functionality tests.

        Returns:
            The summary report built by ``generate_summary_report``.
        """
        logger.info("π Starting comprehensive web search functionality verification...")

        # Test 1: Environment Configuration
        self.test_environment_configuration()
        # Test 2: Exa API Connectivity
        self.test_exa_api_connectivity()
        # Test 3: AGNO Tools Initialization
        self.test_agno_tools_initialization()
        # Test 4: Enhanced Unified AGNO Agent
        self.test_enhanced_unified_agno_agent()
        # Test 5: End-to-End Web Search Workflow (reads the result of test 4)
        self.test_end_to_end_web_search()

        # Generate summary report
        return self.generate_summary_report()

    def test_environment_configuration(self):
        """Test 1: Verify environment configuration for web search.

        Records ``PASS`` when every required API key is set, ``PARTIAL`` when
        at least one is missing or still the placeholder, ``FAIL`` on error.
        """
        logger.info("π§ Test 1: Environment Configuration Verification")
        try:
            # Check required API keys
            required_keys = {
                'MISTRAL_API_KEY': 'Mistral API for AGNO orchestration',
                'EXA_API_KEY': 'Exa API for advanced web search',
                'FIRECRAWL_API_KEY': 'Firecrawl API for web content extraction',
            }
            missing_keys = []
            configured_keys = []
            for key, description in required_keys.items():
                if self._is_configured(os.getenv(key)):
                    configured_keys.append(f"{key}: {description}")
                    logger.info(f"β {key} configured")
                else:
                    missing_keys.append(f"{key}: {description}")
                    logger.warning(f"β οΈ {key} not configured")

            # Check .env file existence
            env_file_exists = Path('.env').exists()
            logger.info(f"π .env file exists: {env_file_exists}")

            self.test_results['environment_configuration'] = {
                'status': 'PASS' if not missing_keys else 'PARTIAL',
                'configured_keys': configured_keys,
                'missing_keys': missing_keys,
                'env_file_exists': env_file_exists,
                'details': f"Configured: {len(configured_keys)}/{len(required_keys)} API keys",
            }
            if missing_keys:
                logger.warning(f"β οΈ Missing API keys may limit functionality: {missing_keys}")
            else:
                logger.info("β All required API keys configured")
        except Exception as e:
            self.test_results['environment_configuration'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'Failed to verify environment configuration',
            }
            self.errors.append(f"Environment configuration test failed: {e}")
            logger.error(f"β Environment configuration test failed: {e}")

    def test_exa_api_connectivity(self):
        """Test 2: Test Exa API connectivity and authentication.

        Skipped when ``EXA_API_KEY`` is not configured. Otherwise runs one
        small search and records ``PASS`` if it returns at least one result.
        """
        logger.info("π Test 2: Exa API Connectivity Test")
        try:
            exa_api_key = os.getenv('EXA_API_KEY')
            if not self._is_configured(exa_api_key):
                self.test_results['exa_api_connectivity'] = {
                    'status': 'SKIP',
                    'details': 'EXA_API_KEY not configured, skipping connectivity test',
                }
                logger.warning("β οΈ EXA_API_KEY not configured, skipping connectivity test")
                return

            # Test Exa API import and basic functionality
            try:
                from exa_py import Exa
                logger.info("β Exa Python library imported successfully")

                # Initialize Exa client
                exa_client = Exa(api_key=exa_api_key)
                logger.info("β Exa client initialized successfully")

                # Test basic search functionality
                test_query = "artificial intelligence recent developments"
                logger.info(f"π Testing Exa search with query: '{test_query}'")
                search_results = exa_client.search(
                    query=test_query,
                    num_results=3,
                    type="neural",
                )

                if search_results and hasattr(search_results, 'results') and search_results.results:
                    result_count = len(search_results.results)
                    logger.info(f"β Exa search successful: {result_count} results returned")

                    # Log first result for verification. A result may carry
                    # no title, so fall back to an empty string before slicing.
                    first_result = search_results.results[0]
                    first_title = (getattr(first_result, 'title', None) or '')[:100]
                    logger.info(f"π First result: {first_title}...")

                    self.test_results['exa_api_connectivity'] = {
                        'status': 'PASS',
                        'details': f'Exa API working correctly, returned {result_count} results',
                        'test_query': test_query,
                        'result_count': result_count,
                        'first_result_title': first_title,
                    }
                else:
                    self.test_results['exa_api_connectivity'] = {
                        'status': 'FAIL',
                        'details': 'Exa API returned no results or invalid response',
                        'test_query': test_query,
                    }
                    logger.error("β Exa API returned no results or invalid response")
            except ImportError as e:
                self.test_results['exa_api_connectivity'] = {
                    'status': 'FAIL',
                    'error': f'Exa library import failed: {e}',
                    'details': 'exa-py library not available',
                }
                logger.error(f"β Exa library import failed: {e}")
        except Exception as e:
            self.test_results['exa_api_connectivity'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'Exa API connectivity test failed',
            }
            self.errors.append(f"Exa API connectivity test failed: {e}")
            logger.error(f"β Exa API connectivity test failed: {e}")

    def test_agno_tools_initialization(self):
        """Test 3: Test AGNO tools initialization including web search tools.

        Records ``PASS`` when at least one of ExaTools / FirecrawlTools can be
        constructed, ``PARTIAL`` when none is available but at least one
        failed, ``SKIP`` when neither key is configured, and ``FAIL`` when the
        tools cannot be imported or an unexpected error occurs.
        """
        logger.info("π οΈ Test 3: AGNO Tools Initialization Test")
        try:
            # Test AGNO framework import
            try:
                from agno.tools.exa import ExaTools
                from agno.tools.firecrawl import FirecrawlTools
                logger.info("β AGNO web search tools imported successfully")
            except ImportError as e:
                self.test_results['agno_tools_initialization'] = {
                    'status': 'FAIL',
                    'error': f'AGNO tools import failed: {e}',
                    'details': 'AGNO framework or web search tools not available',
                }
                logger.error(f"β AGNO tools import failed: {e}")
                return

            # Test Exa Tools initialization
            exa_tools_status = self._probe_tool(ExaTools, 'ExaTools', 'EXA_API_KEY')
            # Test Firecrawl Tools initialization
            firecrawl_tools_status = self._probe_tool(
                FirecrawlTools, 'FirecrawlTools', 'FIRECRAWL_API_KEY'
            )

            # Determine overall status
            overall_status, details = self._summarize_tool_statuses(
                [exa_tools_status, firecrawl_tools_status]
            )
            self.test_results['agno_tools_initialization'] = {
                'status': overall_status,
                'details': details,
                'exa_tools_status': exa_tools_status,
                'firecrawl_tools_status': firecrawl_tools_status,
            }
        except Exception as e:
            self.test_results['agno_tools_initialization'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'AGNO tools initialization test failed',
            }
            self.errors.append(f"AGNO tools initialization test failed: {e}")
            logger.error(f"β AGNO tools initialization test failed: {e}")

    def test_enhanced_unified_agno_agent(self):
        """Test 4: Test Enhanced Unified AGNO Agent initialization and web search integration.

        Records ``PASS`` when the agent constructs and reports a truthy
        ``available`` attribute, ``PARTIAL`` when it constructs but is not
        available (or has no such attribute), ``FAIL`` otherwise.
        """
        logger.info("π€ Test 4: Enhanced Unified AGNO Agent Test")
        try:
            # Import the Enhanced Unified AGNO Agent
            try:
                from agents.enhanced_unified_agno_agent import GAIAAgent
                logger.info("β Enhanced Unified AGNO Agent imported successfully")
            except ImportError as e:
                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'FAIL',
                    'error': f'Enhanced Unified AGNO Agent import failed: {e}',
                    'details': 'Agent module not available',
                }
                logger.error(f"β Enhanced Unified AGNO Agent import failed: {e}")
                return

            # Initialize the agent
            try:
                agent = GAIAAgent()
                logger.info("β Enhanced Unified AGNO Agent initialized successfully")

                # Read the availability flag once, tolerating agents that do
                # not define it, so a missing attribute is not misreported as
                # an initialization failure.
                has_flag = hasattr(agent, 'available')
                is_available = bool(agent.available) if has_flag else False

                # Check agent availability
                if is_available:
                    logger.info("β Enhanced Unified AGNO Agent is available and ready")
                    agent_status = "Available and ready"
                else:
                    logger.warning("β οΈ Enhanced Unified AGNO Agent initialized but not available")
                    agent_status = "Initialized but not available"

                # Check tool status
                if hasattr(agent, 'get_tool_status'):
                    tool_status = agent.get_tool_status()
                    web_search_tools = [
                        f"{tool_name}: {status}"
                        for tool_name, status in tool_status.items()
                        if tool_name in ['exa', 'firecrawl']
                    ]
                    logger.info(f"π οΈ Web search tools status: {web_search_tools}")
                else:
                    web_search_tools = ["Tool status method not available"]

                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'PASS' if is_available else 'PARTIAL',
                    'details': agent_status,
                    'web_search_tools': web_search_tools,
                    'agent_available': agent.available if has_flag else 'Unknown',
                }
            except Exception as e:
                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'FAIL',
                    'error': str(e),
                    'details': 'Enhanced Unified AGNO Agent initialization failed',
                }
                logger.error(f"β Enhanced Unified AGNO Agent initialization failed: {e}")
        except Exception as e:
            self.test_results['enhanced_unified_agno_agent'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'Enhanced Unified AGNO Agent test failed',
            }
            self.errors.append(f"Enhanced Unified AGNO Agent test failed: {e}")
            logger.error(f"β Enhanced Unified AGNO Agent test failed: {e}")

    def test_end_to_end_web_search(self):
        """Test 5: End-to-end web search workflow test.

        Skipped unless test 4 has run and did not fail. Sends three sample
        questions to a fresh agent and records ``PASS`` (all answered),
        ``PARTIAL`` (some answered) or ``FAIL`` (none answered).
        """
        logger.info("π Test 5: End-to-End Web Search Workflow Test")
        try:
            # Check if we have the necessary components
            agent_result = self.test_results.get('enhanced_unified_agno_agent')
            if agent_result is None or agent_result['status'] == 'FAIL':
                self.test_results['end_to_end_web_search'] = {
                    'status': 'SKIP',
                    'details': 'Enhanced Unified AGNO Agent not available, skipping end-to-end test',
                }
                logger.warning("β οΈ Enhanced Unified AGNO Agent not available, skipping end-to-end test")
                return

            # Import and initialize the agent
            from agents.enhanced_unified_agno_agent import GAIAAgent
            agent = GAIAAgent()
            if not (hasattr(agent, 'available') and agent.available):
                self.test_results['end_to_end_web_search'] = {
                    'status': 'SKIP',
                    'details': 'Enhanced Unified AGNO Agent not available for testing',
                }
                logger.warning("β οΈ Enhanced Unified AGNO Agent not available for testing")
                return

            # Test web search with sample questions that require current information
            test_questions = [
                "What are the latest developments in artificial intelligence in 2024?",
                "Who is the current CEO of OpenAI?",
                "What is the latest version of Python as of 2024?",
            ]
            # Answers that count as "no usable response".
            error_answers = ("Agent not available", "Unable to process this question")

            test_results = []
            for i, question in enumerate(test_questions, 1):
                logger.info(f"π Testing question {i}: {question}")
                try:
                    # Process the question with the agent
                    answer = agent(question)
                    if answer and answer not in error_answers:
                        logger.info(f"β Question {i} processed successfully")
                        logger.info(f"π Answer preview: {answer[:200]}...")
                        test_results.append({
                            'question': question,
                            'status': 'SUCCESS',
                            'answer_preview': answer[:200],
                            'answer_length': len(answer),
                        })
                    else:
                        logger.warning(f"β οΈ Question {i} returned empty or error response")
                        test_results.append({
                            'question': question,
                            'status': 'EMPTY_RESPONSE',
                            'answer': answer,
                        })
                except Exception as e:
                    # One failing question must not abort the remaining ones.
                    logger.error(f"β Question {i} processing failed: {e}")
                    test_results.append({
                        'question': question,
                        'status': 'ERROR',
                        'error': str(e),
                    })

            # Determine overall status
            successful_tests = sum(1 for result in test_results if result['status'] == 'SUCCESS')
            total_tests = len(test_questions)
            if successful_tests == total_tests:
                overall_status = 'PASS'
                details = f'All {total_tests} test questions processed successfully'
            elif successful_tests > 0:
                overall_status = 'PARTIAL'
                details = f'{successful_tests}/{total_tests} test questions processed successfully'
            else:
                overall_status = 'FAIL'
                details = 'No test questions processed successfully'

            self.test_results['end_to_end_web_search'] = {
                'status': overall_status,
                'details': details,
                'successful_tests': successful_tests,
                'total_tests': total_tests,
                'test_results': test_results,
            }
            logger.info(f"π End-to-end test results: {successful_tests}/{total_tests} successful")
        except Exception as e:
            self.test_results['end_to_end_web_search'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'End-to-end web search workflow test failed',
            }
            self.errors.append(f"End-to-end web search test failed: {e}")
            logger.error(f"β End-to-end web search test failed: {e}")

    def _build_recommendations(self, web_search_ready: bool) -> List[str]:
        """Derive follow-up actions from the recorded test results."""
        recommendations = []

        env_result = self.test_results.get('environment_configuration')
        if env_result and env_result['status'] != 'PASS' and 'missing_keys' in env_result:
            recommendations.append(f"Configure missing API keys: {env_result['missing_keys']}")

        exa_result = self.test_results.get('exa_api_connectivity')
        if exa_result:
            if exa_result['status'] == 'FAIL':
                recommendations.append("Fix Exa API connectivity issues")
            elif exa_result['status'] == 'SKIP':
                recommendations.append("Configure EXA_API_KEY for web search functionality")

        tools_result = self.test_results.get('agno_tools_initialization')
        if tools_result and tools_result['status'] == 'FAIL':
            recommendations.append("Fix AGNO web search tools initialization issues")

        agent_result = self.test_results.get('enhanced_unified_agno_agent')
        if agent_result and agent_result['status'] == 'FAIL':
            recommendations.append("Fix Enhanced Unified AGNO Agent initialization issues")

        e2e_result = self.test_results.get('end_to_end_web_search')
        if e2e_result and e2e_result['status'] == 'FAIL':
            recommendations.append("Investigate end-to-end web search failures")

        if not recommendations:
            # Only claim readiness when the overall verdict agrees with it.
            if web_search_ready:
                recommendations.append("Web search functionality is ready for deployment!")
            else:
                recommendations.append(
                    "Review the detailed results; web search is not ready for deployment"
                )
        return recommendations

    def generate_summary_report(self) -> Dict[str, Any]:
        """Generate a comprehensive summary report of all tests.

        Returns:
            A dict with ``overall_status`` (``FULLY_READY``, ``MOSTLY_READY``,
            ``PARTIALLY_READY`` or ``NOT_READY``), ``test_summary`` counts,
            ``detailed_results``, ``errors``, ``recommendations`` and
            ``deployment_readiness``. The report is also written to the log.
        """
        logger.info("π Generating comprehensive test summary report...")

        # Count test results
        statuses = [result['status'] for result in self.test_results.values()]
        passed_tests = statuses.count('PASS')
        partial_tests = statuses.count('PARTIAL')
        failed_tests = statuses.count('FAIL')
        skipped_tests = statuses.count('SKIP')
        total_tests = len(statuses)

        # Determine overall status
        if failed_tests == 0 and passed_tests > 0:
            if partial_tests == 0 and skipped_tests == 0:
                overall_status = 'FULLY_READY'
            else:
                overall_status = 'MOSTLY_READY'
        elif passed_tests > 0 or partial_tests > 0:
            overall_status = 'PARTIALLY_READY'
        else:
            overall_status = 'NOT_READY'

        web_search_ready = overall_status in ['FULLY_READY', 'MOSTLY_READY']

        # Generate recommendations
        recommendations = self._build_recommendations(web_search_ready)

        summary_report = {
            'overall_status': overall_status,
            'test_summary': {
                'total_tests': total_tests,
                'passed': passed_tests,
                'partial': partial_tests,
                'failed': failed_tests,
                'skipped': skipped_tests,
            },
            'detailed_results': self.test_results,
            'errors': self.errors,
            'recommendations': recommendations,
            'deployment_readiness': {
                'web_search_ready': web_search_ready,
                'critical_issues': failed_tests,
                'minor_issues': partial_tests + skipped_tests,
            },
        }

        # Log summary
        logger.info("=" * 80)
        logger.info("π WEB SEARCH FUNCTIONALITY VERIFICATION SUMMARY")
        logger.info("=" * 80)
        logger.info(f"Overall Status: {overall_status}")
        logger.info(f"Tests: {passed_tests} passed, {partial_tests} partial, {failed_tests} failed, {skipped_tests} skipped")
        logger.info(f"Web Search Ready: {summary_report['deployment_readiness']['web_search_ready']}")
        if recommendations:
            logger.info("\nπ Recommendations:")
            for i, rec in enumerate(recommendations, 1):
                logger.info(f" {i}. {rec}")
        if self.errors:
            logger.info(f"\nβ Errors encountered: {len(self.errors)}")
            for error in self.errors:
                logger.error(f" - {error}")
        logger.info("=" * 80)
        return summary_report
def main():
    """Run the web search verification suite and print the final verdict.

    Returns:
        0 when the summary report marks web search as deployment-ready,
        1 otherwise, including when any exception is raised along the way
        (the exception and its traceback are printed).
    """
    rule = "=" * 80
    not_ready_line = "β WEB SEARCH FUNCTIONALITY: NOT READY - CRITICAL ISSUES NEED RESOLUTION"
    # Verdict line per overall status; any other status falls back to NOT READY.
    verdict_lines = {
        'FULLY_READY': "β WEB SEARCH FUNCTIONALITY: FULLY READY FOR GAIA EVALUATION",
        'MOSTLY_READY': "β WEB SEARCH FUNCTIONALITY: MOSTLY READY FOR GAIA EVALUATION",
        'PARTIALLY_READY': "β οΈ WEB SEARCH FUNCTIONALITY: PARTIALLY READY - SOME ISSUES NEED ATTENTION",
    }

    print("π GAIA Enhanced Agent - Web Search Functionality Verification")
    print(rule)
    try:
        # Build the tester and run every check in one go.
        report = WebSearchFunctionalityTester().run_all_tests()

        # Final status banner.
        print("\n" + rule)
        print("π― FINAL VERIFICATION RESULT")
        print(rule)

        status = report['overall_status']
        readiness = report['deployment_readiness']
        is_ready = readiness['web_search_ready']

        print(verdict_lines.get(status, not_ready_line))
        print(f"Deployment Ready: {'YES' if is_ready else 'NO'}")
        print(f"Critical Issues: {readiness['critical_issues']}")
        print(f"Minor Issues: {readiness['minor_issues']}")

        if is_ready:
            return 0
        return 1
    except Exception as e:
        print(f"β Verification failed with error: {e}")
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    # Propagate the verification outcome as the process exit status.
    sys.exit(main())