#!/usr/bin/env python3
"""
Web Search Functionality Verification for GAIA Enhanced Agent

This script comprehensively tests the web search capabilities of the
deployment-ready GAIA Enhanced Agent to ensure it's ready for GAIA benchmark
evaluation.

Tests include:
1. Environment configuration verification
2. Exa API connectivity and authentication
3. AGNO tools initialization and web search tool availability
4. End-to-end web search workflow testing
5. Integration with the enhanced unified AGNO agent
"""

import os
import sys
import logging
import traceback
from collections import Counter
from pathlib import Path
from typing import Dict, Any, List

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Template value that marks an API key as "not filled in yet".
_PLACEHOLDER_API_KEY = 'your_api_key_here'


def _get_api_key(name):
    """Return the API key stored in env var *name*, or None when it is unset,
    empty, or still the placeholder value."""
    value = os.getenv(name)
    if value and value != _PLACEHOLDER_API_KEY:
        return value
    return None


def load_env_file():
    """Load environment variables from a ``.env`` file in the current directory.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty key are ignored. Keys and values are whitespace-stripped, and one
    pair of matching surrounding quotes is removed from the value so that
    ``KEY="value"`` yields ``value``. Existing variables are overwritten.
    Does nothing when no ``.env`` file exists.
    """
    env_file = Path('.env')
    if not env_file.is_file():
        return

    with open(env_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()
            if not key:
                # os.environ rejects empty names; skip instead of crashing.
                continue
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            os.environ[key] = value


# Load environment variables
load_env_file()


class WebSearchFunctionalityTester:
    """Comprehensive tester for web search functionality in GAIA Enhanced Agent.

    Each ``test_*`` method stores a result dict (always containing ``status``
    and ``details``) in ``self.test_results`` under its own key and never
    raises; failures are additionally collected in ``self.errors``.
    """

    def __init__(self):
        """Initialize the web search functionality tester."""
        self.test_results = {}
        self.errors = []

    def _record_failure(self, test_name, message, error, details, error_text=None):
        """Store a FAIL result for *test_name*, remember the error and log it.

        Args:
            test_name: Key under which the result is stored.
            message: Short description used as prefix in log/errors entries.
            error: The exception that caused the failure.
            details: Human-readable explanation stored in the result.
            error_text: Optional override for the stored ``error`` field
                (defaults to ``str(error)``).
        """
        self.test_results[test_name] = {
            'status': 'FAIL',
            'error': error_text if error_text is not None else str(error),
            'details': details
        }
        self.errors.append(f"{message}: {error}")
        logger.error(f"❌ {message}: {error}")

    def run_all_tests(self) -> Dict[str, Any]:
        """Run all web search functionality tests and return the summary report."""
        logger.info("🚀 Starting comprehensive web search functionality verification...")

        # Test 1: Environment Configuration
        self.test_environment_configuration()

        # Test 2: Exa API Connectivity
        self.test_exa_api_connectivity()

        # Test 3: AGNO Tools Initialization
        self.test_agno_tools_initialization()

        # Test 4: Enhanced Unified AGNO Agent
        self.test_enhanced_unified_agno_agent()

        # Test 5: End-to-End Web Search Workflow
        self.test_end_to_end_web_search()

        # Generate summary report
        return self.generate_summary_report()

    def test_environment_configuration(self):
        """Test 1: Verify environment configuration for web search.

        Result status is PASS when every required API key is configured and
        PARTIAL otherwise.
        """
        logger.info("🔧 Test 1: Environment Configuration Verification")

        try:
            # Check required API keys
            required_keys = {
                'MISTRAL_API_KEY': 'Mistral API for AGNO orchestration',
                'EXA_API_KEY': 'Exa API for advanced web search',
                'FIRECRAWL_API_KEY': 'Firecrawl API for web content extraction'
            }

            missing_keys = []
            configured_keys = []

            for key, description in required_keys.items():
                if _get_api_key(key):
                    configured_keys.append(f"{key}: {description}")
                    logger.info(f"✅ {key} configured")
                else:
                    missing_keys.append(f"{key}: {description}")
                    logger.warning(f"⚠️ {key} not configured")

            # Check .env file existence
            env_file_exists = Path('.env').exists()
            logger.info(f"📄 .env file exists: {env_file_exists}")

            self.test_results['environment_configuration'] = {
                'status': 'PASS' if not missing_keys else 'PARTIAL',
                'configured_keys': configured_keys,
                'missing_keys': missing_keys,
                'env_file_exists': env_file_exists,
                'details': f"Configured: {len(configured_keys)}/{len(required_keys)} API keys"
            }

            if missing_keys:
                logger.warning(f"⚠️ Missing API keys may limit functionality: {missing_keys}")
            else:
                logger.info("✅ All required API keys configured")

        except Exception as e:
            self._record_failure(
                'environment_configuration',
                'Environment configuration test failed',
                e,
                details='Failed to verify environment configuration'
            )

    def test_exa_api_connectivity(self):
        """Test 2: Test Exa API connectivity and authentication.

        Result status is SKIP without a configured EXA_API_KEY, PASS when a
        sample search returns results, and FAIL otherwise.
        """
        logger.info("🌐 Test 2: Exa API Connectivity Test")

        try:
            exa_api_key = _get_api_key('EXA_API_KEY')
            if not exa_api_key:
                self.test_results['exa_api_connectivity'] = {
                    'status': 'SKIP',
                    'details': 'EXA_API_KEY not configured, skipping connectivity test'
                }
                logger.warning("⚠️ EXA_API_KEY not configured, skipping connectivity test")
                return

            # Test Exa API import and basic functionality
            try:
                from exa_py import Exa
            except ImportError as e:
                self._record_failure(
                    'exa_api_connectivity',
                    'Exa library import failed',
                    e,
                    details='exa-py library not available',
                    error_text=f'Exa library import failed: {e}'
                )
                return
            logger.info("✅ Exa Python library imported successfully")

            # Initialize Exa client
            exa_client = Exa(api_key=exa_api_key)
            logger.info("✅ Exa client initialized successfully")

            # Test basic search functionality
            test_query = "artificial intelligence recent developments"
            logger.info(f"🔍 Testing Exa search with query: '{test_query}'")

            search_results = exa_client.search(
                query=test_query,
                num_results=3,
                type="neural"
            )

            results = getattr(search_results, 'results', None) if search_results else None
            if results:
                result_count = len(results)
                logger.info(f"✅ Exa search successful: {result_count} results returned")

                # Log first result for verification; a result may carry no
                # title, which must not turn a working search into a failure.
                first_title = (getattr(results[0], 'title', None) or '')[:100]
                logger.info(f"📄 First result: {first_title}...")

                self.test_results['exa_api_connectivity'] = {
                    'status': 'PASS',
                    'details': f'Exa API working correctly, returned {result_count} results',
                    'test_query': test_query,
                    'result_count': result_count,
                    'first_result_title': first_title
                }
            else:
                self.test_results['exa_api_connectivity'] = {
                    'status': 'FAIL',
                    'details': 'Exa API returned no results or invalid response',
                    'test_query': test_query
                }
                logger.error("❌ Exa API returned no results or invalid response")

        except Exception as e:
            self._record_failure(
                'exa_api_connectivity',
                'Exa API connectivity test failed',
                e,
                details='Exa API connectivity test failed'
            )

    @staticmethod
    def _init_tool(tool_cls, env_key, label):
        """Try to construct *tool_cls* with the API key from *env_key*.

        Returns one of the status strings ``"Available"``,
        ``"Failed: <error>"`` or ``"Skipped (no API key)"``.
        """
        api_key = _get_api_key(env_key)
        if not api_key:
            logger.warning(f"⚠️ {env_key} not configured, skipping {label} initialization")
            return "Skipped (no API key)"

        try:
            tool_cls(api_key=api_key)
        except Exception as e:
            logger.warning(f"⚠️ AGNO {label} initialization failed: {e}")
            return f"Failed: {e}"

        logger.info(f"✅ AGNO {label} initialized successfully")
        return "Available"

    @staticmethod
    def _summarize_tool_statuses(exa_tools_status, firecrawl_tools_status):
        """Combine the two tool status strings into ``(overall_status, details)``.

        PASS when at least one tool is available, PARTIAL when none is
        available but at least one failed to initialize, SKIP otherwise.
        """
        statuses = (exa_tools_status, firecrawl_tools_status)
        if "Available" in statuses:
            return "PASS", "At least one web search tool available"
        # Failure statuses carry the error text ("Failed: ..."), so compare
        # by prefix rather than by equality.
        if any(status.startswith("Failed") for status in statuses):
            return "PARTIAL", "Some web search tools failed to initialize"
        return "SKIP", "No web search tools configured"

    def test_agno_tools_initialization(self):
        """Test 3: Test AGNO tools initialization including web search tools."""
        logger.info("🛠️ Test 3: AGNO Tools Initialization Test")

        try:
            # Test AGNO framework import
            try:
                from agno.tools.exa import ExaTools
                from agno.tools.firecrawl import FirecrawlTools
            except ImportError as e:
                self._record_failure(
                    'agno_tools_initialization',
                    'AGNO tools import failed',
                    e,
                    details='AGNO framework or web search tools not available',
                    error_text=f'AGNO tools import failed: {e}'
                )
                return
            logger.info("✅ AGNO web search tools imported successfully")

            exa_tools_status = self._init_tool(ExaTools, 'EXA_API_KEY', 'ExaTools')
            firecrawl_tools_status = self._init_tool(
                FirecrawlTools, 'FIRECRAWL_API_KEY', 'FirecrawlTools'
            )

            # Determine overall status
            overall_status, details = self._summarize_tool_statuses(
                exa_tools_status, firecrawl_tools_status
            )

            self.test_results['agno_tools_initialization'] = {
                'status': overall_status,
                'details': details,
                'exa_tools_status': exa_tools_status,
                'firecrawl_tools_status': firecrawl_tools_status
            }

        except Exception as e:
            self._record_failure(
                'agno_tools_initialization',
                'AGNO tools initialization test failed',
                e,
                details='AGNO tools initialization test failed'
            )

    def test_enhanced_unified_agno_agent(self):
        """Test 4: Test Enhanced Unified AGNO Agent initialization and web search integration.

        Result status is PASS when the agent reports itself available,
        PARTIAL when it initializes but is not available, FAIL on import or
        initialization errors.
        """
        logger.info("🤖 Test 4: Enhanced Unified AGNO Agent Test")

        try:
            # Import the Enhanced Unified AGNO Agent
            try:
                from agents.enhanced_unified_agno_agent import GAIAAgent
            except ImportError as e:
                self._record_failure(
                    'enhanced_unified_agno_agent',
                    'Enhanced Unified AGNO Agent import failed',
                    e,
                    details='Agent module not available',
                    error_text=f'Enhanced Unified AGNO Agent import failed: {e}'
                )
                return
            logger.info("✅ Enhanced Unified AGNO Agent imported successfully")

            # Initialize the agent
            try:
                agent = GAIAAgent()
                logger.info("✅ Enhanced Unified AGNO Agent initialized successfully")

                # Check agent availability; an agent without the attribute
                # is treated as not available instead of raising.
                is_available = bool(getattr(agent, 'available', False))
                if is_available:
                    logger.info("✅ Enhanced Unified AGNO Agent is available and ready")
                    agent_status = "Available and ready"
                else:
                    logger.warning("⚠️ Enhanced Unified AGNO Agent initialized but not available")
                    agent_status = "Initialized but not available"

                # Check tool status
                if hasattr(agent, 'get_tool_status'):
                    tool_status = agent.get_tool_status()
                    web_search_tools = [
                        f"{tool_name}: {status}"
                        for tool_name, status in tool_status.items()
                        if tool_name in ('exa', 'firecrawl')
                    ]
                    logger.info(f"🛠️ Web search tools status: {web_search_tools}")
                else:
                    web_search_tools = ["Tool status method not available"]

                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'PASS' if is_available else 'PARTIAL',
                    'details': agent_status,
                    'web_search_tools': web_search_tools,
                    'agent_available': getattr(agent, 'available', 'Unknown')
                }

            except Exception as e:
                self._record_failure(
                    'enhanced_unified_agno_agent',
                    'Enhanced Unified AGNO Agent initialization failed',
                    e,
                    details='Enhanced Unified AGNO Agent initialization failed'
                )

        except Exception as e:
            self._record_failure(
                'enhanced_unified_agno_agent',
                'Enhanced Unified AGNO Agent test failed',
                e,
                details='Enhanced Unified AGNO Agent test failed'
            )

    def test_end_to_end_web_search(self):
        """Test 5: End-to-end web search workflow test.

        Skipped when the agent test did not run or failed, or when the agent
        is not available. Otherwise asks the agent a fixed set of questions
        and reports PASS / PARTIAL / FAIL by the number answered.
        """
        logger.info("🔄 Test 5: End-to-End Web Search Workflow Test")

        try:
            # Check if we have the necessary components
            agent_result = self.test_results.get('enhanced_unified_agno_agent')
            if agent_result is None or agent_result['status'] == 'FAIL':
                self.test_results['end_to_end_web_search'] = {
                    'status': 'SKIP',
                    'details': 'Enhanced Unified AGNO Agent not available, skipping end-to-end test'
                }
                logger.warning("⚠️ Enhanced Unified AGNO Agent not available, skipping end-to-end test")
                return

            # Import and initialize the agent
            from agents.enhanced_unified_agno_agent import GAIAAgent
            agent = GAIAAgent()

            if not getattr(agent, 'available', False):
                self.test_results['end_to_end_web_search'] = {
                    'status': 'SKIP',
                    'details': 'Enhanced Unified AGNO Agent not available for testing'
                }
                logger.warning("⚠️ Enhanced Unified AGNO Agent not available for testing")
                return

            # Test web search with sample questions that require current information
            test_questions = [
                "What are the latest developments in artificial intelligence in 2024?",
                "Who is the current CEO of OpenAI?",
                "What is the latest version of Python as of 2024?"
            ]
            # Sentinel answers the agent returns instead of raising.
            error_answers = ("Agent not available", "Unable to process this question")

            test_results = []

            for i, question in enumerate(test_questions, 1):
                logger.info(f"🔍 Testing question {i}: {question}")

                try:
                    # Process the question with the agent
                    answer = agent(question)

                    if answer and answer not in error_answers:
                        logger.info(f"✅ Question {i} processed successfully")
                        logger.info(f"📝 Answer preview: {answer[:200]}...")

                        test_results.append({
                            'question': question,
                            'status': 'SUCCESS',
                            'answer_preview': answer[:200],
                            'answer_length': len(answer)
                        })
                    else:
                        logger.warning(f"⚠️ Question {i} returned empty or error response")
                        test_results.append({
                            'question': question,
                            'status': 'EMPTY_RESPONSE',
                            'answer': answer
                        })

                except Exception as e:
                    logger.error(f"❌ Question {i} processing failed: {e}")
                    test_results.append({
                        'question': question,
                        'status': 'ERROR',
                        'error': str(e)
                    })

            # Determine overall status
            successful_tests = sum(1 for result in test_results if result['status'] == 'SUCCESS')
            total_tests = len(test_questions)

            if successful_tests == total_tests:
                overall_status = 'PASS'
                details = f'All {total_tests} test questions processed successfully'
            elif successful_tests > 0:
                overall_status = 'PARTIAL'
                details = f'{successful_tests}/{total_tests} test questions processed successfully'
            else:
                overall_status = 'FAIL'
                details = 'No test questions processed successfully'

            self.test_results['end_to_end_web_search'] = {
                'status': overall_status,
                'details': details,
                'successful_tests': successful_tests,
                'total_tests': total_tests,
                'test_results': test_results
            }

            logger.info(f"📊 End-to-end test results: {successful_tests}/{total_tests} successful")

        except Exception as e:
            self._record_failure(
                'end_to_end_web_search',
                'End-to-end web search test failed',
                e,
                details='End-to-end web search workflow test failed'
            )

    def generate_summary_report(self) -> Dict[str, Any]:
        """Generate a comprehensive summary report of all tests.

        Returns:
            A dict with ``overall_status`` (FULLY_READY, MOSTLY_READY,
            PARTIALLY_READY or NOT_READY), ``test_summary`` counts,
            ``detailed_results``, ``errors``, ``recommendations`` and
            ``deployment_readiness``.
        """
        logger.info("📋 Generating comprehensive test summary report...")

        # Count test results
        status_counts = Counter(result['status'] for result in self.test_results.values())
        passed_tests = status_counts['PASS']
        partial_tests = status_counts['PARTIAL']
        failed_tests = status_counts['FAIL']
        skipped_tests = status_counts['SKIP']
        total_tests = len(self.test_results)

        # Determine overall status
        if failed_tests == 0 and passed_tests > 0:
            if partial_tests == 0 and skipped_tests == 0:
                overall_status = 'FULLY_READY'
            else:
                overall_status = 'MOSTLY_READY'
        elif passed_tests > 0 or partial_tests > 0:
            overall_status = 'PARTIALLY_READY'
        else:
            overall_status = 'NOT_READY'

        web_search_ready = overall_status in ['FULLY_READY', 'MOSTLY_READY']

        # Generate recommendations
        recommendations = []

        env_result = self.test_results.get('environment_configuration')
        if env_result and env_result['status'] != 'PASS' and 'missing_keys' in env_result:
            recommendations.append(f"Configure missing API keys: {env_result['missing_keys']}")

        exa_result = self.test_results.get('exa_api_connectivity')
        if exa_result:
            if exa_result['status'] == 'FAIL':
                recommendations.append("Fix Exa API connectivity issues")
            elif exa_result['status'] == 'SKIP':
                recommendations.append("Configure EXA_API_KEY for web search functionality")

        tools_result = self.test_results.get('agno_tools_initialization')
        if tools_result and tools_result['status'] == 'FAIL':
            recommendations.append("Fix AGNO tools initialization issues")

        agent_result = self.test_results.get('enhanced_unified_agno_agent')
        if agent_result and agent_result['status'] == 'FAIL':
            recommendations.append("Fix Enhanced Unified AGNO Agent initialization issues")

        e2e_result = self.test_results.get('end_to_end_web_search')
        if e2e_result and e2e_result['status'] == 'FAIL':
            recommendations.append("Fix end-to-end web search workflow issues")

        if not recommendations:
            # Only claim readiness when the computed status agrees.
            if web_search_ready:
                recommendations.append("Web search functionality is ready for deployment!")
            else:
                recommendations.append(
                    "Review the detailed results for failed, partial or skipped tests"
                )

        summary_report = {
            'overall_status': overall_status,
            'test_summary': {
                'total_tests': total_tests,
                'passed': passed_tests,
                'partial': partial_tests,
                'failed': failed_tests,
                'skipped': skipped_tests
            },
            'detailed_results': self.test_results,
            'errors': self.errors,
            'recommendations': recommendations,
            'deployment_readiness': {
                'web_search_ready': web_search_ready,
                'critical_issues': failed_tests,
                'minor_issues': partial_tests + skipped_tests
            }
        }

        # Log summary
        logger.info("=" * 80)
        logger.info("📊 WEB SEARCH FUNCTIONALITY VERIFICATION SUMMARY")
        logger.info("=" * 80)
        logger.info(f"Overall Status: {overall_status}")
        logger.info(
            f"Tests: {passed_tests} passed, {partial_tests} partial, "
            f"{failed_tests} failed, {skipped_tests} skipped"
        )
        logger.info(f"Web Search Ready: {web_search_ready}")

        logger.info("\n📝 Recommendations:")
        for i, rec in enumerate(recommendations, 1):
            logger.info(f"  {i}. {rec}")

        if self.errors:
            logger.info(f"\n❌ Errors encountered: {len(self.errors)}")
            for error in self.errors:
                logger.error(f"  - {error}")

        logger.info("=" * 80)

        return summary_report


def main():
    """Run the web search functionality verification.

    Returns:
        Process exit code: 0 when web search is deployment-ready, 1 otherwise
        (including when the verification itself crashes).
    """
    print("🚀 GAIA Enhanced Agent - Web Search Functionality Verification")
    print("=" * 80)

    try:
        # Initialize tester
        tester = WebSearchFunctionalityTester()

        # Run all tests
        summary_report = tester.run_all_tests()

        # Print final status
        print("\n" + "=" * 80)
        print("🎯 FINAL VERIFICATION RESULT")
        print("=" * 80)

        overall_status = summary_report['overall_status']
        readiness = summary_report['deployment_readiness']
        web_search_ready = readiness['web_search_ready']

        if overall_status == 'FULLY_READY':
            print("✅ WEB SEARCH FUNCTIONALITY: FULLY READY FOR GAIA EVALUATION")
        elif overall_status == 'MOSTLY_READY':
            print("✅ WEB SEARCH FUNCTIONALITY: MOSTLY READY FOR GAIA EVALUATION")
        elif overall_status == 'PARTIALLY_READY':
            print("⚠️ WEB SEARCH FUNCTIONALITY: PARTIALLY READY - SOME ISSUES NEED ATTENTION")
        else:
            print("❌ WEB SEARCH FUNCTIONALITY: NOT READY - CRITICAL ISSUES NEED RESOLUTION")

        print(f"Deployment Ready: {'YES' if web_search_ready else 'NO'}")
        print(f"Critical Issues: {readiness['critical_issues']}")
        print(f"Minor Issues: {readiness['minor_issues']}")

        return 0 if web_search_ready else 1

    except Exception as e:
        print(f"❌ Verification failed with error: {e}")
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)