gaia-enhanced-agent / test_web_search_functionality.py
GAIA Agent Deployment
Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
9a6a4dc
#!/usr/bin/env python3
"""
Web Search Functionality Verification for GAIA Enhanced Agent
This script comprehensively tests the web search capabilities of the deployment-ready
GAIA Enhanced Agent to ensure it's ready for GAIA benchmark evaluation.
Tests include:
1. Environment configuration verification
2. Exa API connectivity and authentication
3. AGNO tools initialization and web search tool availability
4. End-to-end web search workflow testing
5. Integration with the enhanced unified AGNO agent
"""
import os
import sys
import logging
import traceback
from pathlib import Path
from typing import Dict, Any, List
# Configure root logging once at import time: INFO level, timestamped records.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
# Module-scoped logger used by every check in this script.
logger = logging.getLogger(__name__)
def load_env_file():
    """Load environment variables from a ``.env`` file in the current directory.

    Does nothing when the file does not exist.  Every non-blank line that is
    not a ``#`` comment and contains ``=`` is split at the first ``=``; the
    key and value are stripped of surrounding whitespace and written to
    ``os.environ``, overwriting any existing value.

    Two common ``.env`` conventions are normalised so that keys and values
    arrive in the form the rest of the script expects:

    * a leading ``export `` before the key is dropped;
    * one pair of matching single or double quotes around the value is
      removed (``KEY="abc"`` yields ``abc``, not ``"abc"``).
    """
    env_file = Path('.env')
    if not env_file.exists():
        return

    # utf-8-sig reads plain UTF-8 and also swallows a BOM, which would
    # otherwise become part of the first key.
    with open(env_file, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            if key.startswith('export '):
                key = key[len('export '):].strip()
            if not key:
                # os.environ rejects an empty name; skip malformed "=value" lines
                # instead of crashing at import time.
                continue

            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]

            os.environ[key] = value
# Load environment variables at import time, before any check reads its
# API keys through os.getenv
load_env_file()
class WebSearchFunctionalityTester:
    """Comprehensive tester for web search functionality in GAIA Enhanced Agent.

    Each ``test_*`` method stores its outcome in ``self.test_results`` under a
    fixed key, as a dict holding at least ``status`` (``PASS``, ``PARTIAL``,
    ``FAIL`` or ``SKIP``) and usually ``details``.  An exception that reaches
    a method's outermost handler is recorded as ``FAIL`` and its message is
    appended to ``self.errors``, so one broken check never prevents the
    remaining checks from running.
    """

    # Value found in template .env files; a key equal to it counts as unset.
    _PLACEHOLDER_API_KEY = 'your_api_key_here'

    # Agent replies that signal a failure rather than a real answer.
    _AGENT_ERROR_ANSWERS = ("Agent not available", "Unable to process this question")

    def __init__(self):
        """Initialize the web search functionality tester."""
        self.test_results: Dict[str, Dict[str, Any]] = {}
        self.errors: List[str] = []

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @classmethod
    def _configured_api_key(cls, name):
        """Return the value of env var *name*, or None if unset, empty or a placeholder."""
        value = os.getenv(name)
        if value and value != cls._PLACEHOLDER_API_KEY:
            return value
        return None

    def _initialize_tool(self, tool_class, tool_label, key_name):
        """Instantiate one AGNO tool class and return a status string.

        Returns ``"Available"`` on success, ``"Failed: <error>"`` when the
        constructor raises, or ``"Skipped (no API key)"`` when *key_name* is
        not configured.
        """
        api_key = self._configured_api_key(key_name)
        if not api_key:
            logger.warning(f"⚠️ {key_name} not configured, skipping {tool_label} initialization")
            return "Skipped (no API key)"
        try:
            tool_class(api_key=api_key)
        except Exception as e:
            logger.warning(f"⚠️ AGNO {tool_label} initialization failed: {e}")
            return f"Failed: {e}"
        logger.info(f"✅ AGNO {tool_label} initialized successfully")
        return "Available"

    @staticmethod
    def _summarize_tool_statuses(statuses):
        """Collapse per-tool status strings into ``(overall_status, details)``."""
        if "Available" in statuses:
            return "PASS", "At least one web search tool available"
        # Failure strings carry the error text ("Failed: ..."), so match by prefix;
        # an exact comparison against "Failed" would never succeed.
        if any(status.startswith("Failed") for status in statuses):
            return "PARTIAL", "Some web search tools failed to initialize"
        return "SKIP", "No web search tools configured"

    # ------------------------------------------------------------------
    # Test driver
    # ------------------------------------------------------------------
    def run_all_tests(self) -> Dict[str, Any]:
        """Run all web search functionality tests and return the summary report."""
        logger.info("🚀 Starting comprehensive web search functionality verification...")

        # Test 1: Environment Configuration
        self.test_environment_configuration()
        # Test 2: Exa API Connectivity
        self.test_exa_api_connectivity()
        # Test 3: AGNO Tools Initialization
        self.test_agno_tools_initialization()
        # Test 4: Enhanced Unified AGNO Agent
        self.test_enhanced_unified_agno_agent()
        # Test 5: End-to-End Web Search Workflow (reads the result of test 4)
        self.test_end_to_end_web_search()

        # Generate summary report
        return self.generate_summary_report()

    # ------------------------------------------------------------------
    # Individual tests
    # ------------------------------------------------------------------
    def test_environment_configuration(self) -> None:
        """Test 1: Verify environment configuration for web search.

        Records ``PASS`` when all three API keys are configured and
        ``PARTIAL`` when at least one is missing or still a placeholder.
        """
        logger.info("🔧 Test 1: Environment Configuration Verification")
        try:
            # Check required API keys
            required_keys = {
                'MISTRAL_API_KEY': 'Mistral API for AGNO orchestration',
                'EXA_API_KEY': 'Exa API for advanced web search',
                'FIRECRAWL_API_KEY': 'Firecrawl API for web content extraction',
            }
            missing_keys = []
            configured_keys = []
            for key, description in required_keys.items():
                if self._configured_api_key(key):
                    configured_keys.append(f"{key}: {description}")
                    logger.info(f"✅ {key} configured")
                else:
                    missing_keys.append(f"{key}: {description}")
                    logger.warning(f"⚠️ {key} not configured")

            # Check .env file existence
            env_file_exists = Path('.env').exists()
            logger.info(f"📄 .env file exists: {env_file_exists}")

            self.test_results['environment_configuration'] = {
                'status': 'PASS' if not missing_keys else 'PARTIAL',
                'configured_keys': configured_keys,
                'missing_keys': missing_keys,
                'env_file_exists': env_file_exists,
                'details': f"Configured: {len(configured_keys)}/{len(required_keys)} API keys",
            }
            if missing_keys:
                logger.warning(f"⚠️ Missing API keys may limit functionality: {missing_keys}")
            else:
                logger.info("✅ All required API keys configured")
        except Exception as e:
            self.test_results['environment_configuration'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'Failed to verify environment configuration',
            }
            self.errors.append(f"Environment configuration test failed: {e}")
            logger.error(f"❌ Environment configuration test failed: {e}")

    def test_exa_api_connectivity(self) -> None:
        """Test 2: Test Exa API connectivity and authentication.

        Skipped when ``EXA_API_KEY`` is not configured.  Otherwise performs
        one live search and records ``PASS`` if it returns any result.
        """
        logger.info("🌐 Test 2: Exa API Connectivity Test")
        try:
            exa_api_key = self._configured_api_key('EXA_API_KEY')
            if not exa_api_key:
                self.test_results['exa_api_connectivity'] = {
                    'status': 'SKIP',
                    'details': 'EXA_API_KEY not configured, skipping connectivity test',
                }
                logger.warning("⚠️ EXA_API_KEY not configured, skipping connectivity test")
                return

            # Test Exa API import; only the import itself is reported as an import failure
            try:
                from exa_py import Exa
            except ImportError as e:
                self.test_results['exa_api_connectivity'] = {
                    'status': 'FAIL',
                    'error': f'Exa library import failed: {e}',
                    'details': 'exa-py library not available',
                }
                logger.error(f"❌ Exa library import failed: {e}")
                return
            logger.info("✅ Exa Python library imported successfully")

            # Initialize Exa client
            exa_client = Exa(api_key=exa_api_key)
            logger.info("✅ Exa client initialized successfully")

            # Test basic search functionality
            test_query = "artificial intelligence recent developments"
            logger.info(f"🔍 Testing Exa search with query: '{test_query}'")
            search_results = exa_client.search(
                query=test_query,
                num_results=3,
                type="neural",
            )

            results = getattr(search_results, 'results', None)
            if results:
                result_count = len(results)
                logger.info(f"✅ Exa search successful: {result_count} results returned")

                # Log first result for verification; a result may carry no title
                first_title = (getattr(results[0], 'title', None) or '')[:100]
                logger.info(f"📄 First result: {first_title}...")
                self.test_results['exa_api_connectivity'] = {
                    'status': 'PASS',
                    'details': f'Exa API working correctly, returned {result_count} results',
                    'test_query': test_query,
                    'result_count': result_count,
                    'first_result_title': first_title,
                }
            else:
                self.test_results['exa_api_connectivity'] = {
                    'status': 'FAIL',
                    'details': 'Exa API returned no results or invalid response',
                    'test_query': test_query,
                }
                logger.error("❌ Exa API returned no results or invalid response")
        except Exception as e:
            self.test_results['exa_api_connectivity'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'Exa API connectivity test failed',
            }
            self.errors.append(f"Exa API connectivity test failed: {e}")
            logger.error(f"❌ Exa API connectivity test failed: {e}")

    def test_agno_tools_initialization(self) -> None:
        """Test 3: Test AGNO tools initialization including web search tools.

        Records ``PASS`` when at least one of ExaTools / FirecrawlTools can be
        constructed, ``PARTIAL`` when none is available but at least one
        constructor failed, and ``SKIP`` when neither API key is configured.
        """
        logger.info("🛠️ Test 3: AGNO Tools Initialization Test")
        try:
            # Test AGNO framework import
            try:
                from agno.tools.exa import ExaTools
                from agno.tools.firecrawl import FirecrawlTools
            except ImportError as e:
                self.test_results['agno_tools_initialization'] = {
                    'status': 'FAIL',
                    'error': f'AGNO tools import failed: {e}',
                    'details': 'AGNO framework or web search tools not available',
                }
                logger.error(f"❌ AGNO tools import failed: {e}")
                return
            logger.info("✅ AGNO web search tools imported successfully")

            exa_tools_status = self._initialize_tool(ExaTools, 'ExaTools', 'EXA_API_KEY')
            firecrawl_tools_status = self._initialize_tool(
                FirecrawlTools, 'FirecrawlTools', 'FIRECRAWL_API_KEY'
            )

            # Determine overall status
            overall_status, details = self._summarize_tool_statuses(
                [exa_tools_status, firecrawl_tools_status]
            )
            self.test_results['agno_tools_initialization'] = {
                'status': overall_status,
                'details': details,
                'exa_tools_status': exa_tools_status,
                'firecrawl_tools_status': firecrawl_tools_status,
            }
        except Exception as e:
            self.test_results['agno_tools_initialization'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'AGNO tools initialization test failed',
            }
            self.errors.append(f"AGNO tools initialization test failed: {e}")
            logger.error(f"❌ AGNO tools initialization test failed: {e}")

    def test_enhanced_unified_agno_agent(self) -> None:
        """Test 4: Test Enhanced Unified AGNO Agent initialization and web search integration.

        Records ``PASS`` when the agent constructs and reports a truthy
        ``available`` attribute, ``PARTIAL`` when it constructs but is not
        available (or has no such attribute), and ``FAIL`` on import or
        construction errors.
        """
        logger.info("🤖 Test 4: Enhanced Unified AGNO Agent Test")
        try:
            # Import the Enhanced Unified AGNO Agent
            try:
                from agents.enhanced_unified_agno_agent import GAIAAgent
            except ImportError as e:
                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'FAIL',
                    'error': f'Enhanced Unified AGNO Agent import failed: {e}',
                    'details': 'Agent module not available',
                }
                logger.error(f"❌ Enhanced Unified AGNO Agent import failed: {e}")
                return
            logger.info("✅ Enhanced Unified AGNO Agent imported successfully")

            # Initialize the agent
            try:
                agent = GAIAAgent()
                logger.info("✅ Enhanced Unified AGNO Agent initialized successfully")

                # Check agent availability; the attribute is optional, so every
                # read goes through the same guard.
                has_available_flag = hasattr(agent, 'available')
                is_available = bool(has_available_flag and agent.available)
                if is_available:
                    logger.info("✅ Enhanced Unified AGNO Agent is available and ready")
                    agent_status = "Available and ready"
                else:
                    logger.warning("⚠️ Enhanced Unified AGNO Agent initialized but not available")
                    agent_status = "Initialized but not available"

                # Check tool status
                if hasattr(agent, 'get_tool_status'):
                    tool_status = agent.get_tool_status()
                    web_search_tools = [
                        f"{tool_name}: {status}"
                        for tool_name, status in tool_status.items()
                        if tool_name in ('exa', 'firecrawl')
                    ]
                    logger.info(f"🛠️ Web search tools status: {web_search_tools}")
                else:
                    web_search_tools = ["Tool status method not available"]

                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'PASS' if is_available else 'PARTIAL',
                    'details': agent_status,
                    'web_search_tools': web_search_tools,
                    'agent_available': agent.available if has_available_flag else 'Unknown',
                }
            except Exception as e:
                self.test_results['enhanced_unified_agno_agent'] = {
                    'status': 'FAIL',
                    'error': str(e),
                    'details': 'Enhanced Unified AGNO Agent initialization failed',
                }
                logger.error(f"❌ Enhanced Unified AGNO Agent initialization failed: {e}")
        except Exception as e:
            self.test_results['enhanced_unified_agno_agent'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'Enhanced Unified AGNO Agent test failed',
            }
            self.errors.append(f"Enhanced Unified AGNO Agent test failed: {e}")
            logger.error(f"❌ Enhanced Unified AGNO Agent test failed: {e}")

    def test_end_to_end_web_search(self) -> None:
        """Test 5: End-to-end web search workflow test.

        Skipped when test 4 has not run or recorded ``FAIL``, or when a fresh
        agent is not available.  Otherwise asks the agent three questions and
        records ``PASS`` / ``PARTIAL`` / ``FAIL`` depending on how many
        produced a non-empty, non-error answer.
        """
        logger.info("🔄 Test 5: End-to-End Web Search Workflow Test")
        try:
            # Check if we have the necessary components
            agent_result = self.test_results.get('enhanced_unified_agno_agent')
            if agent_result is None or agent_result['status'] == 'FAIL':
                self.test_results['end_to_end_web_search'] = {
                    'status': 'SKIP',
                    'details': 'Enhanced Unified AGNO Agent not available, skipping end-to-end test',
                }
                logger.warning("⚠️ Enhanced Unified AGNO Agent not available, skipping end-to-end test")
                return

            # Import and initialize the agent
            from agents.enhanced_unified_agno_agent import GAIAAgent
            agent = GAIAAgent()
            if not (hasattr(agent, 'available') and agent.available):
                self.test_results['end_to_end_web_search'] = {
                    'status': 'SKIP',
                    'details': 'Enhanced Unified AGNO Agent not available for testing',
                }
                logger.warning("⚠️ Enhanced Unified AGNO Agent not available for testing")
                return

            # Test web search with sample questions that require current information
            test_questions = [
                "What are the latest developments in artificial intelligence in 2024?",
                "Who is the current CEO of OpenAI?",
                "What is the latest version of Python as of 2024?",
            ]
            question_results = []
            for i, question in enumerate(test_questions, 1):
                logger.info(f"🔍 Testing question {i}: {question}")
                try:
                    # Process the question with the agent
                    answer = agent(question)
                    if answer and answer not in self._AGENT_ERROR_ANSWERS:
                        logger.info(f"✅ Question {i} processed successfully")
                        logger.info(f"📝 Answer preview: {answer[:200]}...")
                        question_results.append({
                            'question': question,
                            'status': 'SUCCESS',
                            'answer_preview': answer[:200],
                            'answer_length': len(answer),
                        })
                    else:
                        logger.warning(f"⚠️ Question {i} returned empty or error response")
                        question_results.append({
                            'question': question,
                            'status': 'EMPTY_RESPONSE',
                            'answer': answer,
                        })
                except Exception as e:
                    # One failing question must not abort the remaining ones
                    logger.error(f"❌ Question {i} processing failed: {e}")
                    question_results.append({
                        'question': question,
                        'status': 'ERROR',
                        'error': str(e),
                    })

            # Determine overall status
            successful_tests = sum(1 for result in question_results if result['status'] == 'SUCCESS')
            total_tests = len(test_questions)
            if successful_tests == total_tests:
                overall_status = 'PASS'
                details = f'All {total_tests} test questions processed successfully'
            elif successful_tests > 0:
                overall_status = 'PARTIAL'
                details = f'{successful_tests}/{total_tests} test questions processed successfully'
            else:
                overall_status = 'FAIL'
                details = 'No test questions processed successfully'

            self.test_results['end_to_end_web_search'] = {
                'status': overall_status,
                'details': details,
                'successful_tests': successful_tests,
                'total_tests': total_tests,
                'test_results': question_results,
            }
            logger.info(f"📊 End-to-end test results: {successful_tests}/{total_tests} successful")
        except Exception as e:
            self.test_results['end_to_end_web_search'] = {
                'status': 'FAIL',
                'error': str(e),
                'details': 'End-to-end web search workflow test failed',
            }
            self.errors.append(f"End-to-end web search test failed: {e}")
            logger.error(f"❌ End-to-end web search test failed: {e}")

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def generate_summary_report(self) -> Dict[str, Any]:
        """Generate a comprehensive summary report of all tests.

        Returns:
            A dict with ``overall_status`` (``FULLY_READY``, ``MOSTLY_READY``,
            ``PARTIALLY_READY`` or ``NOT_READY``), ``test_summary`` counts,
            ``detailed_results``, ``errors``, ``recommendations`` and
            ``deployment_readiness``.
        """
        logger.info("📋 Generating comprehensive test summary report...")

        # Count test results
        status_counts = {'PASS': 0, 'PARTIAL': 0, 'FAIL': 0, 'SKIP': 0}
        for result in self.test_results.values():
            if result['status'] in status_counts:
                status_counts[result['status']] += 1
        passed_tests = status_counts['PASS']
        partial_tests = status_counts['PARTIAL']
        failed_tests = status_counts['FAIL']
        skipped_tests = status_counts['SKIP']
        total_tests = len(self.test_results)

        # Determine overall status
        if failed_tests == 0 and passed_tests > 0:
            if partial_tests == 0 and skipped_tests == 0:
                overall_status = 'FULLY_READY'
            else:
                overall_status = 'MOSTLY_READY'
        elif passed_tests > 0 or partial_tests > 0:
            overall_status = 'PARTIALLY_READY'
        else:
            overall_status = 'NOT_READY'
        web_search_ready = overall_status in ['FULLY_READY', 'MOSTLY_READY']

        # Generate recommendations, in the order the tests run
        recommendations = []
        env_result = self.test_results.get('environment_configuration')
        if env_result and env_result['status'] != 'PASS' and 'missing_keys' in env_result:
            recommendations.append(f"Configure missing API keys: {env_result['missing_keys']}")

        exa_result = self.test_results.get('exa_api_connectivity')
        if exa_result:
            if exa_result['status'] == 'FAIL':
                recommendations.append("Fix Exa API connectivity issues")
            elif exa_result['status'] == 'SKIP':
                recommendations.append("Configure EXA_API_KEY for web search functionality")

        tools_result = self.test_results.get('agno_tools_initialization')
        if tools_result and tools_result['status'] in ('FAIL', 'PARTIAL'):
            recommendations.append("Fix AGNO web search tools import/initialization issues")

        agent_result = self.test_results.get('enhanced_unified_agno_agent')
        if agent_result and agent_result['status'] == 'FAIL':
            recommendations.append("Fix Enhanced Unified AGNO Agent initialization issues")

        e2e_result = self.test_results.get('end_to_end_web_search')
        if e2e_result and e2e_result['status'] == 'FAIL':
            recommendations.append("Investigate end-to-end web search workflow failures")

        if not recommendations:
            # Only claim readiness when the overall status agrees with it
            if web_search_ready:
                recommendations.append("Web search functionality is ready for deployment!")
            else:
                recommendations.append("Review detailed results and resolve outstanding issues before deployment")

        summary_report = {
            'overall_status': overall_status,
            'test_summary': {
                'total_tests': total_tests,
                'passed': passed_tests,
                'partial': partial_tests,
                'failed': failed_tests,
                'skipped': skipped_tests,
            },
            'detailed_results': self.test_results,
            'errors': self.errors,
            'recommendations': recommendations,
            'deployment_readiness': {
                'web_search_ready': web_search_ready,
                'critical_issues': failed_tests,
                'minor_issues': partial_tests + skipped_tests,
            },
        }

        # Log summary
        logger.info("=" * 80)
        logger.info("📊 WEB SEARCH FUNCTIONALITY VERIFICATION SUMMARY")
        logger.info("=" * 80)
        logger.info(f"Overall Status: {overall_status}")
        logger.info(f"Tests: {passed_tests} passed, {partial_tests} partial, {failed_tests} failed, {skipped_tests} skipped")
        logger.info(f"Web Search Ready: {web_search_ready}")
        logger.info("\n📝 Recommendations:")
        for i, rec in enumerate(recommendations, 1):
            logger.info(f" {i}. {rec}")
        if self.errors:
            logger.info(f"\n❌ Errors encountered: {len(self.errors)}")
            for error in self.errors:
                logger.error(f" - {error}")
        logger.info("=" * 80)
        return summary_report
def main() -> int:
    """Run the web search functionality verification and print the verdict.

    Returns:
        0 when the summary report marks web search as deployment ready,
        otherwise 1 (including when the verification itself raises).
    """
    print("🚀 GAIA Enhanced Agent - Web Search Functionality Verification")
    print("=" * 80)

    # Banner printed for each overall status; anything else counts as not ready
    status_banners = {
        'FULLY_READY': "✅ WEB SEARCH FUNCTIONALITY: FULLY READY FOR GAIA EVALUATION",
        'MOSTLY_READY': "✅ WEB SEARCH FUNCTIONALITY: MOSTLY READY FOR GAIA EVALUATION",
        'PARTIALLY_READY': "⚠️ WEB SEARCH FUNCTIONALITY: PARTIALLY READY - SOME ISSUES NEED ATTENTION",
    }
    not_ready_banner = "❌ WEB SEARCH FUNCTIONALITY: NOT READY - CRITICAL ISSUES NEED RESOLUTION"

    try:
        # Initialize tester and run all tests
        tester = WebSearchFunctionalityTester()
        summary_report = tester.run_all_tests()

        # Print final status
        print("\n" + "=" * 80)
        print("🎯 FINAL VERIFICATION RESULT")
        print("=" * 80)
        overall_status = summary_report['overall_status']
        readiness = summary_report['deployment_readiness']
        web_search_ready = readiness['web_search_ready']

        print(status_banners.get(overall_status, not_ready_banner))
        print(f"Deployment Ready: {'YES' if web_search_ready else 'NO'}")
        print(f"Critical Issues: {readiness['critical_issues']}")
        print(f"Minor Issues: {readiness['minor_issues']}")
        return 0 if web_search_ready else 1
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the exit code
        print(f"❌ Verification failed with error: {e}")
        traceback.print_exc()
        return 1
if __name__ == "__main__":
    # Hand main()'s return value to the shell as the process exit status
    sys.exit(main())