Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / test_web_search_functionality.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc about 2 months ago

25.6 kB

	#!/usr/bin/env python3
	"""
	Web Search Functionality Verification for GAIA Enhanced Agent

	This script comprehensively tests the web search capabilities of the deployment-ready
	GAIA Enhanced Agent to ensure it's ready for GAIA benchmark evaluation.

	Tests include:
	1. Environment configuration verification
	2. Exa API connectivity and authentication
	3. AGNO tools initialization and web search tool availability
	4. Integration with the enhanced unified AGNO agent
	5. End-to-end web search workflow testing
	"""

	import os
	import sys
	import logging
	import traceback
	from pathlib import Path
	from typing import Dict, Any, List

	# Configure logging at import time: INFO level, timestamped "time - level - message" records.
	logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
	# Module-wide logger shared by every function and class in this script.
	logger = logging.getLogger(__name__)

	def load_env_file(env_path='.env', override=True):
	    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

	    Blank lines, lines starting with ``#`` and lines without ``=`` are
	    ignored. Each remaining line is split on its first ``=``; key and value
	    are stripped of surrounding whitespace, and one pair of matching single
	    or double quotes around the value is removed. Lines whose key is empty
	    are skipped because an empty name cannot be stored in the environment.

	    Args:
	        env_path: Path of the file to read. Defaults to ``.env`` relative to
	            the current working directory. A missing file is not an error.
	        override: When True (the default), values from the file replace
	            variables that are already set. When False, variables that
	            already exist in the environment are left untouched.
	    """
	    env_file = Path(env_path)
	    if not env_file.is_file():
	        return

	    with open(env_file, 'r', encoding='utf-8') as f:
	        for raw_line in f:
	            line = raw_line.strip()
	            if not line or line.startswith('#') or '=' not in line:
	                continue

	            key, value = line.split('=', 1)
	            key = key.strip()
	            value = value.strip()
	            if not key:
	                # e.g. "=value": assigning os.environ[''] would raise.
	                continue

	            # KEY="value" / KEY='value' -> value (quotes are not part of the secret).
	            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
	                value = value[1:-1]

	            if override or key not in os.environ:
	                os.environ[key] = value

	# Populate os.environ from a local .env file (if one exists) at import time,
	# so the API-key lookups performed by the tests below can see those values.
	load_env_file()

	class WebSearchFunctionalityTester:
	    """Comprehensive tester for web search functionality in GAIA Enhanced Agent.

	    Every ``test_*`` method stores its outcome in ``self.test_results`` under
	    its own key, as a dict holding a ``status`` (``PASS``, ``PARTIAL``,
	    ``FAIL`` or ``SKIP``) and a human-readable ``details`` string. Unexpected
	    exceptions are additionally summarised in ``self.errors``.
	    ``generate_summary_report`` folds both into the readiness verdict.
	    """

	    # Template value that counts as "key not configured".
	    _PLACEHOLDER_API_KEY = 'your_api_key_here'
	    # Tool names picked out of the agent's tool-status mapping.
	    _WEB_SEARCH_TOOL_NAMES = ('exa', 'firecrawl')
	    # Agent answers that are treated as an error response rather than a result.
	    _ERROR_ANSWERS = ("Agent not available", "Unable to process this question")

	    def __init__(self):
	        """Initialize the web search functionality tester with empty results."""
	        self.test_results: Dict[str, Dict[str, Any]] = {}
	        self.errors: List[str] = []

	    # ------------------------------------------------------------------
	    # Private helpers
	    # ------------------------------------------------------------------

	    @classmethod
	    def _get_api_key(cls, name):
	        """Return env var *name*, or None when it is unset, empty or the placeholder."""
	        value = os.getenv(name)
	        if value and value != cls._PLACEHOLDER_API_KEY:
	            return value
	        return None

	    @classmethod
	    def _init_tool(cls, tool_cls, label, env_var):
	        """Try to construct *tool_cls* with the key from *env_var*; return a status string.

	        The result is ``"Available"``, ``"Failed: <error>"`` or
	        ``"Skipped (no API key)"``.
	        """
	        api_key = cls._get_api_key(env_var)
	        if api_key is None:
	            logger.warning(f"⚠️ {env_var} not configured, skipping {label} initialization")
	            return "Skipped (no API key)"
	        try:
	            tool_cls(api_key=api_key)
	        except Exception as e:
	            logger.warning(f"⚠️ AGNO {label} initialization failed: {e}")
	            return f"Failed: {e}"
	        logger.info(f"✅ AGNO {label} initialized successfully")
	        return "Available"

	    @staticmethod
	    def _summarize_tool_status(statuses):
	        """Fold per-tool status strings into an ``(overall_status, details)`` pair."""
	        if any(status == "Available" for status in statuses):
	            return "PASS", "At least one web search tool available"
	        # Failure statuses carry the error text ("Failed: ..."), so match the prefix.
	        if any(status.startswith("Failed") for status in statuses):
	            return "PARTIAL", "Some web search tools failed to initialize"
	        return "SKIP", "No web search tools configured"

	    def _record_exception(self, key, label, exc, details):
	        """Store a FAIL result for *key* and log/collect the unexpected exception."""
	        self.test_results[key] = {
	            'status': 'FAIL',
	            'error': str(exc),
	            'details': details
	        }
	        message = f"{label} test failed: {exc}"
	        self.errors.append(message)
	        logger.error(f"❌ {message}")

	    # ------------------------------------------------------------------
	    # Test driver
	    # ------------------------------------------------------------------

	    def run_all_tests(self) -> Dict[str, Any]:
	        """Run all web search functionality tests and return the summary report."""
	        logger.info("🚀 Starting comprehensive web search functionality verification...")

	        # Order matters: the end-to-end test consults the agent test's result.
	        self.test_environment_configuration()    # Test 1
	        self.test_exa_api_connectivity()         # Test 2
	        self.test_agno_tools_initialization()    # Test 3
	        self.test_enhanced_unified_agno_agent()  # Test 4
	        self.test_end_to_end_web_search()        # Test 5

	        return self.generate_summary_report()

	    # ------------------------------------------------------------------
	    # Individual tests
	    # ------------------------------------------------------------------

	    def test_environment_configuration(self) -> None:
	        """Test 1: Verify environment configuration for web search.

	        Records ``PASS`` when all three API keys are configured and
	        ``PARTIAL`` otherwise, together with the configured/missing key lists
	        and whether a ``.env`` file exists in the working directory.
	        """
	        logger.info("🔧 Test 1: Environment Configuration Verification")

	        try:
	            required_keys = {
	                'MISTRAL_API_KEY': 'Mistral API for AGNO orchestration',
	                'EXA_API_KEY': 'Exa API for advanced web search',
	                'FIRECRAWL_API_KEY': 'Firecrawl API for web content extraction'
	            }

	            missing_keys = []
	            configured_keys = []

	            for key, description in required_keys.items():
	                entry = f"{key}: {description}"
	                if self._get_api_key(key) is not None:
	                    configured_keys.append(entry)
	                    logger.info(f"✅ {key} configured")
	                else:
	                    missing_keys.append(entry)
	                    logger.warning(f"⚠️ {key} not configured")

	            env_file_exists = Path('.env').exists()
	            logger.info(f"📄 .env file exists: {env_file_exists}")

	            self.test_results['environment_configuration'] = {
	                'status': 'PASS' if not missing_keys else 'PARTIAL',
	                'configured_keys': configured_keys,
	                'missing_keys': missing_keys,
	                'env_file_exists': env_file_exists,
	                'details': f"Configured: {len(configured_keys)}/{len(required_keys)} API keys"
	            }

	            if missing_keys:
	                logger.warning(f"⚠️ Missing API keys may limit functionality: {missing_keys}")
	            else:
	                logger.info("✅ All required API keys configured")

	        except Exception as e:
	            self._record_exception(
	                'environment_configuration',
	                'Environment configuration',
	                e,
	                'Failed to verify environment configuration'
	            )

	    def test_exa_api_connectivity(self) -> None:
	        """Test 2: Test Exa API connectivity and authentication.

	        Records ``SKIP`` without an ``EXA_API_KEY``, ``FAIL`` when ``exa_py``
	        cannot be imported, the search raises or returns no results, and
	        ``PASS`` when a sample search returns at least one result.
	        """
	        logger.info("🌐 Test 2: Exa API Connectivity Test")

	        try:
	            exa_api_key = self._get_api_key('EXA_API_KEY')
	            if exa_api_key is None:
	                self.test_results['exa_api_connectivity'] = {
	                    'status': 'SKIP',
	                    'details': 'EXA_API_KEY not configured, skipping connectivity test'
	                }
	                logger.warning("⚠️ EXA_API_KEY not configured, skipping connectivity test")
	                return

	            try:
	                from exa_py import Exa
	            except ImportError as e:
	                self.test_results['exa_api_connectivity'] = {
	                    'status': 'FAIL',
	                    'error': f'Exa library import failed: {e}',
	                    'details': 'exa-py library not available'
	                }
	                logger.error(f"❌ Exa library import failed: {e}")
	                return
	            logger.info("✅ Exa Python library imported successfully")

	            exa_client = Exa(api_key=exa_api_key)
	            logger.info("✅ Exa client initialized successfully")

	            test_query = "artificial intelligence recent developments"
	            logger.info(f"🔍 Testing Exa search with query: '{test_query}'")

	            search_results = exa_client.search(
	                query=test_query,
	                num_results=3,
	                type="neural"
	            )

	            results = getattr(search_results, 'results', None)
	            if not results:
	                self.test_results['exa_api_connectivity'] = {
	                    'status': 'FAIL',
	                    'details': 'Exa API returned no results or invalid response',
	                    'test_query': test_query
	                }
	                logger.error("❌ Exa API returned no results or invalid response")
	                return

	            result_count = len(results)
	            logger.info(f"✅ Exa search successful: {result_count} results returned")

	            # A result may come back without a title; do not let that fail the test.
	            first_title = (getattr(results[0], 'title', None) or '')[:100]
	            logger.info(f"📄 First result: {first_title}...")

	            self.test_results['exa_api_connectivity'] = {
	                'status': 'PASS',
	                'details': f'Exa API working correctly, returned {result_count} results',
	                'test_query': test_query,
	                'result_count': result_count,
	                'first_result_title': first_title
	            }

	        except Exception as e:
	            self._record_exception(
	                'exa_api_connectivity',
	                'Exa API connectivity',
	                e,
	                'Exa API connectivity test failed'
	            )

	    def test_agno_tools_initialization(self) -> None:
	        """Test 3: Test AGNO tools initialization including web search tools.

	        Records ``FAIL`` when the AGNO tool modules cannot be imported,
	        ``PASS`` when at least one of ExaTools/FirecrawlTools initializes,
	        ``PARTIAL`` when none is available but at least one failed to
	        initialize, and ``SKIP`` when neither API key is configured.
	        """
	        logger.info("🛠️ Test 3: AGNO Tools Initialization Test")

	        try:
	            try:
	                from agno.tools.exa import ExaTools
	                from agno.tools.firecrawl import FirecrawlTools
	            except ImportError as e:
	                self.test_results['agno_tools_initialization'] = {
	                    'status': 'FAIL',
	                    'error': f'AGNO tools import failed: {e}',
	                    'details': 'AGNO framework or web search tools not available'
	                }
	                logger.error(f"❌ AGNO tools import failed: {e}")
	                return
	            logger.info("✅ AGNO web search tools imported successfully")

	            exa_tools_status = self._init_tool(ExaTools, 'ExaTools', 'EXA_API_KEY')
	            firecrawl_tools_status = self._init_tool(
	                FirecrawlTools, 'FirecrawlTools', 'FIRECRAWL_API_KEY'
	            )

	            overall_status, details = self._summarize_tool_status(
	                [exa_tools_status, firecrawl_tools_status]
	            )

	            self.test_results['agno_tools_initialization'] = {
	                'status': overall_status,
	                'details': details,
	                'exa_tools_status': exa_tools_status,
	                'firecrawl_tools_status': firecrawl_tools_status
	            }

	        except Exception as e:
	            self._record_exception(
	                'agno_tools_initialization',
	                'AGNO tools initialization',
	                e,
	                'AGNO tools initialization test failed'
	            )

	    def test_enhanced_unified_agno_agent(self) -> None:
	        """Test 4: Test Enhanced Unified AGNO Agent initialization and web search integration.

	        Records ``FAIL`` when the agent module cannot be imported or the agent
	        cannot be constructed, ``PASS`` when the agent reports itself as
	        available and ``PARTIAL`` when it initializes but is not available.
	        """
	        logger.info("🤖 Test 4: Enhanced Unified AGNO Agent Test")

	        try:
	            try:
	                from agents.enhanced_unified_agno_agent import GAIAAgent
	            except ImportError as e:
	                self.test_results['enhanced_unified_agno_agent'] = {
	                    'status': 'FAIL',
	                    'error': f'Enhanced Unified AGNO Agent import failed: {e}',
	                    'details': 'Agent module not available'
	                }
	                logger.error(f"❌ Enhanced Unified AGNO Agent import failed: {e}")
	                return
	            logger.info("✅ Enhanced Unified AGNO Agent imported successfully")

	            try:
	                agent = GAIAAgent()
	                logger.info("✅ Enhanced Unified AGNO Agent initialized successfully")

	                # An agent without an ``available`` attribute counts as not available.
	                has_available_flag = hasattr(agent, 'available')
	                available_flag = agent.available if has_available_flag else 'Unknown'
	                is_available = has_available_flag and bool(available_flag)

	                if is_available:
	                    logger.info("✅ Enhanced Unified AGNO Agent is available and ready")
	                    agent_status = "Available and ready"
	                else:
	                    logger.warning("⚠️ Enhanced Unified AGNO Agent initialized but not available")
	                    agent_status = "Initialized but not available"

	                if hasattr(agent, 'get_tool_status'):
	                    tool_status = agent.get_tool_status()
	                    web_search_tools = [
	                        f"{tool_name}: {status}"
	                        for tool_name, status in tool_status.items()
	                        if tool_name in self._WEB_SEARCH_TOOL_NAMES
	                    ]
	                    logger.info(f"🛠️ Web search tools status: {web_search_tools}")
	                else:
	                    web_search_tools = ["Tool status method not available"]

	                self.test_results['enhanced_unified_agno_agent'] = {
	                    'status': 'PASS' if is_available else 'PARTIAL',
	                    'details': agent_status,
	                    'web_search_tools': web_search_tools,
	                    'agent_available': available_flag
	                }

	            except Exception as e:
	                self.test_results['enhanced_unified_agno_agent'] = {
	                    'status': 'FAIL',
	                    'error': str(e),
	                    'details': 'Enhanced Unified AGNO Agent initialization failed'
	                }
	                logger.error(f"❌ Enhanced Unified AGNO Agent initialization failed: {e}")

	        except Exception as e:
	            self._record_exception(
	                'enhanced_unified_agno_agent',
	                'Enhanced Unified AGNO Agent',
	                e,
	                'Enhanced Unified AGNO Agent test failed'
	            )

	    def test_end_to_end_web_search(self) -> None:
	        """Test 5: End-to-end web search workflow test.

	        Skipped unless the agent test ran without ``FAIL`` and a freshly
	        constructed agent reports itself as available. Otherwise three sample
	        questions are sent to the agent; the result is ``PASS`` when all are
	        answered, ``PARTIAL`` when some are and ``FAIL`` when none is.
	        """
	        logger.info("🔄 Test 5: End-to-End Web Search Workflow Test")

	        try:
	            agent_result = self.test_results.get('enhanced_unified_agno_agent')
	            if agent_result is None or agent_result['status'] == 'FAIL':
	                self.test_results['end_to_end_web_search'] = {
	                    'status': 'SKIP',
	                    'details': 'Enhanced Unified AGNO Agent not available, skipping end-to-end test'
	                }
	                logger.warning("⚠️ Enhanced Unified AGNO Agent not available, skipping end-to-end test")
	                return

	            from agents.enhanced_unified_agno_agent import GAIAAgent
	            agent = GAIAAgent()

	            if not getattr(agent, 'available', False):
	                self.test_results['end_to_end_web_search'] = {
	                    'status': 'SKIP',
	                    'details': 'Enhanced Unified AGNO Agent not available for testing'
	                }
	                logger.warning("⚠️ Enhanced Unified AGNO Agent not available for testing")
	                return

	            # Sample questions that require current information.
	            test_questions = [
	                "What are the latest developments in artificial intelligence in 2024?",
	                "Who is the current CEO of OpenAI?",
	                "What is the latest version of Python as of 2024?"
	            ]

	            test_results = []

	            for i, question in enumerate(test_questions, 1):
	                logger.info(f"🔍 Testing question {i}: {question}")

	                # One failing question must not abort the remaining ones.
	                try:
	                    answer = agent(question)

	                    if answer and answer not in self._ERROR_ANSWERS:
	                        logger.info(f"✅ Question {i} processed successfully")
	                        logger.info(f"📝 Answer preview: {answer[:200]}...")

	                        test_results.append({
	                            'question': question,
	                            'status': 'SUCCESS',
	                            'answer_preview': answer[:200],
	                            'answer_length': len(answer)
	                        })
	                    else:
	                        logger.warning(f"⚠️ Question {i} returned empty or error response")
	                        test_results.append({
	                            'question': question,
	                            'status': 'EMPTY_RESPONSE',
	                            'answer': answer
	                        })

	                except Exception as e:
	                    logger.error(f"❌ Question {i} processing failed: {e}")
	                    test_results.append({
	                        'question': question,
	                        'status': 'ERROR',
	                        'error': str(e)
	                    })

	            successful_tests = sum(1 for result in test_results if result['status'] == 'SUCCESS')
	            total_tests = len(test_questions)

	            if successful_tests == total_tests:
	                overall_status = 'PASS'
	                details = f'All {total_tests} test questions processed successfully'
	            elif successful_tests > 0:
	                overall_status = 'PARTIAL'
	                details = f'{successful_tests}/{total_tests} test questions processed successfully'
	            else:
	                overall_status = 'FAIL'
	                details = 'No test questions processed successfully'

	            self.test_results['end_to_end_web_search'] = {
	                'status': overall_status,
	                'details': details,
	                'successful_tests': successful_tests,
	                'total_tests': total_tests,
	                'test_results': test_results
	            }

	            logger.info(f"📊 End-to-end test results: {successful_tests}/{total_tests} successful")

	        except Exception as e:
	            self._record_exception(
	                'end_to_end_web_search',
	                'End-to-end web search',
	                e,
	                'End-to-end web search workflow test failed'
	            )

	    # ------------------------------------------------------------------
	    # Reporting
	    # ------------------------------------------------------------------

	    def generate_summary_report(self) -> Dict[str, Any]:
	        """Generate a comprehensive summary report of all tests.

	        Returns:
	            A dict with ``overall_status`` (``FULLY_READY``, ``MOSTLY_READY``,
	            ``PARTIALLY_READY`` or ``NOT_READY``), per-status counts under
	            ``test_summary``, the raw ``detailed_results``, collected
	            ``errors``, a list of ``recommendations`` and a
	            ``deployment_readiness`` section.
	        """
	        logger.info("📋 Generating comprehensive test summary report...")

	        tally = {'PASS': 0, 'PARTIAL': 0, 'FAIL': 0, 'SKIP': 0}
	        for result in self.test_results.values():
	            status = result.get('status')
	            if status in tally:
	                tally[status] += 1
	        passed_tests = tally['PASS']
	        partial_tests = tally['PARTIAL']
	        failed_tests = tally['FAIL']
	        skipped_tests = tally['SKIP']
	        total_tests = len(self.test_results)

	        if failed_tests == 0 and passed_tests > 0:
	            if partial_tests == 0 and skipped_tests == 0:
	                overall_status = 'FULLY_READY'
	            else:
	                overall_status = 'MOSTLY_READY'
	        elif passed_tests > 0 or partial_tests > 0:
	            overall_status = 'PARTIALLY_READY'
	        else:
	            overall_status = 'NOT_READY'
	        web_search_ready = overall_status in ('FULLY_READY', 'MOSTLY_READY')

	        def status_of(key):
	            # Status recorded for a test, or None when that test did not run.
	            return self.test_results.get(key, {}).get('status')

	        recommendations = []

	        env_result = self.test_results.get('environment_configuration')
	        if env_result and env_result.get('status') != 'PASS' and 'missing_keys' in env_result:
	            recommendations.append(f"Configure missing API keys: {env_result['missing_keys']}")

	        exa_status = status_of('exa_api_connectivity')
	        if exa_status == 'FAIL':
	            recommendations.append("Fix Exa API connectivity issues")
	        elif exa_status == 'SKIP':
	            recommendations.append("Configure EXA_API_KEY for web search functionality")

	        if status_of('agno_tools_initialization') in ('FAIL', 'PARTIAL'):
	            recommendations.append("Fix AGNO web search tools initialization issues")

	        if status_of('enhanced_unified_agno_agent') == 'FAIL':
	            recommendations.append("Fix Enhanced Unified AGNO Agent initialization issues")

	        if status_of('end_to_end_web_search') in ('FAIL', 'PARTIAL'):
	            recommendations.append("Investigate end-to-end web search failures")

	        if not recommendations:
	            # Only announce readiness when the verdict actually says so.
	            if web_search_ready:
	                recommendations.append("Web search functionality is ready for deployment!")
	            else:
	                recommendations.append("Review detailed test results before deployment")

	        summary_report = {
	            'overall_status': overall_status,
	            'test_summary': {
	                'total_tests': total_tests,
	                'passed': passed_tests,
	                'partial': partial_tests,
	                'failed': failed_tests,
	                'skipped': skipped_tests
	            },
	            'detailed_results': self.test_results,
	            'errors': self.errors,
	            'recommendations': recommendations,
	            'deployment_readiness': {
	                'web_search_ready': web_search_ready,
	                'critical_issues': failed_tests,
	                'minor_issues': partial_tests + skipped_tests
	            }
	        }

	        logger.info("=" * 80)
	        logger.info("📊 WEB SEARCH FUNCTIONALITY VERIFICATION SUMMARY")
	        logger.info("=" * 80)
	        logger.info(f"Overall Status: {overall_status}")
	        logger.info(f"Tests: {passed_tests} passed, {partial_tests} partial, {failed_tests} failed, {skipped_tests} skipped")
	        logger.info(f"Web Search Ready: {web_search_ready}")

	        logger.info("\n📝 Recommendations:")
	        for i, rec in enumerate(recommendations, 1):
	            logger.info(f" {i}. {rec}")

	        if self.errors:
	            logger.info(f"\n❌ Errors encountered: {len(self.errors)}")
	            for error in self.errors:
	                logger.error(f" - {error}")

	        logger.info("=" * 80)

	        return summary_report

	def main():
	    """Run the web search verification suite and report the verdict on stdout.

	    Returns:
	        0 when the summary report marks web search as deployment ready;
	        1 when it does not, or when an exception escapes the run (the
	        traceback is printed in that case).
	    """
	    divider = "=" * 80
	    print("🚀 GAIA Enhanced Agent - Web Search Functionality Verification")
	    print(divider)

	    # Verdict line per overall status; any other status falls back to NOT READY.
	    verdict_by_status = {
	        'FULLY_READY': "✅ WEB SEARCH FUNCTIONALITY: FULLY READY FOR GAIA EVALUATION",
	        'MOSTLY_READY': "✅ WEB SEARCH FUNCTIONALITY: MOSTLY READY FOR GAIA EVALUATION",
	        'PARTIALLY_READY': "⚠️ WEB SEARCH FUNCTIONALITY: PARTIALLY READY - SOME ISSUES NEED ATTENTION",
	    }
	    fallback_verdict = "❌ WEB SEARCH FUNCTIONALITY: NOT READY - CRITICAL ISSUES NEED RESOLUTION"

	    try:
	        report = WebSearchFunctionalityTester().run_all_tests()

	        print("\n" + divider)
	        print("🎯 FINAL VERIFICATION RESULT")
	        print(divider)

	        status = report['overall_status']
	        is_ready = report['deployment_readiness']['web_search_ready']

	        print(verdict_by_status.get(status, fallback_verdict))

	        readiness_label = 'YES' if is_ready else 'NO'
	        print(f"Deployment Ready: {readiness_label}")
	        print(f"Critical Issues: {report['deployment_readiness']['critical_issues']}")
	        print(f"Minor Issues: {report['deployment_readiness']['minor_issues']}")

	        if is_ready:
	            return 0
	        return 1

	    except Exception as e:
	        print(f"❌ Verification failed with error: {e}")
	        traceback.print_exc()
	        return 1

	# Script entry point: propagate main()'s return value as the process exit status.
	if __name__ == "__main__":
	    sys.exit(main())