#!/usr/bin/env python3
"""
Test script for the enhanced GAIA agent with new response processor.

Two stages are run when the file is executed as a script:

1. ``test_response_processor_only`` exercises the response processor on
   canned responses and needs no API key.
2. ``test_enhanced_agent`` exercises the full agent and is only run when the
   ``MISTRAL_API_KEY`` environment variable is set.

The process exits with status 0 when every executed stage succeeded and with
status 1 otherwise.
"""

import logging
import os
import re
import sys
from pathlib import Path

# Add the deployment-ready directory to the path
sys.path.insert(0, str(Path(__file__).parent))

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)


def test_enhanced_agent():
    """Test the enhanced GAIA agent with various question types.

    Imports and instantiates ``FixedGAIAAgent``, asks it each sample question,
    checks every answer against a per-question regular expression and finally
    prints the agent's response-processor statistics.

    An answer whose format does not match its pattern is reported as a warning
    only. A question that raises is reported and makes the whole test fail,
    so that agent errors are reflected in the script's exit code.

    Returns:
        bool: ``True`` when the agent was available and every question was
        processed without raising; ``False`` when the import failed, the
        agent reported itself unavailable, at least one question raised, or
        any other unexpected error occurred.
    """
    print("🚀 Testing Enhanced GAIA Agent with Response Processor")
    print("=" * 60)

    try:
        # Import the enhanced agent
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Initialize the agent
        print("📦 Initializing Enhanced GAIA Agent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            print("❌ Agent not available - check MISTRAL_API_KEY")
            return False

        print("✅ Agent initialized successfully")
        print(f"📊 Tools available: {len(agent.tools)}")

        # Test questions of different types
        test_questions = [
            {
                "question": "What is 25 * 17?",
                "type": "Mathematical",
                "expected_pattern": r"^\d+$",
            },
            {
                "question": "What is the capital of France?",
                "type": "Factual",
                "expected_pattern": r"^[A-Za-z\s]+$",
            },
            {
                "question": "How many continents are there?",
                "type": "Count",
                "expected_pattern": r"^\d+$",
            },
        ]

        print("\n🧪 Testing Response Processing...")
        print("-" * 40)

        # Number of questions whose processing raised; a non-zero count
        # turns the overall result into a failure.
        error_count = 0

        for i, test_case in enumerate(test_questions, 1):
            print(f"\nTest {i}: {test_case['type']} Question")
            print(f"Question: {test_case['question']}")

            try:
                # Process the question
                answer = agent(test_case['question'])
                print(f"Answer: '{answer}'")

                # Validate the answer format
                if re.match(test_case['expected_pattern'], answer):
                    print("✅ Answer format valid")
                else:
                    print("⚠️ Answer format unexpected")

            except Exception as e:
                # Keep going so every question is reported, but remember
                # the failure instead of silently discarding it.
                error_count += 1
                print(f"❌ Error processing question: {e}")

        # Get processor statistics
        print("\n📈 Response Processor Statistics:")
        print("-" * 40)
        stats = agent.get_processor_statistics()
        if stats:
            for key, value in stats.items():
                print(f"  {key}: {value}")
        else:
            print("  No statistics available")

        if error_count:
            print(f"\n❌ Enhanced agent testing finished with {error_count} error(s)")
            return False

        print("\n✅ Enhanced agent testing completed successfully!")
        return True

    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("Make sure all dependencies are installed")
        return False
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False


def test_response_processor_only():
    """Test just the response processor without the full agent.

    Instantiates ``EnhancedResponseProcessor``, feeds it canned responses and
    prints the extracted answer, strategy and confidence next to the expected
    answer, followed by the processor's statistics.

    A mismatch between extracted and expected answer is reported as a
    warning only and does not change the return value.

    Returns:
        bool: ``True`` when all responses were processed without raising;
        ``False`` when the import or any processing step raised.
    """
    print("\n🧠 Testing Response Processor Standalone")
    print("=" * 60)

    try:
        from utils.response_processor import EnhancedResponseProcessor

        # Initialize processor
        processor = EnhancedResponseProcessor()
        print("✅ Response processor initialized")

        # Test responses
        test_responses = [
            {
                "response": "Let me calculate this. 25 * 17 = 425. FINAL ANSWER: 425",
                "question": "What is 25 * 17?",
                "expected": "425",
            },
            {
                "response": "The capital of France is Paris. FINAL ANSWER: Paris",
                "question": "What is the capital of France?",
                "expected": "Paris",
            },
            {
                "response": "After researching, I found that there are 7 continents on Earth. FINAL ANSWER: 7",
                "question": "How many continents are there?",
                "expected": "7",
            },
        ]

        print("\n🔍 Testing Answer Extraction...")
        print("-" * 40)

        for i, test_case in enumerate(test_responses, 1):
            print(f"\nTest {i}:")
            print(f"Question: {test_case['question']}")
            print(f"Response: {test_case['response'][:100]}...")

            # Extract answer
            result = processor.process_response(
                test_case['response'], test_case['question']
            )

            print(f"Extracted: '{result.answer}'")
            print(f"Expected: '{test_case['expected']}'")
            print(f"Strategy: {result.strategy.value}")
            print(f"Confidence: {result.confidence:.2f}")

            if result.answer == test_case['expected']:
                print("✅ Extraction correct")
            else:
                print("⚠️ Extraction differs from expected")

        # Get statistics
        print("\n📊 Processor Statistics:")
        print("-" * 40)
        stats = processor.get_statistics()
        for key, value in stats.items():
            print(f"  {key}: {value}")

        print("\n✅ Response processor testing completed!")
        return True

    except Exception as e:
        print(f"❌ Error testing response processor: {e}")
        return False


def main():
    """Run the test stages and return the process exit code.

    The standalone processor test always runs. The full agent test runs only
    when ``MISTRAL_API_KEY`` is set; when it is skipped it counts as passed.

    Returns:
        int: ``0`` when every executed stage succeeded, ``1`` otherwise.
    """
    print("🧪 Enhanced GAIA Agent Test Suite")
    print("=" * 60)

    # Test response processor standalone
    processor_success = test_response_processor_only()

    # Test full agent if API key is available
    if os.getenv("MISTRAL_API_KEY"):
        agent_success = test_enhanced_agent()
    else:
        print("\n⚠️ MISTRAL_API_KEY not found - skipping full agent test")
        agent_success = True  # Don't fail if no API key

    # Summary
    print("\n" + "=" * 60)
    if processor_success and agent_success:
        print("🎉 All tests completed successfully!")
        return 0

    print("❌ Some tests failed")
    return 1


if __name__ == "__main__":
    sys.exit(main())