gaia-enhanced-agent / test_enhanced_agent.py
GAIA Agent Deployment
Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
9a6a4dc
#!/usr/bin/env python3
"""
Test script for the enhanced GAIA agent with new response processor.
"""
import os
import sys
import logging
from pathlib import Path
# Put this script's own directory at the front of the import search path so
# the ``agents`` and ``utils`` imports used by the tests below are looked up
# there first.
sys.path.insert(0, str(Path(__file__).parent))

# Root logger: INFO and above, formatted as "time - logger - level - message".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def test_enhanced_agent() -> bool:
    """Smoke-test the enhanced GAIA agent against a few sample questions.

    Imports and instantiates ``FixedGAIAAgent``, asks it one mathematical,
    one factual, and one counting question, checks each answer against a
    loose regular expression, and finally prints the response-processor
    statistics reported by ``agent.get_processor_statistics()``.

    An answer whose *format* does not match the expected pattern is only
    reported as a warning. A question that raises while being processed is
    counted as a failure and makes the whole run fail.

    Returns:
        bool: ``True`` when the agent is available and every question was
        processed without raising. ``False`` when the agent cannot be
        imported, reports ``available`` as falsy, at least one question
        raised, or any other unexpected error occurred.
    """
    # Imported once here (not per loop iteration); kept local because the
    # file-level import block does not provide ``re``.
    import re

    print("🚀 Testing Enhanced GAIA Agent with Response Processor")
    print("=" * 60)
    try:
        # Import the enhanced agent
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Initialize the agent
        print("📦 Initializing Enhanced GAIA Agent...")
        agent = FixedGAIAAgent()
        if not agent.available:
            print("❌ Agent not available - check MISTRAL_API_KEY")
            return False
        print("✅ Agent initialized successfully")
        print(f"📊 Tools available: {len(agent.tools)}")

        # Test questions of different types
        test_questions = [
            {
                "question": "What is 25 * 17?",
                "type": "Mathematical",
                "expected_pattern": r"^\d+$",
            },
            {
                "question": "What is the capital of France?",
                "type": "Factual",
                "expected_pattern": r"^[A-Za-z\s]+$",
            },
            {
                "question": "How many continents are there?",
                "type": "Count",
                "expected_pattern": r"^\d+$",
            },
        ]

        print("\n🧪 Testing Response Processing...")
        print("-" * 40)
        failed_questions = 0
        for i, test_case in enumerate(test_questions, 1):
            print(f"\nTest {i}: {test_case['type']} Question")
            print(f"Question: {test_case['question']}")
            try:
                # Process the question
                answer = agent(test_case['question'])
                print(f"Answer: '{answer}'")
                # Validate the answer format (a mismatch is a warning only)
                if re.match(test_case['expected_pattern'], answer):
                    print("✅ Answer format valid")
                else:
                    print("⚠️ Answer format unexpected")
            except Exception as e:
                # Keep going so every question is reported, but remember the
                # failure so the run is not declared successful afterwards.
                failed_questions += 1
                print(f"❌ Error processing question: {e}")

        # Get processor statistics
        print("\n📈 Response Processor Statistics:")
        print("-" * 40)
        stats = agent.get_processor_statistics()
        if stats:
            for key, value in stats.items():
                print(f" {key}: {value}")
        else:
            print(" No statistics available")

        if failed_questions:
            print(
                f"\n❌ Enhanced agent testing finished with "
                f"{failed_questions} failed question(s)"
            )
            return False
        print("\n✅ Enhanced agent testing completed successfully!")
        return True
    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("Make sure all dependencies are installed")
        return False
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False
def test_response_processor_only() -> bool:
    """Test just the response processor, without the full agent.

    Instantiates ``EnhancedResponseProcessor``, feeds it three canned model
    responses that each end in ``FINAL ANSWER: ...``, and compares the
    extracted ``result.answer`` with the known expected string. For every
    case the extraction strategy and confidence are printed as well, followed
    by the processor's own statistics.

    Because the inputs and expected answers are fixed, an extraction that
    differs from the expected value is counted and makes the run fail.

    Returns:
        bool: ``True`` when every extracted answer equals its expected value;
        ``False`` when at least one differs or any exception (including a
        failed import of the processor) is raised.
    """
    print("\n🧠 Testing Response Processor Standalone")
    print("=" * 60)
    try:
        from utils.response_processor import EnhancedResponseProcessor

        # Initialize processor
        processor = EnhancedResponseProcessor()
        print("✅ Response processor initialized")

        # Test responses
        test_responses = [
            {
                "response": "Let me calculate this. 25 * 17 = 425. FINAL ANSWER: 425",
                "question": "What is 25 * 17?",
                "expected": "425",
            },
            {
                "response": "The capital of France is Paris. FINAL ANSWER: Paris",
                "question": "What is the capital of France?",
                "expected": "Paris",
            },
            {
                "response": "After researching, I found that there are 7 continents on Earth. FINAL ANSWER: 7",
                "question": "How many continents are there?",
                "expected": "7",
            },
        ]

        print("\n🔍 Testing Answer Extraction...")
        print("-" * 40)
        mismatches = 0
        for i, test_case in enumerate(test_responses, 1):
            print(f"\nTest {i}:")
            print(f"Question: {test_case['question']}")
            print(f"Response: {test_case['response'][:100]}...")
            # Extract answer
            result = processor.process_response(test_case['response'], test_case['question'])
            print(f"Extracted: '{result.answer}'")
            print(f"Expected: '{test_case['expected']}'")
            print(f"Strategy: {result.strategy.value}")
            print(f"Confidence: {result.confidence:.2f}")
            if result.answer == test_case['expected']:
                print("✅ Extraction correct")
            else:
                # Remember the mismatch; remaining cases are still reported.
                mismatches += 1
                print("⚠️ Extraction differs from expected")

        # Get statistics
        print("\n📊 Processor Statistics:")
        print("-" * 40)
        stats = processor.get_statistics()
        for key, value in stats.items():
            print(f" {key}: {value}")

        if mismatches:
            print(
                f"\n❌ Response processor testing finished with "
                f"{mismatches} incorrect extraction(s)"
            )
            return False
        print("\n✅ Response processor testing completed!")
        return True
    except Exception as e:
        print(f"❌ Error testing response processor: {e}")
        return False
def main() -> int:
    """Run the test suite and return the process exit status.

    The standalone response-processor test always runs. The full agent test
    runs only when the ``MISTRAL_API_KEY`` environment variable is set to a
    non-empty value; otherwise it is skipped and treated as passed so that a
    missing key does not fail the run.

    Returns:
        int: ``0`` when both results are truthy, ``1`` otherwise.
    """
    print("🧪 Enhanced GAIA Agent Test Suite")
    print("=" * 60)

    # Test response processor standalone
    processor_success = test_response_processor_only()

    # Test full agent if API key is available
    if os.getenv("MISTRAL_API_KEY"):
        agent_success = test_enhanced_agent()
    else:
        print("\n⚠️ MISTRAL_API_KEY not found - skipping full agent test")
        agent_success = True  # Don't fail if no API key

    # Summary
    print("\n" + "=" * 60)
    if processor_success and agent_success:
        print("🎉 All tests completed successfully!")
        return 0
    print("❌ Some tests failed")
    return 1


if __name__ == "__main__":
    sys.exit(main())