Spaces:
Running
Running
File size: 6,376 Bytes
9a6a4dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
#!/usr/bin/env python3
"""
Test script for the enhanced GAIA agent with new response processor.
"""
import os
import sys
import logging
from pathlib import Path
# Put this script's own directory (the "deployment-ready" directory) at the
# front of the import path.
sys.path.insert(0, os.fspath(Path(__file__).parent))

# Configure the root logger: INFO and above, each record carrying the
# timestamp, logger name and level ahead of the message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def test_enhanced_agent():
    """Exercise the enhanced GAIA agent end to end on a few sample questions.

    The agent class is imported inside the function so that a missing
    dependency is reported as a failed run instead of aborting the script.
    Each sample question is passed to the agent and the answer is checked
    against a regular expression describing the expected answer shape.
    Afterwards the agent's response-processor statistics are printed.

    Returns:
        bool: ``True`` when the agent was created, reported itself as
        available and the run reached the end; ``False`` when the agent is
        unavailable, an import fails, or an unexpected error escapes the
        per-question handling.
    """
    # Imported once here instead of on every loop iteration.
    import re

    # NOTE(review): the leading glyphs in the messages below look like
    # mis-decoded emoji; they are kept as found -- restore from the
    # original file if it is available.
    print("π Testing Enhanced GAIA Agent with Response Processor")
    print("=" * 60)

    try:
        # Import the enhanced agent
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Initialize the agent
        print("π¦ Initializing Enhanced GAIA Agent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            print("β Agent not available - check MISTRAL_API_KEY")
            return False

        print("β Agent initialized successfully")
        print(f"π Tools available: {len(agent.tools)}")

        # Sample questions, each with a regex for the expected answer shape
        test_questions = [
            {
                "question": "What is 25 * 17?",
                "type": "Mathematical",
                "expected_pattern": r"^\d+$",
            },
            {
                "question": "What is the capital of France?",
                "type": "Factual",
                "expected_pattern": r"^[A-Za-z\s]+$",
            },
            {
                "question": "How many continents are there?",
                "type": "Count",
                "expected_pattern": r"^\d+$",
            },
        ]

        print("\nπ§ͺ Testing Response Processing...")
        print("-" * 40)

        for i, test_case in enumerate(test_questions, 1):
            print(f"\nTest {i}: {test_case['type']} Question")
            print(f"Question: {test_case['question']}")

            # NOTE(review): a failure on one question is printed and the
            # loop continues; it does not change the return value.
            try:
                # Process the question
                answer = agent(test_case['question'])
                print(f"Answer: '{answer}'")

                # re.match() raises TypeError on non-string input, so a
                # non-string answer is reported as a format mismatch
                # rather than as a processing error.
                if isinstance(answer, str) and re.match(
                    test_case['expected_pattern'], answer
                ):
                    print("β Answer format valid")
                else:
                    print("β οΈ Answer format unexpected")
            except Exception as e:
                print(f"β Error processing question: {e}")

        # Get processor statistics
        print("\nπ Response Processor Statistics:")
        print("-" * 40)
        stats = agent.get_processor_statistics()
        if stats:
            for key, value in stats.items():
                print(f" {key}: {value}")
        else:
            print(" No statistics available")

        print("\nβ Enhanced agent testing completed successfully!")
        return True

    except ImportError as e:
        print(f"β Import error: {e}")
        print("Make sure all dependencies are installed")
        return False
    except Exception as e:
        # Top-level boundary of the check: report and signal failure.
        print(f"β Unexpected error: {e}")
        return False
def test_response_processor_only():
    """Check the response processor on canned responses, without the agent.

    ``EnhancedResponseProcessor`` is imported inside the function, created,
    and fed three fixed responses that each end in a ``FINAL ANSWER:``
    marker. For every response the extracted answer, strategy and
    confidence are printed next to the expected answer, followed by the
    processor's statistics.

    Returns:
        bool: ``True`` when every step ran without raising; ``False`` when
        the import or any later step raised an exception.
    """
    # NOTE(review): the leading glyphs in the messages below look like
    # mis-decoded emoji; they are kept as found -- restore from the
    # original file if it is available.
    print("\nπ§ Testing Response Processor Standalone")
    print("=" * 60)

    try:
        from utils.response_processor import EnhancedResponseProcessor

        # Initialize processor
        processor = EnhancedResponseProcessor()
        print("β Response processor initialized")

        # Canned model outputs paired with the answer that should come out
        test_responses = [
            {
                "response": "Let me calculate this. 25 * 17 = 425. FINAL ANSWER: 425",
                "question": "What is 25 * 17?",
                "expected": "425",
            },
            {
                "response": "The capital of France is Paris. FINAL ANSWER: Paris",
                "question": "What is the capital of France?",
                "expected": "Paris",
            },
            {
                "response": "After researching, I found that there are 7 continents on Earth. FINAL ANSWER: 7",
                "question": "How many continents are there?",
                "expected": "7",
            },
        ]

        print("\nπ Testing Answer Extraction...")
        print("-" * 40)

        for i, test_case in enumerate(test_responses, 1):
            print(f"\nTest {i}:")
            print(f"Question: {test_case['question']}")
            print(f"Response: {test_case['response'][:100]}...")

            # Extract answer
            result = processor.process_response(test_case['response'], test_case['question'])

            print(f"Extracted: '{result.answer}'")
            print(f"Expected: '{test_case['expected']}'")
            print(f"Strategy: {result.strategy.value}")
            print(f"Confidence: {result.confidence:.2f}")

            # NOTE(review): a mismatch is only reported; it does not change
            # the return value.
            if result.answer == test_case['expected']:
                print("β Extraction correct")
            else:
                print("β οΈ Extraction differs from expected")

        # Get statistics
        print("\nπ Processor Statistics:")
        print("-" * 40)
        stats = processor.get_statistics()
        for key, value in stats.items():
            print(f" {key}: {value}")

        print("\nβ Response processor testing completed!")
        return True

    except Exception as e:
        # Top-level boundary of the check: report and signal failure.
        print(f"β Error testing response processor: {e}")
        return False
if __name__ == "__main__":
    # Script entry point: run both checks and map the combined result to
    # the process exit status (0 on success, 1 otherwise).
    rule = "=" * 60
    print("π§ͺ Enhanced GAIA Agent Test Suite")
    print(rule)

    # The standalone processor check always runs.
    processor_success = test_response_processor_only()

    # The full agent check runs only when MISTRAL_API_KEY is set; a missing
    # key is reported and counted as a pass.
    if not os.getenv("MISTRAL_API_KEY"):
        print("\nβ οΈ MISTRAL_API_KEY not found - skipping full agent test")
        agent_success = True
    else:
        agent_success = test_enhanced_agent()

    # Summary
    print("\n" + rule)
    if not (processor_success and agent_success):
        print("β Some tests failed")
        sys.exit(1)
    print("π All tests completed successfully!")
    sys.exit(0)