Spaces:
Running
Running
#!/usr/bin/env python3
"""
Test script for the enhanced GAIA agent with new response processor.
"""
import os | |
import sys | |
import logging | |
from pathlib import Path | |
# Put this script's own directory first on the import path so the
# project packages imported further down resolve from next to this file.
sys.path.insert(0, os.fspath(Path(__file__).parent))

# Configure root logging: INFO and above, with timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def test_enhanced_agent():
    """Run the enhanced GAIA agent against a few sample questions.

    Lazily imports ``FixedGAIAAgent``, instantiates it, asks one
    mathematical, one factual and one counting question, checks each answer
    against a loose format regex, and finally prints the statistics returned
    by ``agent.get_processor_statistics()``.

    Returns:
        bool: ``True`` when the agent was available and every question was
        processed without raising. ``False`` when the import failed, the
        agent reported itself unavailable, at least one question raised, or
        any other unexpected error occurred. An answer that merely does not
        match the expected format is reported as a warning and does not
        change the result.
    """
    # Imported once here instead of on every loop iteration.
    import re

    print("π Testing Enhanced GAIA Agent with Response Processor")
    print("=" * 60)

    try:
        # Import the enhanced agent
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Initialize the agent
        print("π¦ Initializing Enhanced GAIA Agent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            print("β Agent not available - check MISTRAL_API_KEY")
            return False

        print("β Agent initialized successfully")
        print(f"π Tools available: {len(agent.tools)}")

        # Test questions of different types
        test_questions = [
            {
                "question": "What is 25 * 17?",
                "type": "Mathematical",
                "expected_pattern": r"^\d+$",
            },
            {
                "question": "What is the capital of France?",
                "type": "Factual",
                "expected_pattern": r"^[A-Za-z\s]+$",
            },
            {
                "question": "How many continents are there?",
                "type": "Count",
                "expected_pattern": r"^\d+$",
            },
        ]

        print("\nπ§ͺ Testing Response Processing...")
        print("-" * 40)

        # Questions whose processing raised; these must fail the whole run
        # instead of being reported and then forgotten.
        failures = 0

        for i, test_case in enumerate(test_questions, 1):
            print(f"\nTest {i}: {test_case['type']} Question")
            print(f"Question: {test_case['question']}")

            try:
                # Process the question
                answer = agent(test_case['question'])
                print(f"Answer: '{answer}'")

                # Validate the answer format (a mismatch is only a warning)
                if re.match(test_case['expected_pattern'], answer):
                    print("β Answer format valid")
                else:
                    print("β οΈ Answer format unexpected")
            except Exception as e:
                failures += 1
                print(f"β Error processing question: {e}")

        # Get processor statistics
        print("\nπ Response Processor Statistics:")
        print("-" * 40)
        stats = agent.get_processor_statistics()
        if stats:
            for key, value in stats.items():
                print(f" {key}: {value}")
        else:
            print(" No statistics available")

        if failures:
            print(f"\nβ Enhanced agent testing finished with {failures} failed question(s)")
            return False

        print("\nβ Enhanced agent testing completed successfully!")
        return True

    except ImportError as e:
        print(f"β Import error: {e}")
        print("Make sure all dependencies are installed")
        return False
    except Exception as e:
        print(f"β Unexpected error: {e}")
        return False
def test_response_processor_only():
    """Exercise the response processor on canned responses, without the agent.

    Lazily imports ``EnhancedResponseProcessor``, feeds it three hard-coded
    responses that each end in a ``FINAL ANSWER: ...`` marker, prints the
    extracted answer, strategy and confidence for each, compares the
    extracted answer with the expected one, and prints the statistics
    returned by ``processor.get_statistics()``.

    Returns:
        bool: ``True`` when every extracted answer equals its expected
        value. ``False`` when the import fails, any extraction differs from
        the expected answer, or an unexpected error occurs.
    """
    print("\nπ§ Testing Response Processor Standalone")
    print("=" * 60)

    try:
        from utils.response_processor import EnhancedResponseProcessor

        # Initialize processor
        processor = EnhancedResponseProcessor()
        print("β Response processor initialized")

        # Test responses
        test_responses = [
            {
                "response": "Let me calculate this. 25 * 17 = 425. FINAL ANSWER: 425",
                "question": "What is 25 * 17?",
                "expected": "425",
            },
            {
                "response": "The capital of France is Paris. FINAL ANSWER: Paris",
                "question": "What is the capital of France?",
                "expected": "Paris",
            },
            {
                "response": "After researching, I found that there are 7 continents on Earth. FINAL ANSWER: 7",
                "question": "How many continents are there?",
                "expected": "7",
            },
        ]

        print("\nπ Testing Answer Extraction...")
        print("-" * 40)

        # Extractions that differ from the expected answer; any mismatch
        # must fail the run, otherwise the expected values check nothing.
        mismatches = 0

        for i, test_case in enumerate(test_responses, 1):
            response = test_case['response']
            # Only show an ellipsis when the preview was actually truncated.
            preview = response if len(response) <= 100 else f"{response[:100]}..."

            print(f"\nTest {i}:")
            print(f"Question: {test_case['question']}")
            print(f"Response: {preview}")

            # Extract answer
            result = processor.process_response(response, test_case['question'])

            print(f"Extracted: '{result.answer}'")
            print(f"Expected: '{test_case['expected']}'")
            print(f"Strategy: {result.strategy.value}")
            print(f"Confidence: {result.confidence:.2f}")

            if result.answer == test_case['expected']:
                print("β Extraction correct")
            else:
                mismatches += 1
                print("β οΈ Extraction differs from expected")

        # Get statistics (guarded the same way as in test_enhanced_agent)
        print("\nπ Processor Statistics:")
        print("-" * 40)
        stats = processor.get_statistics()
        if stats:
            for key, value in stats.items():
                print(f" {key}: {value}")
        else:
            print(" No statistics available")

        if mismatches:
            print(f"\nβ Response processor testing finished with {mismatches} incorrect extraction(s)")
            return False

        print("\nβ Response processor testing completed!")
        return True

    except ImportError as e:
        print(f"β Import error: {e}")
        print("Make sure all dependencies are installed")
        return False
    except Exception as e:
        print(f"β Error testing response processor: {e}")
        return False
if __name__ == "__main__":
    # Script entry point: run the standalone processor test, then the full
    # agent test when an API key is configured, and exit 0/1 accordingly.
    print("π§ͺ Enhanced GAIA Agent Test Suite")
    print("=" * 60)

    # The processor test has no external requirements, so it always runs.
    processor_success = test_response_processor_only()

    # The full agent test is only attempted when MISTRAL_API_KEY is set.
    if not os.getenv("MISTRAL_API_KEY"):
        print("\nβ οΈ MISTRAL_API_KEY not found - skipping full agent test")
        # A skipped agent test is deliberately not counted as a failure.
        agent_success = True
    else:
        agent_success = test_enhanced_agent()

    # Summary and exit status
    print("\n" + "=" * 60)
    if not (processor_success and agent_success):
        print("β Some tests failed")
        sys.exit(1)

    print("π All tests completed successfully!")
    sys.exit(0)