# Source: Hugging Face Space JoachimVC/gaia-enhanced-agent (status: Running)
# File size: 6,376 bytes; revision 9a6a4dc

#!/usr/bin/env python3
"""
Test script for the enhanced GAIA agent with new response processor.
"""

import os
import sys
import logging
from pathlib import Path

# Put this script's own directory at the front of the import path so the
# function-level imports below (`agents.*`, `utils.*`) are looked up there first.
sys.path.insert(0, os.fspath(Path(__file__).parent))

# Root logger configuration: INFO and above, timestamped, with logger name.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

def test_enhanced_agent() -> bool:
    """Run the enhanced GAIA agent against a few sample questions.

    The agent class is imported inside the function so that a missing
    dependency is reported as a failed test rather than crashing the script
    at import time. Each question is passed to the agent and the answer is
    checked against a regular expression describing its expected shape.

    A shape mismatch is only reported as a warning. An exception raised while
    answering a question is counted as a failure, so the caller's exit code
    reflects it.

    Returns:
        True when the agent initialised and every question was processed
        without raising. False when the import failed, the agent reported
        itself unavailable, at least one question raised, or any other
        unexpected error occurred.
    """
    # Imported once here instead of on every loop iteration.
    import re

    print("🚀 Testing Enhanced GAIA Agent with Response Processor")
    print("=" * 60)

    try:
        # Import the enhanced agent
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Initialize the agent
        print("📦 Initializing Enhanced GAIA Agent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            print("❌ Agent not available - check MISTRAL_API_KEY")
            return False

        print("✅ Agent initialized successfully")
        print(f"📊 Tools available: {len(agent.tools)}")

        # Test questions of different types
        test_questions = [
            {
                "question": "What is 25 * 17?",
                "type": "Mathematical",
                "expected_pattern": r"^\d+$",
            },
            {
                "question": "What is the capital of France?",
                "type": "Factual",
                "expected_pattern": r"^[A-Za-z\s]+$",
            },
            {
                "question": "How many continents are there?",
                "type": "Count",
                "expected_pattern": r"^\d+$",
            },
        ]

        print("\n🧪 Testing Response Processing...")
        print("-" * 40)

        # Number of questions whose processing raised; drives the return value.
        error_count = 0

        for i, test_case in enumerate(test_questions, 1):
            print(f"\nTest {i}: {test_case['type']} Question")
            print(f"Question: {test_case['question']}")

            try:
                # Process the question
                answer = agent(test_case['question'])
                print(f"Answer: '{answer}'")

                # Validate the answer format (warning only, not a failure)
                if re.match(test_case['expected_pattern'], answer):
                    print("✅ Answer format valid")
                else:
                    print("⚠️ Answer format unexpected")

            except Exception as e:
                # Keep going so every question is exercised, but remember
                # the failure instead of silently discarding it.
                error_count += 1
                print(f"❌ Error processing question: {e}")

        # Get processor statistics
        print("\n📈 Response Processor Statistics:")
        print("-" * 40)
        stats = agent.get_processor_statistics()
        if stats:
            for key, value in stats.items():
                print(f"  {key}: {value}")
        else:
            print("  No statistics available")

        if error_count:
            print(
                f"\n❌ {error_count} of {len(test_questions)} "
                "questions failed with an error"
            )
            return False

        print("\n✅ Enhanced agent testing completed successfully!")
        return True

    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("Make sure all dependencies are installed")
        return False
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False

def test_response_processor_only() -> bool:
    """Exercise the response processor on canned responses, without the agent.

    Three fixed model responses, each ending in a ``FINAL ANSWER:`` marker,
    are passed to ``EnhancedResponseProcessor.process_response`` together
    with their question. The extracted answer is compared with the known
    expected value. Because the inputs are fixed, a mismatch is counted as
    a test failure rather than only being printed.

    Returns:
        True when the processor could be imported and every extracted answer
        equals its expected value. False when any extraction differs or any
        exception (including a failed import) occurs.
    """
    print("\n🧠 Testing Response Processor Standalone")
    print("=" * 60)

    try:
        from utils.response_processor import EnhancedResponseProcessor

        # Initialize processor
        processor = EnhancedResponseProcessor()
        print("✅ Response processor initialized")

        # Test responses
        test_responses = [
            {
                "response": "Let me calculate this. 25 * 17 = 425. FINAL ANSWER: 425",
                "question": "What is 25 * 17?",
                "expected": "425",
            },
            {
                "response": "The capital of France is Paris. FINAL ANSWER: Paris",
                "question": "What is the capital of France?",
                "expected": "Paris",
            },
            {
                "response": "After researching, I found that there are 7 continents on Earth. FINAL ANSWER: 7",
                "question": "How many continents are there?",
                "expected": "7",
            },
        ]

        print("\n🔍 Testing Answer Extraction...")
        print("-" * 40)

        # Number of cases whose extracted answer was wrong; drives the result.
        mismatch_count = 0

        for i, test_case in enumerate(test_responses, 1):
            print(f"\nTest {i}:")
            print(f"Question: {test_case['question']}")
            print(f"Response: {test_case['response'][:100]}...")

            # Extract answer
            result = processor.process_response(test_case['response'], test_case['question'])

            print(f"Extracted: '{result.answer}'")
            print(f"Expected: '{test_case['expected']}'")
            print(f"Strategy: {result.strategy.value}")
            print(f"Confidence: {result.confidence:.2f}")

            if result.answer == test_case['expected']:
                print("✅ Extraction correct")
            else:
                mismatch_count += 1
                print("⚠️ Extraction differs from expected")

        # Get statistics
        print("\n📊 Processor Statistics:")
        print("-" * 40)
        stats = processor.get_statistics()
        for key, value in stats.items():
            print(f"  {key}: {value}")

        if mismatch_count:
            print(
                f"\n❌ {mismatch_count} of {len(test_responses)} "
                "extractions did not match the expected answer"
            )
            return False

        print("\n✅ Response processor testing completed!")
        return True

    except Exception as e:
        print(f"❌ Error testing response processor: {e}")
        return False

if __name__ == "__main__":
    # Script entry point: run the standalone processor test, then the full
    # agent test when credentials are present, and exit 0 only if both passed.
    separator = "=" * 60

    print("🧪 Enhanced GAIA Agent Test Suite")
    print(separator)

    # The processor test does not read the API key, so it always runs.
    processor_success = test_response_processor_only()

    # The full agent test only runs when an API key is set; a missing key
    # is treated as a skip, not a failure.
    if not os.getenv("MISTRAL_API_KEY"):
        print("\n⚠️ MISTRAL_API_KEY not found - skipping full agent test")
        agent_success = True
    else:
        agent_success = test_enhanced_agent()

    # Summary and process exit status
    print("\n" + separator)
    if not (processor_success and agent_success):
        print("❌ Some tests failed")
        sys.exit(1)

    print("🎉 All tests completed successfully!")
    sys.exit(0)