#!/usr/bin/env python3
"""
Test script to validate the fixed GAIA agent improvements.
This script tests the key fixes that should improve the 5/20 evaluation score.
"""

import os
import sys
import traceback
from pathlib import Path

# Put the directory containing this script first on sys.path so the
# function-level imports below (utils.*, agents.*, app) are looked up there
# first, independent of the current working directory.
# NOTE(review): presumably this is the "deployment-ready" directory - confirm.
sys.path.insert(0, str(Path(__file__).parent))

def load_env_file():
    """Load environment variables from a ``.env`` file if it exists.

    The file is looked up relative to the current working directory. Blank
    lines, lines starting with ``#`` and lines without ``=`` are ignored.
    Every other line is split on the first ``=``; the key and value are
    stripped of surrounding whitespace, one pair of matching surrounding
    quotes (single or double) is removed from the value, and the pair is
    written to ``os.environ``, overwriting any existing value.

    Returns:
        None. Does nothing when ``.env`` is absent.
    """
    env_file = Path('.env')
    if not env_file.exists():
        return

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(env_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()

            # A line such as "=value" has no variable name; skip it rather
            # than attempt to set an environment variable with an empty name.
            if not key:
                continue

            # KEY="value" / KEY='value' are common in .env files; without
            # this the quotes would become part of the value (e.g. API keys).
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]

            os.environ[key] = value

# Populate os.environ from .env at import time, before any test runs, so that
# the os.getenv("MISTRAL_API_KEY") checks in the tests below can see the key.
load_env_file()

def test_answer_formatter():
    """Run the answer-formatter checks and report whether all of them passed.

    Imports ``FixedGAIAAnswerFormatter``, feeds each sample response to
    ``format_answer`` and compares the output with the expected string,
    printing a per-case report. Returns ``True`` only when every case
    matches; any exception (including a failed import) is printed with its
    traceback and results in ``False``.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("🧪 Testing Fixed Answer Formatter")
    print(divider)

    try:
        from utils.fixed_answer_formatter import FixedGAIAAnswerFormatter
        formatter = FixedGAIAAnswerFormatter()

        # Sample model outputs paired with the answer that should be extracted.
        test_cases = [
            {
                'input': 'Let me calculate this. The answer is 42. FINAL ANSWER: 42',
                'expected': '42',
                'description': 'Basic FINAL ANSWER format',
            },
            {
                'input': 'After analysis, I found the result. FINAL ANSWER: Paris',
                'expected': 'Paris',
                'description': 'Text answer with FINAL ANSWER',
            },
            {
                'input': 'FINAL ANSWER: blue, green, red',
                'expected': 'blue, green, red',
                'description': 'List format',
            },
            {
                'input': 'The calculation shows 1234 FINAL ANSWER: 1234',
                'expected': '1234',
                'description': 'Number without commas',
            },
            {
                'input': 'No final answer format here, just 25',
                'expected': '25',
                'description': 'Fallback extraction',
            },
        ]

        all_passed = True
        for number, case in enumerate(test_cases, start=1):
            raw_text = case['input']
            wanted = case['expected']
            actual = formatter.format_answer(raw_text, "test question")

            if actual == wanted:
                verdict = "✅ PASS"
            else:
                verdict = "❌ FAIL"
                all_passed = False

            print(f"Test {number}: {verdict} - {case['description']}")
            print(f"  Input: {raw_text[:50]}...")
            print(f"  Expected: '{wanted}'")
            print(f"  Got: '{actual}'")
            print()

        print(
            "✅ All answer formatter tests passed!"
            if all_passed
            else "❌ Some answer formatter tests failed!"
        )
        return all_passed

    except Exception as exc:
        print(f"❌ Error testing answer formatter: {exc}")
        traceback.print_exc()
        return False

def test_fixed_agent_import():
    """Check that the fixed agent module can be imported.

    Imports ``FixedGAIAAgent`` and ``get_agent_status`` from the agent
    module, then calls ``get_agent_status()`` and prints what it returns.
    Returns ``True`` on success; any exception is printed with its traceback
    and yields ``False``.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("🧪 Testing Fixed Agent Import")
    print(divider)

    try:
        from agents.fixed_enhanced_unified_agno_agent import (
            FixedGAIAAgent,
            get_agent_status,
        )
        print("✅ Successfully imported FixedGAIAAgent")

        # Exercise the module-level status helper as a smoke check.
        agent_status = get_agent_status()
        print(f"📊 Agent Status: {agent_status}")
    except Exception as exc:
        print(f"❌ Error importing fixed agent: {exc}")
        traceback.print_exc()
        return False

    return True

def test_fixed_agent_initialization():
    """Check that ``FixedGAIAAgent`` can be constructed and reports available.

    Returns ``False`` when ``MISTRAL_API_KEY`` is not set in the environment,
    when the constructed agent's ``available`` attribute is falsy, or when
    any exception is raised (the traceback is printed). Returns ``True``
    after printing the agent's ``get_tool_status()`` otherwise.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("🧪 Testing Fixed Agent Initialization")
    print(divider)

    try:
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Without the API key there is nothing meaningful to initialize.
        if not os.getenv("MISTRAL_API_KEY"):
            print("⚠️ MISTRAL_API_KEY not found - agent will not be fully functional")
            print("💡 Set MISTRAL_API_KEY in .env file for full testing")
            return False

        print("✅ MISTRAL_API_KEY found")

        agent = FixedGAIAAgent()

        if not agent.available:
            print("❌ Fixed agent initialization failed")
            return False

        print("✅ Fixed agent initialized successfully")
        tool_status = agent.get_tool_status()
        print(f"📊 Tool Status: {tool_status}")
        return True

    except Exception as exc:
        print(f"❌ Error initializing fixed agent: {exc}")
        traceback.print_exc()
        return False

def test_fixed_agent_simple_question():
    """Ask the fixed agent a simple multiplication question.

    Skips (returning ``False``) when ``MISTRAL_API_KEY`` is unset or the
    agent's ``available`` attribute is falsy. Otherwise calls the agent with
    "What is 25 * 17?" and returns ``True`` only when the answer is truthy,
    is not ``"unknown"`` and contains ``"425"``. Any exception is printed
    with its traceback and yields ``False``.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("🧪 Testing Fixed Agent with Simple Question")
    print(divider)

    try:
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        if not os.getenv("MISTRAL_API_KEY"):
            print("⚠️ MISTRAL_API_KEY not found - skipping agent test")
            return False

        agent = FixedGAIAAgent()
        if not agent.available:
            print("❌ Agent not available - skipping test")
            return False

        # 25 * 17 == 425, so the answer must mention "425".
        test_question = "What is 25 * 17?"
        print(f"🤔 Testing question: {test_question}")

        answer = agent(test_question)
        print(f"🎯 Agent answer: '{answer}'")

        if not (answer and answer != "unknown" and "425" in answer):
            print("❌ Agent answer doesn't look correct")
            return False

        print("✅ Agent provided reasonable answer")
        return True

    except Exception as exc:
        print(f"❌ Error testing fixed agent: {exc}")
        traceback.print_exc()
        return False

def test_app_integration():
    """Check that the ``app`` module imports and report fixed-agent availability.

    Returns ``True`` whenever ``import app`` succeeds; the value of
    ``app.FIXED_AGNO_AVAILABLE`` (treated as false when missing) only affects
    which message is printed. Any exception is printed with its traceback
    and yields ``False``.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("🧪 Testing App Integration")
    print(divider)

    try:
        import app

        print("✅ Successfully imported app module")

        # A missing flag is reported the same way as a falsy one.
        fixed_agent_ready = getattr(app, 'FIXED_AGNO_AVAILABLE', False)
        print(
            "✅ Fixed AGNO agent available in app"
            if fixed_agent_ready
            else "⚠️ Fixed AGNO agent not available in app"
        )
    except Exception as exc:
        print(f"❌ Error testing app integration: {exc}")
        traceback.print_exc()
        return False

    return True

def main():
    """Run every test function in order and print a summary.

    Each test is called in turn; a test that raises is reported as crashed
    and counted as failed. After the per-test PASS/FAIL lines an overall
    tally and a verdict message are printed.

    Returns:
        ``True`` when every test returned a truthy result, else ``False``.
    """
    print("🚀 Starting Fixed GAIA Agent Test Suite")
    print("This validates the fixes for the 5/20 evaluation score issue")

    tests = (
        ("Answer Formatter", test_answer_formatter),
        ("Fixed Agent Import", test_fixed_agent_import),
        ("Fixed Agent Initialization", test_fixed_agent_initialization),
        ("Simple Question Test", test_fixed_agent_simple_question),
        ("App Integration", test_app_integration),
    )

    results = []
    for label, runner in tests:
        try:
            outcome = runner()
        except Exception as exc:
            print(f"❌ Test '{label}' crashed: {exc}")
            outcome = False
        results.append((label, outcome))

    divider = "=" * 50
    print("\n" + divider)
    print("📊 Test Results Summary")
    print(divider)

    for label, outcome in results:
        marker = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{marker} {label}")

    total = len(results)
    passed = sum(1 for _, outcome in results if outcome)

    print(f"\n🎯 Overall: {passed}/{total} tests passed")

    everything_passed = passed == total
    if everything_passed:
        print("🎉 All tests passed! The fixes should improve evaluation performance.")
    elif passed >= total * 0.8:
        # At least 80% passing is reported as a partial success.
        print("⚠️ Most tests passed. Some issues may remain.")
    else:
        print("❌ Many tests failed. Significant issues remain.")

    return everything_passed

# Script entry point: run the suite and exit with status 0 only when every
# test passed, 1 otherwise.
if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)