# gaia-enhanced-agent / test_fixed_agent.py
# GAIA Agent Deployment
# Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
# 9a6a4dc
#!/usr/bin/env python3
"""
Test script to validate the fixed GAIA agent improvements.
This script tests the key fixes that should improve the 5/20 evaluation score.
"""
import os
import sys
import traceback
from pathlib import Path
# Put this script's own directory at the front of the import search path so
# the function-level imports used by the tests (utils, agents, app) are
# looked up there first.
_SCRIPT_DIR = Path(__file__).parent
sys.path.insert(0, str(_SCRIPT_DIR))
def load_env_file():
    """Load ``KEY=VALUE`` pairs from ``.env`` in the current directory.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Every other line is split on the first ``=``; key and value are
    stripped of surrounding whitespace, and one pair of matching single or
    double quotes around the value is removed. Lines whose key is empty are
    skipped, because assigning an empty name to ``os.environ`` raises.

    Variables already present in ``os.environ`` are overwritten. Nothing
    happens when ``.env`` does not exist.
    """
    env_file = Path('.env')
    if not env_file.exists():
        return

    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise become part of the first key.
    with open(env_file, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()
            if not key:
                # e.g. "=value": os.environ rejects an empty variable name.
                continue

            # KEY="value" / KEY='value' -> value (only a matching outer pair).
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]

            os.environ[key] = value
# Populate os.environ from a local .env file (if one exists) at import time,
# before any test below reads MISTRAL_API_KEY via os.getenv.
load_env_file()
def test_answer_formatter():
    """Check ``FixedGAIAAnswerFormatter.format_answer`` against known inputs.

    Each case passes a raw response string (with the placeholder question
    ``"test question"``) to ``format_answer`` and compares the result to the
    expected answer with ``==``. A per-case report is printed.

    Returns:
        bool: True when every case matched. False when any case mismatched,
        or when importing, constructing or calling the formatter raised
        (the traceback is printed in that case).
    """
    print("\n" + "="*50)
    print("🧪 Testing Fixed Answer Formatter")
    print("="*50)
    try:
        # Imported here so a missing module is reported as a failed test
        # instead of breaking the whole script at import time.
        from utils.fixed_answer_formatter import FixedGAIAAnswerFormatter

        formatter = FixedGAIAAnswerFormatter()

        # (raw response, expected formatted answer, description)
        cases = [
            (
                'Let me calculate this. The answer is 42. FINAL ANSWER: 42',
                '42',
                'Basic FINAL ANSWER format',
            ),
            (
                'After analysis, I found the result. FINAL ANSWER: Paris',
                'Paris',
                'Text answer with FINAL ANSWER',
            ),
            (
                'FINAL ANSWER: blue, green, red',
                'blue, green, red',
                'List format',
            ),
            (
                'The calculation shows 1234 FINAL ANSWER: 1234',
                '1234',
                'Number without commas',
            ),
            (
                'No final answer format here, just 25',
                '25',
                'Fallback extraction',
            ),
        ]

        all_passed = True
        for number, (raw, expected, description) in enumerate(cases, 1):
            result = formatter.format_answer(raw, "test question")
            passed = result == expected
            all_passed = all_passed and passed
            status = "✅ PASS" if passed else "❌ FAIL"
            print(f"Test {number}: {status} - {description}")
            # Only the first 50 characters of the input are echoed.
            print(f" Input: {raw[:50]}...")
            print(f" Expected: '{expected}'")
            print(f" Got: '{result}'")
            print()

        if all_passed:
            print("✅ All answer formatter tests passed!")
        else:
            print("❌ Some answer formatter tests failed!")
        return all_passed
    except Exception as e:
        print(f"❌ Error testing answer formatter: {e}")
        traceback.print_exc()
        return False
def test_fixed_agent_import():
    """Check that the fixed agent module imports and reports its status.

    Imports ``FixedGAIAAgent`` and ``get_agent_status`` from
    ``agents.fixed_enhanced_unified_agno_agent``, calls ``get_agent_status()``
    and prints whatever it returns.

    Returns:
        bool: True when the import and the status call both succeed; False
        when either raises (the traceback is printed).
    """
    print("\n" + "="*50)
    print("🧪 Testing Fixed Agent Import")
    print("="*50)
    try:
        # FixedGAIAAgent is imported only to prove the name exists.
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent, get_agent_status
        print("✅ Successfully imported FixedGAIAAgent")

        agent_status = get_agent_status()
        print(f"📊 Agent Status: {agent_status}")
        return True
    except Exception as e:
        print(f"❌ Error importing fixed agent: {e}")
        traceback.print_exc()
        return False
def test_fixed_agent_initialization():
    """Check that ``FixedGAIAAgent`` can be constructed and is available.

    Requires the ``MISTRAL_API_KEY`` environment variable; when it is unset
    or empty the test is reported as failed without constructing the agent.
    Otherwise the agent is constructed, its ``available`` attribute is
    checked, and ``get_tool_status()`` is printed on success.

    Returns:
        bool: True when the agent reports itself available. False when the
        API key is missing, the agent is not available, or anything raised
        (the traceback is printed).
    """
    print("\n" + "="*50)
    print("🧪 Testing Fixed Agent Initialization")
    print("="*50)
    try:
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        mistral_key = os.getenv("MISTRAL_API_KEY")
        if not mistral_key:
            print("⚠️ MISTRAL_API_KEY not found - agent will not be fully functional")
            print("💡 Set MISTRAL_API_KEY in .env file for full testing")
            return False
        print("✅ MISTRAL_API_KEY found")

        agent = FixedGAIAAgent()
        if not agent.available:
            print("❌ Fixed agent initialization failed")
            return False

        print("✅ Fixed agent initialized successfully")
        tool_status = agent.get_tool_status()
        print(f"📊 Tool Status: {tool_status}")
        return True
    except Exception as e:
        print(f"❌ Error initializing fixed agent: {e}")
        traceback.print_exc()
        return False
def test_fixed_agent_simple_question():
    """Ask the fixed agent one arithmetic question and sanity-check the reply.

    Requires ``MISTRAL_API_KEY`` and an agent whose ``available`` attribute is
    truthy; otherwise the test is reported as failed without asking anything.
    The agent is called with ``"What is 25 * 17?"`` and the answer is accepted
    when it is non-empty, is not ``"unknown"``, and contains ``"425"``.

    Returns:
        bool: True when the answer passes the check above. False when a
        prerequisite is missing, the answer is rejected, or anything raised
        (the traceback is printed).
    """
    print("\n" + "="*50)
    print("🧪 Testing Fixed Agent with Simple Question")
    print("="*50)
    try:
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        mistral_key = os.getenv("MISTRAL_API_KEY")
        if not mistral_key:
            print("⚠️ MISTRAL_API_KEY not found - skipping agent test")
            return False

        agent = FixedGAIAAgent()
        if not agent.available:
            print("❌ Agent not available - skipping test")
            return False

        test_question = "What is 25 * 17?"
        print(f"🤔 Testing question: {test_question}")
        answer = agent(test_question)
        print(f"🎯 Agent answer: '{answer}'")

        # 25 * 17 == 425; a substring match tolerates surrounding text.
        looks_right = answer and answer != "unknown" and "425" in answer
        if not looks_right:
            print("❌ Agent answer doesn't look correct")
            return False

        print("✅ Agent provided reasonable answer")
        return True
    except Exception as e:
        print(f"❌ Error testing fixed agent: {e}")
        traceback.print_exc()
        return False
def test_app_integration():
    """Check that the ``app`` module imports and report its fixed-agent flag.

    After importing ``app``, prints whether it exposes a truthy
    ``FIXED_AGNO_AVAILABLE`` attribute. A missing or falsy flag only produces
    a warning; it does not fail the test.

    Returns:
        bool: True when ``import app`` succeeds; False when it raises
        (the traceback is printed).
    """
    print("\n" + "="*50)
    print("🧪 Testing App Integration")
    print("="*50)
    try:
        import app
        print("✅ Successfully imported app module")

        # A missing attribute is treated the same as a falsy one.
        if getattr(app, 'FIXED_AGNO_AVAILABLE', False):
            print("✅ Fixed AGNO agent available in app")
        else:
            print("⚠️ Fixed AGNO agent not available in app")
        return True
    except Exception as e:
        print(f"❌ Error testing app integration: {e}")
        traceback.print_exc()
        return False
def main():
    """Run every test function in order and print a pass/fail summary.

    Each test is called inside its own ``try`` so that an unexpected
    exception is recorded as a failure (with its traceback printed, as the
    individual tests do) and the remaining tests still run.

    Returns:
        bool: True only when every test returned a truthy result.
    """
    print("🚀 Starting Fixed GAIA Agent Test Suite")
    print("This validates the fixes for the 5/20 evaluation score issue")

    tests = [
        ("Answer Formatter", test_answer_formatter),
        ("Fixed Agent Import", test_fixed_agent_import),
        ("Fixed Agent Initialization", test_fixed_agent_initialization),
        ("Simple Question Test", test_fixed_agent_simple_question),
        ("App Integration", test_app_integration),
    ]

    results = []
    for test_name, test_func in tests:
        try:
            result = test_func()
        except Exception as e:
            print(f"❌ Test '{test_name}' crashed: {e}")
            # Show where the crash came from, not just its message.
            traceback.print_exc()
            result = False
        results.append((test_name, result))

    # Summary
    print("\n" + "="*50)
    print("📊 Test Results Summary")
    print("="*50)
    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{status} {test_name}")

    total = len(results)
    passed = sum(1 for _, result in results if result)
    print(f"\n🎯 Overall: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! The fixes should improve evaluation performance.")
    elif passed >= total * 0.8:
        print("⚠️ Most tests passed. Some issues may remain.")
    else:
        print("❌ Many tests failed. Significant issues remain.")
    return passed == total
if __name__ == "__main__":
    # Exit status 0 only when main() reports that every test passed.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)