Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / test_fixed_agent.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc about 2 months ago

8.63 kB

	#!/usr/bin/env python3
	"""
	Test script to validate the fixed GAIA agent improvements.
	This script tests the key fixes that should improve the 5/20 evaluation score.
	"""

	import os
	import sys
	import traceback
	from pathlib import Path

	# Put the directory containing this script at the front of sys.path so the
	# project imports used by the tests below (`utils`, `agents`, `app`) are
	# looked up in this file's own folder first.
	sys.path.insert(0, str(Path(__file__).parent))

	def load_env_file(env_path='.env'):
	    """Load KEY=VALUE pairs from a dotenv-style file into ``os.environ``.

	    Parsing rules:
	      * blank lines, lines starting with ``#`` and lines without ``=`` are
	        skipped;
	      * the line is split on the first ``=`` only, so values may contain ``=``;
	      * whitespace around the key and the value is stripped;
	      * one pair of matching single or double quotes around the value is
	        removed (``KEY="abc"`` stores ``abc``);
	      * lines with an empty key (``=value``) are skipped, because assigning
	        an empty name to ``os.environ`` raises.

	    Variables already present in the environment are overwritten.

	    Args:
	        env_path: Path of the file to read. Defaults to ``.env`` in the
	            current working directory.

	    Returns:
	        None. Does nothing when the file does not exist or is not a
	        regular file.
	    """
	    env_file = Path(env_path)
	    if not env_file.is_file():
	        return

	    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which would
	    # otherwise become part of the first key.
	    with open(env_file, 'r', encoding='utf-8-sig') as f:
	        for raw_line in f:
	            line = raw_line.strip()
	            if not line or line.startswith('#'):
	                continue

	            key, separator, value = line.partition('=')
	            key = key.strip()
	            if not separator or not key:
	                continue

	            value = value.strip()
	            # Drop one pair of matching surrounding quotes.
	            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
	                value = value[1:-1]

	            os.environ[key] = value

	# Populate os.environ from a local .env file at import time, before the
	# tests below read MISTRAL_API_KEY via os.getenv.
	load_env_file()

	def test_answer_formatter():
	    """Check FixedGAIAAnswerFormatter against a fixed set of sample responses.

	    Each sample is passed to ``format_answer`` together with the placeholder
	    question "test question", and the result is compared for equality with
	    the expected string. A per-case report is printed.

	    Returns:
	        True when every case matched; False when at least one case differed
	        or when anything raised (including a failed import of the formatter).
	    """
	    banner = "="*50
	    print("\n" + banner)
	    print("🧪 Testing Fixed Answer Formatter")
	    print(banner)

	    try:
	        from utils.fixed_answer_formatter import FixedGAIAAnswerFormatter
	        answer_formatter = FixedGAIAAnswerFormatter()

	        # Sample responses and the answer each one should reduce to.
	        samples = [
	            {
	                'input': 'Let me calculate this. The answer is 42. FINAL ANSWER: 42',
	                'expected': '42',
	                'description': 'Basic FINAL ANSWER format',
	            },
	            {
	                'input': 'After analysis, I found the result. FINAL ANSWER: Paris',
	                'expected': 'Paris',
	                'description': 'Text answer with FINAL ANSWER',
	            },
	            {
	                'input': 'FINAL ANSWER: blue, green, red',
	                'expected': 'blue, green, red',
	                'description': 'List format',
	            },
	            {
	                'input': 'The calculation shows 1234 FINAL ANSWER: 1234',
	                'expected': '1234',
	                'description': 'Number without commas',
	            },
	            {
	                'input': 'No final answer format here, just 25',
	                'expected': '25',
	                'description': 'Fallback extraction',
	            },
	        ]

	        outcomes = []
	        for number, sample in enumerate(samples, start=1):
	            raw_text = sample['input']
	            wanted = sample['expected']
	            got = answer_formatter.format_answer(raw_text, "test question")
	            matched = got == wanted
	            outcomes.append(matched)

	            verdict = "✅ PASS" if matched else "❌ FAIL"
	            print(f"Test {number}: {verdict} - {sample['description']}")
	            print(f" Input: {raw_text[:50]}...")
	            print(f" Expected: '{wanted}'")
	            print(f" Got: '{got}'")
	            print()

	        all_passed = all(outcomes)
	        print(
	            "✅ All answer formatter tests passed!"
	            if all_passed
	            else "❌ Some answer formatter tests failed!"
	        )
	        return all_passed

	    except Exception as exc:
	        print(f"❌ Error testing answer formatter: {exc}")
	        traceback.print_exc()
	        return False

	def test_fixed_agent_import():
	    """Check that the fixed agent module imports and reports its status.

	    Imports ``FixedGAIAAgent`` and ``get_agent_status`` from
	    ``agents.fixed_enhanced_unified_agno_agent``, then prints whatever
	    ``get_agent_status()`` returns.

	    Returns:
	        True when the import and the status call both succeed; False when
	        either raises (the error and its traceback are printed).
	    """
	    divider = "="*50
	    print(f"\n{divider}")
	    print("🧪 Testing Fixed Agent Import")
	    print(divider)

	    try:
	        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent, get_agent_status
	        print("✅ Successfully imported FixedGAIAAgent")

	        # Calling the status helper proves it is callable, not just importable.
	        print(f"📊 Agent Status: {get_agent_status()}")
	        return True

	    except Exception as exc:
	        print(f"❌ Error importing fixed agent: {exc}")
	        traceback.print_exc()
	        return False

	def test_fixed_agent_initialization():
	    """Check that a FixedGAIAAgent can be constructed and reports itself available.

	    The check needs the MISTRAL_API_KEY environment variable; when it is
	    missing or empty the function prints a hint and returns False without
	    constructing the agent.

	    Returns:
	        True when the constructed agent's ``available`` attribute is truthy
	        (its tool status is printed); False when the key is missing, the
	        agent is unavailable, or anything raises.
	    """
	    divider = "="*50
	    print(f"\n{divider}")
	    print("🧪 Testing Fixed Agent Initialization")
	    print(divider)

	    try:
	        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

	        # Guard: the API key must be present before the agent is built.
	        if not os.getenv("MISTRAL_API_KEY"):
	            print("⚠️ MISTRAL_API_KEY not found - agent will not be fully functional")
	            print("💡 Set MISTRAL_API_KEY in .env file for full testing")
	            return False

	        print("✅ MISTRAL_API_KEY found")

	        gaia_agent = FixedGAIAAgent()

	        # Guard: construction succeeded but the agent reports itself unusable.
	        if not gaia_agent.available:
	            print("❌ Fixed agent initialization failed")
	            return False

	        print("✅ Fixed agent initialized successfully")
	        print(f"📊 Tool Status: {gaia_agent.get_tool_status()}")
	        return True

	    except Exception as exc:
	        print(f"❌ Error initializing fixed agent: {exc}")
	        traceback.print_exc()
	        return False

	def test_fixed_agent_simple_question():
	    """Ask the fixed agent "What is 25 * 17?" and check the reply contains 425.

	    The check is abandoned (returning False) when MISTRAL_API_KEY is missing
	    or empty, or when the constructed agent's ``available`` attribute is
	    falsy.

	    Returns:
	        True when the answer is truthy, is not the string "unknown", and
	        contains "425"; False otherwise, including when anything raises.
	    """
	    divider = "="*50
	    print(f"\n{divider}")
	    print("🧪 Testing Fixed Agent with Simple Question")
	    print(divider)

	    try:
	        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

	        # Guard: the API key must be present before the agent is built.
	        if not os.getenv("MISTRAL_API_KEY"):
	            print("⚠️ MISTRAL_API_KEY not found - skipping agent test")
	            return False

	        gaia_agent = FixedGAIAAgent()

	        # Guard: no point asking a question of an unavailable agent.
	        if not gaia_agent.available:
	            print("❌ Agent not available - skipping test")
	            return False

	        # The agent instance is called directly with the question text.
	        question = "What is 25 * 17?"
	        print(f"🤔 Testing question: {question}")

	        reply = gaia_agent(question)
	        print(f"🎯 Agent answer: '{reply}'")

	        # 25 * 17 == 425, so a usable answer must contain that number.
	        looks_right = bool(reply) and reply != "unknown" and "425" in reply
	        if not looks_right:
	            print("❌ Agent answer doesn't look correct")
	            return False

	        print("✅ Agent provided reasonable answer")
	        return True

	    except Exception as exc:
	        print(f"❌ Error testing fixed agent: {exc}")
	        traceback.print_exc()
	        return False

	def test_app_integration():
	    """Check that the ``app`` module imports and report its fixed-agent flag.

	    After importing ``app``, the function prints whether the module exposes
	    a truthy ``FIXED_AGNO_AVAILABLE`` attribute. The flag only changes the
	    message that is printed; it does not affect the return value.

	    Returns:
	        True when ``app`` imports without raising; False otherwise (the
	        error and its traceback are printed).
	    """
	    divider = "="*50
	    print(f"\n{divider}")
	    print("🧪 Testing App Integration")
	    print(divider)

	    try:
	        import app

	        print("✅ Successfully imported app module")

	        # A missing attribute is treated the same as a falsy one.
	        fixed_agent_ready = getattr(app, 'FIXED_AGNO_AVAILABLE', False)
	        print(
	            "✅ Fixed AGNO agent available in app"
	            if fixed_agent_ready
	            else "⚠️ Fixed AGNO agent not available in app"
	        )

	        return True

	    except Exception as exc:
	        print(f"❌ Error testing app integration: {exc}")
	        traceback.print_exc()
	        return False

	def main():
	    """Run every check in order, print a summary, and report overall success.

	    Each check runs inside its own try/except so that one crashing check
	    cannot stop the remaining ones; a crash is recorded as a failure and its
	    traceback is printed, matching the error reporting of the individual
	    checks.

	    Returns:
	        bool: True only when every check returned a truthy result.
	    """
	    print("🚀 Starting Fixed GAIA Agent Test Suite")
	    print("This validates the fixes for the 5/20 evaluation score issue")

	    # (display name, callable) pairs, executed in this order.
	    tests = [
	        ("Answer Formatter", test_answer_formatter),
	        ("Fixed Agent Import", test_fixed_agent_import),
	        ("Fixed Agent Initialization", test_fixed_agent_initialization),
	        ("Simple Question Test", test_fixed_agent_simple_question),
	        ("App Integration", test_app_integration),
	    ]

	    results = []
	    for test_name, test_func in tests:
	        try:
	            result = test_func()
	        except Exception as e:
	            print(f"❌ Test '{test_name}' crashed: {e}")
	            # The message alone does not say where the crash happened.
	            traceback.print_exc()
	            result = False
	        results.append((test_name, result))

	    # Summary
	    print("\n" + "="*50)
	    print("📊 Test Results Summary")
	    print("="*50)

	    passed = 0
	    total = len(results)

	    for test_name, result in results:
	        status = "✅ PASS" if result else "❌ FAIL"
	        print(f"{status} {test_name}")
	        if result:
	            passed += 1

	    print(f"\n🎯 Overall: {passed}/{total} tests passed")

	    if passed == total:
	        print("🎉 All tests passed! The fixes should improve evaluation performance.")
	    elif passed >= total * 0.8:
	        # At least 80% of the checks passed.
	        print("⚠️ Most tests passed. Some issues may remain.")
	    else:
	        print("❌ Many tests failed. Significant issues remain.")

	    return passed == total

	if __name__ == "__main__":
	    # Mirror the suite outcome in the process exit status:
	    # 0 when every check passed, 1 otherwise.
	    success = main()
	    exit_code = 0 if success else 1
	    sys.exit(exit_code)