Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / test_enhanced_agent.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc about 2 months ago

6.38 kB

	#!/usr/bin/env python3
	"""
	Test script for the enhanced GAIA agent with new response processor.
	"""

	import os
	import sys
	import logging
	from pathlib import Path

	# Put this script's own directory first on the import path so the
	# ``agents`` and ``utils`` packages imported further down resolve from it.
	_SCRIPT_DIR = Path(__file__).parent
	sys.path.insert(0, str(_SCRIPT_DIR))

	# Configure the root logger: INFO level, timestamped records.
	_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

	def test_enhanced_agent():
	    """Run the enhanced GAIA agent against a few sample questions.

	    ``FixedGAIAAgent`` is imported inside the function so that a missing
	    dependency is reported as a failed run instead of crashing the script at
	    import time.  Each question is sent to the agent by calling it, and the
	    answer is matched against a regular expression describing its expected
	    shape.  A shape mismatch is only a warning; a question that raises is a
	    failure.

	    Returns:
	        bool: ``True`` when the agent was available and every question was
	        processed without raising.  ``False`` when the import failed, the
	        agent reported itself unavailable, at least one question raised, or
	        any other unexpected error occurred.
	    """
	    # Imported once here rather than on every loop iteration.
	    import re

	    print("🚀 Testing Enhanced GAIA Agent with Response Processor")
	    print("=" * 60)

	    try:
	        # Import the enhanced agent
	        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

	        # Initialize the agent
	        print("📦 Initializing Enhanced GAIA Agent...")
	        agent = FixedGAIAAgent()

	        if not agent.available:
	            print("❌ Agent not available - check MISTRAL_API_KEY")
	            return False

	        print("✅ Agent initialized successfully")
	        print(f"📊 Tools available: {len(agent.tools)}")

	        # Test questions of different types
	        test_questions = [
	            {
	                "question": "What is 25 * 17?",
	                "type": "Mathematical",
	                "expected_pattern": r"^\d+$",
	            },
	            {
	                "question": "What is the capital of France?",
	                "type": "Factual",
	                "expected_pattern": r"^[A-Za-z\s]+$",
	            },
	            {
	                "question": "How many continents are there?",
	                "type": "Count",
	                "expected_pattern": r"^\d+$",
	            },
	        ]

	        print("\n🧪 Testing Response Processing...")
	        print("-" * 40)

	        failed_questions = 0
	        for i, test_case in enumerate(test_questions, 1):
	            print(f"\nTest {i}: {test_case['type']} Question")
	            print(f"Question: {test_case['question']}")

	            try:
	                # Process the question
	                answer = agent(test_case['question'])
	                print(f"Answer: '{answer}'")

	                # Validate the answer format (a mismatch is a warning only)
	                if re.match(test_case['expected_pattern'], answer):
	                    print("✅ Answer format valid")
	                else:
	                    print("⚠️ Answer format unexpected")

	            except Exception as e:
	                # Keep going so every question is attempted, but remember
	                # the failure so the run is not reported as successful.
	                print(f"❌ Error processing question: {e}")
	                failed_questions += 1

	        # Get processor statistics
	        print("\n📈 Response Processor Statistics:")
	        print("-" * 40)
	        stats = agent.get_processor_statistics()
	        if stats:
	            for key, value in stats.items():
	                print(f" {key}: {value}")
	        else:
	            print(" No statistics available")

	        if failed_questions:
	            print(f"\n❌ Enhanced agent testing finished with {failed_questions} failed question(s)")
	            return False

	        print("\n✅ Enhanced agent testing completed successfully!")
	        return True

	    except ImportError as e:
	        print(f"❌ Import error: {e}")
	        print("Make sure all dependencies are installed")
	        return False
	    except Exception as e:
	        print(f"❌ Unexpected error: {e}")
	        return False

	def test_response_processor_only():
	    """Exercise ``EnhancedResponseProcessor`` on canned responses, without the agent.

	    Each canned response is passed to ``process_response`` together with its
	    question.  The extracted answer is compared with the expected string, and
	    the reported strategy and confidence are printed alongside it.

	    Returns:
	        bool: ``True`` when every extracted answer equals its expected value.
	        ``False`` when any extraction differs, or when importing,
	        constructing or running the processor raises.
	    """
	    print("\n🧠 Testing Response Processor Standalone")
	    print("=" * 60)

	    try:
	        from utils.response_processor import EnhancedResponseProcessor

	        # Initialize processor
	        processor = EnhancedResponseProcessor()
	        print("✅ Response processor initialized")

	        # Test responses
	        test_responses = [
	            {
	                "response": "Let me calculate this. 25 * 17 = 425. FINAL ANSWER: 425",
	                "question": "What is 25 * 17?",
	                "expected": "425",
	            },
	            {
	                "response": "The capital of France is Paris. FINAL ANSWER: Paris",
	                "question": "What is the capital of France?",
	                "expected": "Paris",
	            },
	            {
	                "response": "After researching, I found that there are 7 continents on Earth. FINAL ANSWER: 7",
	                "question": "How many continents are there?",
	                "expected": "7",
	            },
	        ]

	        print("\n🔍 Testing Answer Extraction...")
	        print("-" * 40)

	        mismatches = 0
	        for i, test_case in enumerate(test_responses, 1):
	            response = test_case['response']
	            # Only mark the preview as truncated when text was actually cut.
	            preview = response if len(response) <= 100 else response[:100] + "..."

	            print(f"\nTest {i}:")
	            print(f"Question: {test_case['question']}")
	            print(f"Response: {preview}")

	            # Extract answer
	            result = processor.process_response(response, test_case['question'])

	            print(f"Extracted: '{result.answer}'")
	            print(f"Expected: '{test_case['expected']}'")
	            print(f"Strategy: {result.strategy.value}")
	            print(f"Confidence: {result.confidence:.2f}")

	            if result.answer == test_case['expected']:
	                print("✅ Extraction correct")
	            else:
	                # Keep checking the remaining cases, but remember the
	                # mismatch so the run is not reported as successful.
	                print("⚠️ Extraction differs from expected")
	                mismatches += 1

	        # Get statistics
	        print("\n📊 Processor Statistics:")
	        print("-" * 40)
	        stats = processor.get_statistics()
	        for key, value in stats.items():
	            print(f" {key}: {value}")

	        if mismatches:
	            print(f"\n❌ Response processor testing found {mismatches} incorrect extraction(s)")
	            return False

	        print("\n✅ Response processor testing completed!")
	        return True

	    except Exception as e:
	        print(f"❌ Error testing response processor: {e}")
	        return False

	if __name__ == "__main__":
	    print("🧪 Enhanced GAIA Agent Test Suite")
	    print("=" * 60)

	    # The standalone processor check always runs; it needs no API key.
	    processor_ok = test_response_processor_only()

	    # The full agent check runs only when an API key is configured; a
	    # missing key is reported but does not count as a failure.
	    have_api_key = bool(os.getenv("MISTRAL_API_KEY"))
	    if not have_api_key:
	        print("\n⚠️ MISTRAL_API_KEY not found - skipping full agent test")
	    agent_ok = test_enhanced_agent() if have_api_key else True

	    # Summary: exit status 0 only when both checks passed.
	    print("\n" + "=" * 60)
	    all_ok = processor_ok and agent_ok
	    print("🎉 All tests completed successfully!" if all_ok else "❌ Some tests failed")
	    sys.exit(0 if all_ok else 1)