Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / tests /test_calculator_exponentiation_fix.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc 9 days ago

5.48 kB

	"""
	Calculator Exponentiation Fix - TDD Implementation
	Specific fix for exponentiation operations to achieve 100% accuracy.
	"""

	import pytest
	import sys
	import os
	import logging
	from pathlib import Path

	# Add the deployment-ready directory (the parent of this tests/ folder) to the import path
	sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

	from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

	logger = logging.getLogger(__name__)


	class TestCalculatorExponentiationFix:
	    """Test suite to fix calculator exponentiation issues.

	    Both tests are diagnostic: they send exponentiation questions to a
	    ``FixedGAIAAgent`` and log which ones produce the expected number.
	    Neither test asserts on the outcome, so a wrong answer is reported in
	    the log but does not fail the run.
	    """

	    @pytest.fixture(autouse=True)
	    def setup_method(self):
	        """Set up test fixtures: create a new agent before every test."""
	        self.agent = FixedGAIAAgent()

	    @staticmethod
	    def _extract_numeric_answer(response):
	        """Return the last number found in *response*, as a string.

	        A decimal is matched as a single token, and a fractional part made
	        only of zeros is dropped, so ``"The answer is 256.0"`` yields
	        ``"256"``. A plain ``\\d+`` search would split that into ``"256"``
	        and ``"0"`` and report ``"0"``. When no number is present the
	        stripped response text is returned unchanged.

	        Args:
	            response: Text returned by the agent.

	        Returns:
	            The last number in the text, or the stripped text itself when it
	            contains no digits.
	        """
	        # Imported locally so this block does not rely on a file-level import.
	        import re

	        matches = re.findall(r'\d+(?:\.\d+)?', response)
	        if not matches:
	            return response.strip()

	        last_number = matches[-1]
	        whole, _, fraction = last_number.partition('.')
	        # "256.0" / "256.00" are integral values; "256.5" is a real decimal.
	        if fraction.strip('0'):
	            return last_number
	        return whole

	    def test_exponentiation_operations_failing(self):
	        """Test that demonstrates the current exponentiation failure.

	        Each question is sent to the agent, the last number in the reply is
	        compared with the expected value, and every mismatch or exception is
	        collected and logged. The test is skipped when the agent reports
	        itself as unavailable. It intentionally does not assert: its purpose
	        is to document the current state, not to gate the build.
	        """
	        # Availability is the same for every case, so check it once up front.
	        if not self.agent.available:
	            pytest.skip("Agent not available for testing")

	        test_cases = [
	            {
	                'question': 'What is 2 to the power of 8?',
	                'expected': '256',
	                'operation': 'exponentiation',
	            },
	            {
	                'question': 'Calculate 2^8',
	                'expected': '256',
	                'operation': 'exponentiation',
	            },
	            {
	                'question': 'What is 2**8?',
	                'expected': '256',
	                'operation': 'exponentiation',
	            },
	            {
	                'question': 'Compute 3 to the power of 4',
	                'expected': '81',
	                'operation': 'exponentiation',
	            },
	        ]

	        failed_operations = []

	        for case in test_cases:
	            question = case['question']
	            expected = case['expected']
	            operation = case['operation']

	            try:
	                result = self.agent(question)
	                extracted_answer = self._extract_numeric_answer(result)
	            except Exception as e:
	                # Deliberately broad: any failure while querying the agent or
	                # parsing its reply is recorded as a failed case rather than
	                # aborting the remaining cases.
	                failed_operations.append({
	                    'question': question,
	                    'expected': expected,
	                    'actual': f"ERROR: {e}",
	                    'full_response': str(e),
	                    'operation': operation,
	                })
	                logger.error("❌ %s error: %s → %s", operation, question, e)
	                continue

	            # Check if the result matches
	            if extracted_answer == expected:
	                logger.info("✅ %s passed: %s → %s", operation, question, extracted_answer)
	                continue

	            failed_operations.append({
	                'question': question,
	                'expected': expected,
	                'actual': extracted_answer,
	                'full_response': result,
	                'operation': operation,
	            })
	            logger.error("❌ %s failed: %s", operation, question)
	            logger.error(" Expected: %s", expected)
	            logger.error(" Got: %s", extracted_answer)
	            logger.error(" Full response: %s", result)

	        # Report current state
	        total = len(test_cases)
	        passed = total - len(failed_operations)
	        accuracy = passed / total * 100
	        logger.info("📊 Exponentiation accuracy: %.1f%% (%d/%d)", accuracy, passed, total)

	        # This test is expected to fail initially - it documents the problem
	        if failed_operations:
	            logger.error("❌ Exponentiation operations that need fixing:")
	            for failure in failed_operations:
	                logger.error(" %s: %s", failure['operation'], failure['question'])
	                logger.error(" Expected: %s", failure['expected'])
	                logger.error(" Got: %s", failure['actual'])

	            # For now, just report the issues (don't assert failure)
	            # This allows us to see the current state
	            logger.info("🔧 Identified %d exponentiation issues to fix", len(failed_operations))

	    def test_python_tool_exponentiation_direct(self):
	        """Test exponentiation using Python tool directly.

	        Sends prompts that explicitly ask for Python execution of ``2**8`` and
	        logs whether ``256`` appears anywhere in each reply. Like the test
	        above it only reports; it never asserts. The test is skipped when the
	        agent reports itself as unavailable.
	        """
	        if not self.agent.available:
	            pytest.skip("Agent not available for testing")

	        # Test direct Python calculation
	        python_questions = [
	            "Use Python to calculate 2**8",
	            "Execute Python code: print(2**8)",
	            "Run this Python: result = 2**8; print(result)",
	        ]

	        for question in python_questions:
	            try:
	                result = self.agent(question)
	                logger.info("🐍 Python test: %s", question)
	                logger.info(" Result: %s", result)

	                # Check if 256 appears in the result
	                if "256" in result:
	                    logger.info("✅ Python exponentiation working: %s", question)
	                else:
	                    logger.warning("⚠️ Python exponentiation unclear: %s → %s", question, result)

	            except Exception as e:
	                # Best-effort probe: log the problem and move on to the next prompt.
	                logger.error("❌ Python test error: %s → %s", question, e)

	if __name__ == "__main__":
	    # Allow running this file directly: hand this module to pytest with
	    # verbose reporting (-v) and output capturing disabled (-s).
	    pytest_args = [__file__, "-v", "-s"]
	    pytest.main(pytest_args)