# gaia-enhanced-agent/tests/test_calculator_exponentiation_fix.py
# GAIA Agent Deployment
# Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
# Commit 9a6a4dc
"""
Calculator Exponentiation Fix - TDD Implementation
Specific fix for exponentiation operations to achieve 100% accuracy.
"""
import pytest
import sys
import os
import logging
from pathlib import Path
# Add the parent of this tests directory (the deployment-ready project root)
# to sys.path so that the `agents` package can be imported below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent
logger = logging.getLogger(__name__)
class TestCalculatorExponentiationFix:
    """Diagnostic test suite for the agent's exponentiation answers.

    Both tests are report-only: they log which questions the agent answers
    correctly and which it does not, but they never assert on the outcome, so
    they pass regardless of the agent's answers. They are skipped when the
    agent reports itself as unavailable.

    NOTE(review): the autouse fixture is named ``setup_method``, which is also
    the name of pytest's xunit-style setup hook — confirm this is intentional.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self):
        """Create a fresh ``FixedGAIAAgent`` on ``self.agent`` before each test."""
        self.agent = FixedGAIAAgent()

    @staticmethod
    def _extract_numeric_answer(response):
        """Return the last number found in *response* as a string.

        Decimal numbers are kept whole, so ``"256.0"`` is returned as
        ``"256.0"`` rather than being split into ``"256"`` and ``"0"``.
        If *response* contains no digits, the stripped response text is
        returned instead.

        Raises:
            TypeError: if *response* is not a string (propagated from ``re``).
        """
        import re  # local import: keeps this block independent of file-level imports

        numbers = re.findall(r'\d+(?:\.\d+)?', response)
        return numbers[-1] if numbers else response.strip()

    @staticmethod
    def _answers_match(actual, expected):
        """Return True when *actual* and *expected* denote the same answer.

        The two strings are compared numerically when both parse as floats
        (so ``"256.0"`` matches ``"256"``); otherwise they are compared as
        plain strings.
        """
        try:
            return float(actual) == float(expected)
        except ValueError:
            return actual == expected

    def test_exponentiation_operations_failing(self):
        """Report how many exponentiation questions the agent answers correctly.

        Each question is sent to the agent, the last number in the reply is
        taken as its answer and compared with the expected value. Mismatches
        and exceptions raised by the agent are collected and logged together
        with an accuracy percentage. The test deliberately does not assert:
        it documents the current state rather than enforcing it.
        """
        test_cases = [
            {
                'question': 'What is 2 to the power of 8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'Calculate 2^8',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'What is 2**8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'Compute 3 to the power of 4',
                'expected': '81',
                'operation': 'exponentiation',
            },
        ]

        # Availability does not depend on the test case, so check it once.
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        failed_operations = []
        for case in test_cases:
            question = case['question']
            expected = case['expected']
            operation = case['operation']
            try:
                result = self.agent(question)
                extracted_answer = self._extract_numeric_answer(result)

                if self._answers_match(extracted_answer, expected):
                    logger.info(f"✅ {operation} passed: {question} → {extracted_answer}")
                else:
                    failed_operations.append({
                        'question': question,
                        'expected': expected,
                        'actual': extracted_answer,
                        'full_response': result,
                        'operation': operation,
                    })
                    logger.error(f"❌ {operation} failed: {question}")
                    logger.error(f" Expected: {expected}")
                    logger.error(f" Got: {extracted_answer}")
                    logger.error(f" Full response: {result}")
            except Exception as e:
                # Any failure of the agent (or a non-string reply) is recorded
                # as a failed case instead of aborting the whole report.
                failed_operations.append({
                    'question': question,
                    'expected': expected,
                    'actual': f"ERROR: {e}",
                    'full_response': str(e),
                    'operation': operation,
                })
                logger.error(f"❌ {operation} error: {question} → {e}")

        # Report current state
        total = len(test_cases)
        passed = total - len(failed_operations)
        accuracy = passed / total * 100
        logger.info(f"📊 Exponentiation accuracy: {accuracy:.1f}% ({passed}/{total})")

        # This test is expected to fail initially - it documents the problem
        if failed_operations:
            logger.error("❌ Exponentiation operations that need fixing:")
            for failure in failed_operations:
                logger.error(f" {failure['operation']}: {failure['question']}")
                logger.error(f" Expected: {failure['expected']}")
                logger.error(f" Got: {failure['actual']}")
            # For now, just report the issues (don't assert failure).
            # This allows us to see the current state.
            logger.info(f"🔧 Identified {len(failed_operations)} exponentiation issues to fix")

    def test_python_tool_exponentiation_direct(self):
        """Ask the agent to compute ``2**8`` via explicit Python-style prompts.

        For each prompt the reply is logged, along with whether the substring
        ``"256"`` appears in it. Exceptions are logged and the loop moves on;
        the test is report-only and makes no assertions.
        """
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        # Test direct Python calculation
        python_questions = [
            "Use Python to calculate 2**8",
            "Execute Python code: print(2**8)",
            "Run this Python: result = 2**8; print(result)",
        ]

        for question in python_questions:
            try:
                result = self.agent(question)
                logger.info(f"🐍 Python test: {question}")
                logger.info(f" Result: {result}")

                # Check if 256 appears in the result
                if "256" in result:
                    logger.info(f"✅ Python exponentiation working: {question}")
                else:
                    logger.warning(f"⚠️ Python exponentiation unclear: {question} → {result}")
            except Exception as e:
                logger.error(f"❌ Python test error: {question} → {e}")
if __name__ == "__main__":
    # Run the exponentiation fix tests verbosely (-v) with output capture
    # disabled (-s), and hand pytest's exit code to the shell so that a failing
    # run does not exit with status 0.
    sys.exit(pytest.main([__file__, "-v", "-s"]))