File size: 5,479 Bytes
9a6a4dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
Calculator Exponentiation Fix - TDD Implementation
Specific fix for exponentiation operations to achieve 100% accuracy.
"""

import pytest
import sys
import os
import logging
from pathlib import Path

# Add the deployment-ready directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

logger = logging.getLogger(__name__)


class TestCalculatorExponentiationFix:
    """Diagnostic test suite for the agent's exponentiation answers.

    Both tests are report-only: they log which questions the agent answers
    correctly and which it does not, but deliberately do not assert, so the
    current state can be inspected without failing the run.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self):
        """Create a fresh ``FixedGAIAAgent`` on ``self.agent`` for each test."""
        self.agent = FixedGAIAAgent()

    @staticmethod
    def _extract_numeric_answer(response):
        """Return the last number found in *response* as a normalized string.

        Thousands separators are removed and an all-zero fractional part is
        dropped, so ``"256.0"`` and ``"1,024"`` become ``"256"`` and
        ``"1024"``. If no number is present, the stripped response text is
        returned unchanged.
        """
        import re  # local import: ``re`` is not imported at module level

        text = str(response)
        # Either a comma-grouped number (1,024) or a plain one, each with an
        # optional fractional part. A sentence-ending "." is not consumed
        # because the fraction requires at least one digit after the dot.
        matches = re.findall(
            r'\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?', text
        )
        if not matches:
            return text.strip()

        token = matches[-1].replace(',', '')
        if '.' in token:
            # "256.0" -> "256", "256.50" -> "256.5"
            token = token.rstrip('0').rstrip('.')
        return token

    def test_exponentiation_operations_failing(self):
        """Test that demonstrates the current exponentiation failure.

        Sends each question to the agent, extracts the last number from the
        reply and compares it with the expected value. Mismatches and
        exceptions are collected and logged; nothing is asserted.
        """
        test_cases = [
            {
                'question': 'What is 2 to the power of 8?',
                'expected': '256',
                'operation': 'exponentiation'
            },
            {
                'question': 'Calculate 2^8',
                'expected': '256',
                'operation': 'exponentiation'
            },
            {
                'question': 'What is 2**8?',
                'expected': '256',
                'operation': 'exponentiation'
            },
            {
                'question': 'Compute 3 to the power of 4',
                'expected': '81',
                'operation': 'exponentiation'
            }
        ]

        # Availability does not depend on the case, so check it once up front.
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        failed_operations = []

        for case in test_cases:
            question = case['question']
            expected = case['expected']
            operation = case['operation']

            # Only the agent call is guarded; extraction below cannot raise.
            try:
                result = self.agent(question)
            except Exception as e:
                failed_operations.append({
                    'question': question,
                    'expected': expected,
                    'actual': f"ERROR: {e}",
                    'full_response': str(e),
                    'operation': operation
                })
                logger.error(f"❌ {operation} error: {question} → {e}")
                continue

            extracted_answer = self._extract_numeric_answer(result)

            if extracted_answer == expected:
                logger.info(f"✅ {operation} passed: {question} → {extracted_answer}")
                continue

            failed_operations.append({
                'question': question,
                'expected': expected,
                'actual': extracted_answer,
                'full_response': result,
                'operation': operation
            })
            logger.error(f"❌ {operation} failed: {question}")
            logger.error(f"   Expected: {expected}")
            logger.error(f"   Got: {extracted_answer}")
            logger.error(f"   Full response: {result}")

        # Report current state
        total = len(test_cases)
        passed = total - len(failed_operations)
        accuracy = passed / total * 100
        logger.info(f"📊 Exponentiation accuracy: {accuracy:.1f}% ({passed}/{total})")

        # This test is expected to fail initially - it documents the problem
        if failed_operations:
            logger.error("❌ Exponentiation operations that need fixing:")
            for failure in failed_operations:
                logger.error(f"   {failure['operation']}: {failure['question']}")
                logger.error(f"      Expected: {failure['expected']}")
                logger.error(f"      Got: {failure['actual']}")

        # For now, just report the issues (don't assert failure)
        # This allows us to see the current state
        logger.info(f"🔧 Identified {len(failed_operations)} exponentiation issues to fix")

    def test_python_tool_exponentiation_direct(self):
        """Test exponentiation using Python tool directly.

        Asks the agent to evaluate ``2**8`` through explicitly Python-worded
        prompts and logs whether ``256`` appears in each reply. Exceptions
        are logged rather than raised; nothing is asserted.
        """
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        # Test direct Python calculation
        python_questions = [
            "Use Python to calculate 2**8",
            "Execute Python code: print(2**8)",
            "Run this Python: result = 2**8; print(result)",
        ]

        for question in python_questions:
            try:
                result = self.agent(question)
            except Exception as e:
                logger.error(f"❌ Python test error: {question} → {e}")
                continue

            logger.info(f"🐍 Python test: {question}")
            logger.info(f"   Result: {result}")

            # Check if 256 appears in the result; str() keeps the membership
            # test valid even if the agent returns a non-string object.
            if "256" in str(result):
                logger.info(f"✅ Python exponentiation working: {question}")
            else:
                logger.warning(f"⚠️ Python exponentiation unclear: {question} → {result}")


if __name__ == "__main__":
    # Run the exponentiation fix tests (verbose, with output capture off so
    # log/print output is visible) and propagate pytest's exit code, so that
    # running this file as a script reports failure to the shell or CI.
    sys.exit(pytest.main([__file__, "-v", "-s"]))