File size: 8,064 Bytes
9a6a4dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
"""
Calculator Accuracy Fix - TDD Approach
Identifies and fixes calculator accuracy issues to achieve 100% success rate.
"""

import pytest
import sys
import os
import logging
from pathlib import Path

# Add the deployment-ready directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

logger = logging.getLogger(__name__)


class TestCalculatorFix:
    """Test suite to identify and fix calculator accuracy issues.

    Each test gets a freshly constructed ``FixedGAIAAgent`` (see
    ``setup_method``) and is skipped when that agent reports
    ``available`` as falsy.
    """

    # Largest absolute difference still accepted as a match when an answer
    # and its expected value both parse as floats (complex-math test only).
    _NUMERIC_TOLERANCE = 0.01

    @pytest.fixture(autouse=True)
    def setup_method(self):
        """Set up test fixtures: build the agent under test before each test."""
        self.agent = FixedGAIAAgent()

    def _skip_unless_agent_available(self):
        """Skip the running test when the agent reports it is unavailable."""
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

    @staticmethod
    def _numerically_close(actual, expected, tolerance):
        """Return True when both strings parse as floats within ``tolerance``.

        Args:
            actual: Cleaned answer text produced by the agent.
            expected: Expected answer text from the test case.
            tolerance: Maximum absolute difference (exclusive), or ``None``
                to disable numeric comparison entirely.

        Returns:
            ``False`` when ``tolerance`` is ``None`` or either value is not a
            valid float literal; otherwise whether the values are close.
        """
        if tolerance is None:
            return False
        try:
            return abs(float(actual) - float(expected)) < tolerance
        except ValueError:
            # Non-numeric text: the caller falls back to exact string match.
            return False

    def _collect_failures(self, test_cases, numeric_tolerance=None):
        """Ask the agent every question and return the cases it got wrong.

        Args:
            test_cases: Iterable of dicts with ``'question'``, ``'expected'``
                and ``'operation'`` keys.
            numeric_tolerance: When not ``None``, answers that parse as floats
                within this absolute tolerance of the expected value also
                count as passing.

        Returns:
            A list of failure records (``question``, ``expected``, ``actual``,
            ``operation``), one per case whose answer did not match or whose
            evaluation raised. An exception is recorded as
            ``"ERROR: <message>"`` so the remaining cases still run.
        """
        failures = []
        for case in test_cases:
            question = case['question']
            expected = case['expected']
            operation = case['operation']

            try:
                # Clean the result for comparison: drop surrounding whitespace
                # and thousands separators.
                # NOTE(review): assumes the agent returns a str - a non-str
                # result raises here and is recorded as an error below.
                actual = self.agent(question).strip().replace(',', '')
            except Exception as exc:
                failures.append({
                    'question': question,
                    'expected': expected,
                    'actual': f"ERROR: {exc}",
                    'operation': operation,
                })
                logger.error(f"❌ {operation} error: {question} → {exc}")
                continue

            if actual == expected or self._numerically_close(
                actual, expected, numeric_tolerance
            ):
                logger.info(f"✅ {operation} passed: {question} → {actual}")
                continue

            failures.append({
                'question': question,
                'expected': expected,
                'actual': actual,
                'operation': operation,
            })
            logger.error(
                f"❌ {operation} failed: {question} → "
                f"Expected: {expected}, Got: {actual}"
            )
        return failures

    @staticmethod
    def _log_failures(failures):
        """Log one error line per failure record."""
        for failure in failures:
            logger.error(
                f"   Failed: {failure['question']} → "
                f"Expected: {failure['expected']}, Got: {failure['actual']}"
            )

    def test_basic_arithmetic_operations(self):
        """Test basic arithmetic operations that should always work.

        Answers must match the expected text exactly after cleaning; any
        mismatch or agent error fails the test.
        """
        test_cases = [
            {
                'question': 'What is 25 * 17?',
                'expected': '425',
                'operation': 'multiplication',
            },
            {
                'question': 'What is 144 / 12?',
                'expected': '12',
                'operation': 'division',
            },
            {
                'question': 'What is 100 + 50?',
                'expected': '150',
                'operation': 'addition',
            },
            {
                'question': 'What is 200 - 75?',
                'expected': '125',
                'operation': 'subtraction',
            },
        ]

        # Checked once up front so a skip can never discard failures that
        # were already collected part-way through the cases.
        self._skip_unless_agent_available()
        failed_operations = self._collect_failures(test_cases)

        # Report results
        total = len(test_cases)
        passed = total - len(failed_operations)
        if failed_operations:
            logger.error(
                f"❌ Calculator accuracy: {passed}/{total} "
                f"({passed / total * 100:.1f}%)"
            )
            self._log_failures(failed_operations)
        else:
            logger.info(f"✅ Calculator accuracy: 100% ({total}/{total})")

        # Assert no failures for 100% accuracy
        assert not failed_operations, (
            f"Calculator failed {len(failed_operations)} out of {total} tests"
        )

    def test_complex_mathematical_operations(self):
        """Test complex mathematical operations.

        Numeric answers within ``_NUMERIC_TOLERANCE`` of the expected value
        pass (so ``"12.0"`` matches ``"12"``); everything else must match the
        expected text exactly. The test fails if any case fails.
        """
        test_cases = [
            {
                'question': 'What is 2^8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'What is the square root of 144?',
                'expected': '12',
                'operation': 'square_root',
            },
            {
                'question': 'Calculate the factorial of 5',
                'expected': '120',
                'operation': 'factorial',
            },
        ]

        self._skip_unless_agent_available()
        # For complex operations, allow for slight numeric variations.
        failed_operations = self._collect_failures(
            test_cases, numeric_tolerance=self._NUMERIC_TOLERANCE
        )

        # Report results
        total = len(test_cases)
        passed = total - len(failed_operations)
        success_rate = passed / total * 100
        logger.info(
            f"📊 Complex math accuracy: {success_rate:.1f}% ({passed}/{total})"
        )
        self._log_failures(failed_operations)

        # Without this assertion the test could only ever pass or skip,
        # so wrong answers were logged but never reported as a failure.
        assert not failed_operations, (
            f"Complex math failed {len(failed_operations)} out of {total} tests"
        )

    def test_calculator_tool_direct_access(self):
        """Test direct access to calculator tool to identify issues.

        Fails when no tool whose class name contains ``'Calculator'`` is
        present in ``self.agent.tools``.
        """
        self._skip_unless_agent_available()

        # Find calculator tool: first tool whose class name mentions it.
        calculator_tool = next(
            (
                tool for tool in self.agent.tools
                if 'Calculator' in tool.__class__.__name__
            ),
            None,
        )
        if calculator_tool is None:
            pytest.fail("Calculator tool not found in agent tools")

        logger.info(f"✅ Calculator tool found: {calculator_tool.__class__.__name__}")

        # Expressions intended for direct calculator checks.
        test_operations = [
            ('25 * 17', 425),
            ('144 / 12', 12),
            ('2 ** 8', 256),
            ('100 + 50', 150),
        ]

        # TODO: the expressions are only logged, not evaluated - invoking the
        # tool depends on the calculator tool's interface, which is not
        # visible from this file. Compare its output with `expected` once
        # that interface is confirmed.
        for expression, expected in test_operations:
            logger.info(f"🧮 Testing calculator: {expression} = {expected}")


if __name__ == "__main__":
    # Run the calculator fix tests and propagate pytest's exit code, so that
    # running this file as a script reports test failures to the shell/CI
    # instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))