# NOTE: this file was captured from a web file viewer; the page chrome that
# preceded the code is summarized here instead of being left as bare text.
#   Spaces status: Running
#   File size: 8,064 Bytes
#   Revision: 9a6a4dc
#   (The viewer's line-number gutter, 1-205, carried no content and was dropped.)
"""
Calculator Accuracy Fix - TDD Approach
Identifies and fixes calculator accuracy issues to achieve 100% success rate.
"""
import pytest
import sys
import os
import logging
from pathlib import Path
# NOTE(review): `Path` is not referenced anywhere in the visible part of this file.
# Prepend the parent of this file's directory to sys.path (described by the
# original author as the "deployment-ready" directory). This has to run before
# the import on the next line, presumably so the `agents` package resolves.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent
# Module-level logger named after this module; the tests below report
# per-case pass/fail results through it.
logger = logging.getLogger(__name__)
class TestCalculatorFix:
    """Test suite to identify and fix calculator accuracy issues.

    Each test asks a ``FixedGAIAAgent`` plain-language math questions and
    compares the agent's cleaned-up text answer with the expected value.
    Tests are skipped when the agent reports itself as unavailable.
    """

    # Absolute difference below which a numeric answer is accepted as equal
    # to the expected value in the complex-math test.
    _NUMERIC_TOLERANCE = 0.01

    @pytest.fixture(autouse=True)
    def setup_method(self):
        """Set up test fixtures: create the agent used by the test."""
        self.agent = FixedGAIAAgent()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _skip_if_agent_unavailable(self):
        """Skip the current test when the agent is not available."""
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

    @staticmethod
    def _answers_match(actual, expected, numeric_tolerance=None):
        """Return True when ``actual`` is an acceptable answer for ``expected``.

        Args:
            actual: Cleaned answer text produced by the agent.
            expected: Expected answer text.
            numeric_tolerance: When given, answers that parse as floats and
                differ from ``expected`` by less than this amount also match.
                When ``None``, only an exact string match is accepted.
        """
        if actual == expected:
            return True
        if numeric_tolerance is None:
            return False
        try:
            return abs(float(actual) - float(expected)) < numeric_tolerance
        except ValueError:
            # Non-numeric answer text: only the exact comparison applies.
            return False

    def _collect_failures(self, test_cases, numeric_tolerance=None):
        """Run every case through the agent and return the failing ones.

        Args:
            test_cases: Iterable of dicts with ``question``, ``expected`` and
                ``operation`` keys.
            numeric_tolerance: Forwarded to ``_answers_match``.

        Returns:
            A list of dicts (``question``, ``expected``, ``actual``,
            ``operation``), one per case whose answer did not match or whose
            evaluation raised. Errors are recorded with an ``actual`` value
            of ``"ERROR: <exception>"`` so one bad case does not hide the rest.
        """
        failures = []
        for case in test_cases:
            question = case['question']
            expected = case['expected']
            operation = case['operation']
            try:
                # Normalize the answer: trim whitespace, drop thousands separators.
                answer = self.agent(question).strip().replace(',', '')
            except Exception as exc:
                failures.append({
                    'question': question,
                    'expected': expected,
                    'actual': f"ERROR: {exc}",
                    'operation': operation,
                })
                logger.error("❌ %s error: %s → %s", operation, question, exc)
                continue

            if self._answers_match(answer, expected, numeric_tolerance):
                logger.info("✅ %s passed: %s → %s", operation, question, answer)
            else:
                failures.append({
                    'question': question,
                    'expected': expected,
                    'actual': answer,
                    'operation': operation,
                })
                logger.error(
                    "❌ %s failed: %s → Expected: %s, Got: %s",
                    operation, question, expected, answer,
                )
        return failures

    @staticmethod
    def _log_failures(failures):
        """Log one error line per recorded failure."""
        for failure in failures:
            logger.error(
                " Failed: %s → Expected: %s, Got: %s",
                failure['question'], failure['expected'], failure['actual'],
            )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_basic_arithmetic_operations(self):
        """Test basic arithmetic operations that should always work.

        Answers must match the expected text exactly (after trimming and
        removing commas); any failure fails the test.
        """
        test_cases = [
            {
                'question': 'What is 25 * 17?',
                'expected': '425',
                'operation': 'multiplication',
            },
            {
                'question': 'What is 144 / 12?',
                'expected': '12',
                'operation': 'division',
            },
            {
                'question': 'What is 100 + 50?',
                'expected': '150',
                'operation': 'addition',
            },
            {
                'question': 'What is 200 - 75?',
                'expected': '125',
                'operation': 'subtraction',
            },
        ]

        self._skip_if_agent_unavailable()
        failed_operations = self._collect_failures(test_cases)

        # Report results
        total = len(test_cases)
        passed = total - len(failed_operations)
        if failed_operations:
            logger.error(
                "❌ Calculator accuracy: %d/%d (%.1f%%)",
                passed, total, passed / total * 100,
            )
            self._log_failures(failed_operations)
        else:
            logger.info("✅ Calculator accuracy: 100%% (%d/%d)", total, total)

        # Assert no failures for 100% accuracy
        assert not failed_operations, (
            f"Calculator failed {len(failed_operations)} out of {total} tests"
        )

    def test_complex_mathematical_operations(self):
        """Test complex mathematical operations.

        Numeric answers within ``_NUMERIC_TOLERANCE`` of the expected value
        are accepted (e.g. ``12.0`` for ``12``); otherwise an exact text match
        is required.
        """
        test_cases = [
            {
                'question': 'What is 2^8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'What is the square root of 144?',
                'expected': '12',
                'operation': 'square_root',
            },
            {
                'question': 'Calculate the factorial of 5',
                'expected': '120',
                'operation': 'factorial',
            },
        ]

        self._skip_if_agent_unavailable()
        failed_operations = self._collect_failures(
            test_cases, numeric_tolerance=self._NUMERIC_TOLERANCE
        )

        # Report results
        total = len(test_cases)
        passed = total - len(failed_operations)
        logger.info(
            "📊 Complex math accuracy: %.1f%% (%d/%d)",
            passed / total * 100, passed, total,
        )
        self._log_failures(failed_operations)

        # Without this assertion the test could never fail, no matter what
        # the agent answered.
        assert not failed_operations, (
            f"Complex math failed {len(failed_operations)} out of {total} tests"
        )

    def test_calculator_tool_direct_access(self):
        """Test direct access to calculator tool to identify issues.

        Fails when no tool whose class name contains ``Calculator`` is
        registered on the agent.
        """
        self._skip_if_agent_unavailable()

        # Find calculator tool: first tool whose class name mentions "Calculator".
        calculator_tool = next(
            (
                tool for tool in self.agent.tools
                if 'Calculator' in tool.__class__.__name__
            ),
            None,
        )
        if calculator_tool is None:
            pytest.fail("Calculator tool not found in agent tools")
        logger.info("✅ Calculator tool found: %s", calculator_tool.__class__.__name__)

        # Test direct calculator operations
        test_operations = [
            ('25 * 17', 425),
            ('144 / 12', 12),
            ('2 ** 8', 256),
            ('100 + 50', 150),
        ]
        # NOTE(review): the calculator tool's call interface is not visible
        # from this file, so the expressions are only logged, not evaluated.
        # TODO: invoke the tool and assert on its result once the interface
        # is confirmed.
        for expression, expected in test_operations:
            logger.info("🧮 Testing calculator: %s = %s", expression, expected)
if __name__ == "__main__":
    # Run the calculator fix tests (verbose, with output capturing disabled)
    # and hand pytest's exit code back to the shell so failures are visible
    # to whatever launched this script.
    sys.exit(pytest.main([__file__, "-v", "-s"]))