Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
Test script to validate the fixed GAIA agent improvements.

This script tests the key fixes that are intended to improve on the 5/20 evaluation score.
""" | |
import os | |
import sys | |
import traceback | |
from pathlib import Path | |
# Put the directory that contains this script at the front of sys.path so the
# `utils`, `agents` and `app` imports performed by the tests below are resolved
# against it first (presumably this is the deployment-ready directory — confirm).
sys.path.insert(0, str(Path(__file__).parent))
def load_env_file():
    """Load environment variables from a ``.env`` file in the current directory.

    Every non-blank line that is not a ``#`` comment and contains ``=`` is
    split on the first ``=`` into a key and a value. Both are stripped of
    surrounding whitespace and the pair is written to ``os.environ``,
    overriding any value that is already set. A missing ``.env`` file is
    silently ignored.

    Parsing details:
      * the file is decoded as UTF-8 (a leading BOM is dropped) rather than
        with the platform default encoding;
      * an optional leading ``export `` before the key is removed;
      * one pair of matching single or double quotes around the value is
        removed, so ``KEY="abc"`` yields ``abc``;
      * lines whose key is empty (e.g. ``=value``) are skipped, because
        ``os.environ`` rejects an empty variable name.

    Returns:
        None. The function only mutates ``os.environ``.
    """
    env_file = Path('.env')
    # is_file() rather than exists(): a directory named ".env" cannot be read.
    if not env_file.is_file():
        return

    # utf-8-sig decodes plain UTF-8 and also drops a leading BOM if present.
    with env_file.open('r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            # Split on the first '=' only, so values may themselves contain '='.
            key, _, value = line.partition('=')
            key = key.strip()
            if key.startswith('export '):
                key = key[len('export '):].strip()
            if not key:
                # Assigning os.environ[''] raises instead of being ignored.
                continue

            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in '"\'':
                value = value[1:-1]

            os.environ[key] = value
# Populate os.environ from .env at import time, before any of the test
# functions below look up MISTRAL_API_KEY with os.getenv.
load_env_file()
def test_answer_formatter():
    """Check FixedGAIAAnswerFormatter against a fixed table of responses.

    Each case feeds a raw response plus the dummy question "test question" to
    ``format_answer`` and compares the result with the expected string using
    ``==``. A per-case report and a final summary are printed.

    Returns:
        True when every case matched; False when any case differed or when
        importing, constructing or calling the formatter raised (the error
        and its traceback are printed in that case).
    """
    rule = "=" * 50
    print("\n" + rule)
    print("π§ͺ Testing Fixed Answer Formatter")
    print(rule)

    try:
        from utils.fixed_answer_formatter import FixedGAIAAnswerFormatter

        answer_formatter = FixedGAIAAnswerFormatter()

        # (description, raw response handed to the formatter, expected output)
        cases = (
            (
                'Basic FINAL ANSWER format',
                'Let me calculate this. The answer is 42. FINAL ANSWER: 42',
                '42',
            ),
            (
                'Text answer with FINAL ANSWER',
                'After analysis, I found the result. FINAL ANSWER: Paris',
                'Paris',
            ),
            (
                'List format',
                'FINAL ANSWER: blue, green, red',
                'blue, green, red',
            ),
            (
                'Number without commas',
                'The calculation shows 1234 FINAL ANSWER: 1234',
                '1234',
            ),
            (
                'Fallback extraction',
                'No final answer format here, just 25',
                '25',
            ),
        )

        everything_ok = True
        for number, (description, raw_response, expected) in enumerate(cases, start=1):
            result = answer_formatter.format_answer(raw_response, "test question")
            matched = result == expected
            everything_ok = everything_ok and matched

            if matched:
                status = "β PASS"
            else:
                status = "β FAIL"
            print(f"Test {number}: {status} - {description}")
            # Only the first 50 characters of the raw response are echoed.
            print(f" Input: {raw_response[:50]}...")
            print(f" Expected: '{expected}'")
            print(f" Got: '{result}'")
            print()

        print(
            "β All answer formatter tests passed!"
            if everything_ok
            else "β Some answer formatter tests failed!"
        )
        return everything_ok
    except Exception as e:
        print(f"β Error testing answer formatter: {e}")
        traceback.print_exc()
        return False
def test_fixed_agent_import():
    """Verify that the fixed agent module imports and reports its status.

    Imports ``FixedGAIAAgent`` and ``get_agent_status`` from
    ``agents.fixed_enhanced_unified_agno_agent``, then calls
    ``get_agent_status()`` and prints whatever it returns.

    Returns:
        True when the import and the status call both succeed; False when
        either raises (the error and its traceback are printed).
    """
    rule = "=" * 50
    print("\n" + rule)
    print("π§ͺ Testing Fixed Agent Import")
    print(rule)

    try:
        # FixedGAIAAgent is imported only to prove the name exists.
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent, get_agent_status
        print("β Successfully imported FixedGAIAAgent")

        print(f"π Agent Status: {get_agent_status()}")
        return True
    except Exception as e:
        print(f"β Error importing fixed agent: {e}")
        traceback.print_exc()
        return False
def test_fixed_agent_initialization():
    """Construct a FixedGAIAAgent and check that it reports itself available.

    The check is abandoned (and counted as a failure) when the
    MISTRAL_API_KEY environment variable is unset or empty. Otherwise the
    agent is instantiated with no arguments, its ``available`` attribute is
    inspected and, when truthy, ``get_tool_status()`` is printed.

    Returns:
        True when the agent is available; False when the key is missing, the
        agent is unavailable, or anything raised (the error and its traceback
        are printed).
    """
    rule = "=" * 50
    print("\n" + rule)
    print("π§ͺ Testing Fixed Agent Initialization")
    print(rule)

    try:
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        # Guard: without the API key there is nothing meaningful to construct.
        if not os.getenv("MISTRAL_API_KEY"):
            print("β οΈ MISTRAL_API_KEY not found - agent will not be fully functional")
            print("π‘ Set MISTRAL_API_KEY in .env file for full testing")
            return False
        print("β MISTRAL_API_KEY found")

        agent = FixedGAIAAgent()
        if not agent.available:
            print("β Fixed agent initialization failed")
            return False

        print("β Fixed agent initialized successfully")
        print(f"π Tool Status: {agent.get_tool_status()}")
        return True
    except Exception as e:
        print(f"β Error initializing fixed agent: {e}")
        traceback.print_exc()
        return False
def test_fixed_agent_simple_question():
    """Ask the fixed agent "What is 25 * 17?" and look for 425 in the reply.

    The check is abandoned (and counted as a failure) when MISTRAL_API_KEY is
    unset or empty, or when the constructed agent's ``available`` attribute is
    falsy. The agent is invoked by calling the instance with the question.

    Returns:
        True when the answer is truthy, is not "unknown" and contains "425";
        False otherwise, including when anything raised (the error and its
        traceback are printed).
    """
    rule = "=" * 50
    print("\n" + rule)
    print("π§ͺ Testing Fixed Agent with Simple Question")
    print(rule)

    try:
        from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

        if not os.getenv("MISTRAL_API_KEY"):
            print("β οΈ MISTRAL_API_KEY not found - skipping agent test")
            return False

        agent = FixedGAIAAgent()
        if not agent.available:
            print("β Agent not available - skipping test")
            return False

        question = "What is 25 * 17?"
        print(f"π€ Testing question: {question}")
        answer = agent(question)
        print(f"π― Agent answer: '{answer}'")

        # Reject an empty answer, the "unknown" sentinel, or a missing product.
        if not answer or answer == "unknown" or "425" not in answer:
            print("β Agent answer doesn't look correct")
            return False

        print("β Agent provided reasonable answer")
        return True
    except Exception as e:
        print(f"β Error testing fixed agent: {e}")
        traceback.print_exc()
        return False
def test_app_integration():
    """Verify that the ``app`` module imports and report its agent flag.

    After importing ``app`` the function prints whether the module exposes a
    truthy ``FIXED_AGNO_AVAILABLE`` attribute. That flag only affects the
    printed message, not the result.

    Returns:
        True when ``app`` imported; False when the import (or the attribute
        check) raised, in which case the error and its traceback are printed.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("π§ͺ Testing App Integration")
    print(rule)

    try:
        import app
        print("β Successfully imported app module")

        # A missing attribute is treated the same as a falsy one.
        if getattr(app, 'FIXED_AGNO_AVAILABLE', False):
            print("β Fixed AGNO agent available in app")
        else:
            print("β οΈ Fixed AGNO agent not available in app")
        return True
    except Exception as e:
        print(f"β Error testing app integration: {e}")
        traceback.print_exc()
        return False
def main():
    """Run every test function in order and print a pass/fail summary.

    Each test is called with no arguments; a truthy return value counts as a
    pass. An exception escaping a test is reported and counted as a failure.

    Returns:
        True when all tests passed, otherwise False.
    """
    print("π Starting Fixed GAIA Agent Test Suite")
    print("This validates the fixes for the 5/20 evaluation score issue")

    suite = (
        ("Answer Formatter", test_answer_formatter),
        ("Fixed Agent Import", test_fixed_agent_import),
        ("Fixed Agent Initialization", test_fixed_agent_initialization),
        ("Simple Question Test", test_fixed_agent_simple_question),
        ("App Integration", test_app_integration),
    )

    results = []
    for label, runner in suite:
        try:
            outcome = runner()
        except Exception as e:
            print(f"β Test '{label}' crashed: {e}")
            outcome = False
        results.append((label, outcome))

    # Summary
    rule = "=" * 50
    print("\n" + rule)
    print("π Test Results Summary")
    print(rule)

    for label, outcome in results:
        marker = "β PASS" if outcome else "β FAIL"
        print(f"{marker} {label}")

    total = len(results)
    passed = sum(1 for _, outcome in results if outcome)
    print(f"\nπ― Overall: {passed}/{total} tests passed")

    everything_passed = passed == total
    if everything_passed:
        print("π All tests passed! The fixes should improve evaluation performance.")
    elif passed >= total * 0.8:
        # At least 80% of the tests passed.
        print("β οΈ Most tests passed. Some issues may remain.")
    else:
        print("β Many tests failed. Significant issues remain.")

    return everything_passed
if __name__ == "__main__":
    # Exit status 0 when every test passed, 1 otherwise.
    sys.exit(0 if main() else 1)