gaia-enhanced-agent / test_gaia_file_handling_fix.py
GAIA Agent Deployment
Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
9a6a4dc
#!/usr/bin/env python3
"""
GAIA File Handling Fix Validation Test
This test validates that the file handling fix correctly:
1. Extracts file_name from GAIA evaluation API responses
2. Passes files to the agent's __call__ method
3. Agent processes files correctly with enhanced search paths
4. Resolves the "Error file not found" issues
Expected Result: All file-based questions should now process successfully
"""
import os
import sys
import tempfile
import json
import logging
import traceback
from pathlib import Path
# Add deployment-ready to path so the project-local imports used by the tests
# below (`agents...`, `utils.file_handler`, `app`) can be resolved.
# NOTE(review): this is an absolute, environment-specific path; the script
# presumably only works inside that workspace layout - confirm before reuse.
sys.path.insert(0, '/workspaces/gaia-agent-python/deployment-ready')
# Configure logging: INFO level, timestamped "time - level - message" records
# on the root logger, so every test step below is visible on the console.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by the validator class and main().
logger = logging.getLogger(__name__)
class GAIAFileHandlingFixValidator:
    """Validates the GAIA file handling fix.

    The validator creates placeholder attachment files in a temporary
    directory, then runs a fixed sequence of checks (see ``run_all_tests``).
    Every check returns ``True`` on success and ``False`` on failure; the
    caller is expected to invoke ``cleanup`` afterwards to remove the
    temporary directory.
    """

    # Directory the test files are copied into before the agent is invoked.
    # It is also the first path expected among the file handler's search paths.
    DEPLOYMENT_DIR = "/workspaces/gaia-agent-python/deployment-ready"

    # Lower-case substrings that mark an agent answer as a failed attempt.
    FAILURE_MARKERS = ("error", "file not found")

    def __init__(self):
        """Initialize the validator with a fresh temporary directory."""
        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
        # Maps a short kind ('excel', 'python', 'image') to the created path.
        self.test_files = {}
        logger.info(f"🧪 Test directory: {self.temp_dir}")

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _extract_files(question_data):
        """Return ``[file_name]`` for a GAIA question dict, or ``None``.

        Mirrors the extraction logic this script validates: the ``file_name``
        value is stripped, and a missing, empty or whitespace-only value means
        the question has no attachment.
        """
        file_name = question_data.get("file_name", "")
        if file_name and file_name.strip():
            return [file_name.strip()]
        return None

    @classmethod
    def _response_indicates_failure(cls, response):
        """Return ``True`` when an agent response looks like a failure.

        A response fails when it is not a string at all, or when it contains
        any of ``FAILURE_MARKERS`` (case-insensitive).
        """
        if not isinstance(response, str):
            return True
        lowered = response.lower()
        return any(marker in lowered for marker in cls.FAILURE_MARKERS)

    def _stage_test_file(self, file_key):
        """Copy a generated test file into ``DEPLOYMENT_DIR``.

        Args:
            file_key: Key into ``self.test_files`` ('excel', 'python', ...).

        Returns:
            Tuple ``(file_name, target_path)``: the bare file name to hand to
            the agent and the full path of the staged copy.

        Raises:
            KeyError: If ``setup_test_files`` did not create ``file_key``.
            OSError: If the copy fails.
        """
        import shutil

        source_path = self.test_files[file_key]
        file_name = os.path.basename(source_path)
        target_path = os.path.join(self.DEPLOYMENT_DIR, file_name)
        shutil.copy2(source_path, target_path)
        return file_name, target_path

    @staticmethod
    def _remove_staged_file(target_path):
        """Delete a staged copy if it is still present."""
        if os.path.exists(target_path):
            os.remove(target_path)

    def _check_agent_on_file(self, agent, file_key, icon, label, question):
        """Ask ``agent`` a question about one staged file and judge the answer.

        Args:
            agent: Callable accepting ``(question, files=[...])``.
            file_key: Key into ``self.test_files``.
            icon: Emoji prefix used in the response log line.
            label: Human-readable file kind used in log messages.
            question: Question text passed to the agent.

        Returns:
            ``True`` if the agent answered without failure markers, ``False``
            if the call raised or the answer looks like a failure.
        """
        file_name, target_path = self._stage_test_file(file_key)
        try:
            response = agent(question, files=[file_name])
            logger.info(f"{icon} {label} file processing response: {str(response)[:100]}...")
            if self._response_indicates_failure(response):
                logger.warning(f"⚠️ {label} file processing may have issues: {response}")
                return False
            logger.info(f"✅ {label} file processed successfully")
            return True
        except Exception as e:
            logger.error(f"❌ {label} file processing failed: {e}")
            return False
        finally:
            # Never leave test artefacts behind in the deployment directory.
            self._remove_staged_file(target_path)

    # ------------------------------------------------------------------
    # Test steps
    # ------------------------------------------------------------------
    def setup_test_files(self):
        """Create test files that simulate GAIA evaluation files.

        Populates ``self.test_files`` with the keys 'excel', 'python' and
        'image'. Returns ``True`` once all three files are written.

        NOTE(review): the '.xlsx' and '.png' files contain plain text (CSV
        rows and a placeholder string), not real Excel/PNG data. A reader
        that parses those formats strictly will reject them.
        """
        logger.info("📁 Setting up test files...")

        # 1. Excel file (simulating GAIA Excel question)
        excel_data = "\n".join([
            "Item,Category,Sales,Price",
            "Burger,Food,150,8.99",
            "Fries,Food,200,3.49",
            "Soda,Beverage,180,2.99",
            "Salad,Food,75,6.99",
            "Coffee,Beverage,120,4.49",
        ])
        excel_file = os.path.join(self.temp_dir, "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx")
        with open(excel_file, 'w', encoding="utf-8") as f:
            f.write(excel_data)
        self.test_files['excel'] = excel_file
        logger.info(f"📊 Created Excel test file: {excel_file}")

        # 2. Python code file (simulating GAIA Python question)
        # The program is written with real indentation so it is runnable.
        python_code = (
            '#!/usr/bin/env python3\n'
            '# Test Python code for GAIA evaluation\n'
            'import math\n'
            'def calculate_result():\n'
            '    x = 15\n'
            '    y = 8\n'
            '    result = x * y + math.sqrt(64)\n'
            '    return result\n'
            'if __name__ == "__main__":\n'
            '    final_result = calculate_result()\n'
            '    print(f"Final result: {final_result}")\n'
        )
        python_file = os.path.join(self.temp_dir, "f918266a-b3e0-4914-865d-4faa564f1aef.py")
        with open(python_file, 'w', encoding="utf-8") as f:
            f.write(python_code)
        self.test_files['python'] = python_file
        logger.info(f"🐍 Created Python test file: {python_file}")

        # 3. PNG image file (simulating GAIA image question)
        # Create a simple text file with PNG extension for testing
        image_content = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"
        image_file = os.path.join(self.temp_dir, "cca530fc-4052-43b2-b130-b30968d8aa44.png")
        with open(image_file, 'w', encoding="utf-8") as f:
            f.write(image_content)
        self.test_files['image'] = image_file
        logger.info(f"🖼️ Created PNG test file: {image_file}")
        return True

    def test_app_file_extraction(self):
        """Test the file_name extraction logic on a simulated question dict.

        Returns ``True`` when exactly the expected file name is extracted,
        ``False`` otherwise. Explicit checks are used instead of ``assert``
        so the test still runs under ``python -O``.
        """
        logger.info("🔍 Testing app.py file extraction logic...")

        # Simulate GAIA question data structure
        expected_name = "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"
        test_question_data = {
            "task_id": "test-task-123",
            "question": "What is the total sales in the attached Excel file?",
            "file_name": expected_name,
            "Level": 1
        }

        files = self._extract_files(test_question_data)
        if files is None:
            logger.error("❌ File extraction failed")
            return False
        if len(files) != 1:
            logger.error("❌ Should extract exactly one file")
            return False
        if files[0] != expected_name:
            logger.error("❌ File name mismatch")
            return False

        logger.info("✅ App.py file extraction logic works correctly")
        return True

    def test_agent_file_processing(self):
        """Test that the agent can process files with enhanced search paths.

        Imports and instantiates ``FixedGAIAAgent``, then asks it about the
        Excel and the Python test file in turn. Stops at the first failure.

        Returns:
            ``True`` only if both files are processed without failure
            markers in the response; ``False`` on import errors, exceptions,
            or a response that looks like a failure.
        """
        logger.info("🤖 Testing agent file processing...")
        try:
            # Import the fixed agent
            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

            # Create agent instance
            agent = FixedGAIAAgent()
            logger.info("✅ Agent imported and initialized successfully")

            cases = [
                ("excel", "📊", "Excel",
                 "What is the total sales amount in the attached Excel file?"),
                ("python", "🐍", "Python",
                 "What is the final numeric output from the attached Python code?"),
            ]
            # all() short-circuits, so a failing case skips the remaining ones.
            return all(self._check_agent_on_file(agent, *case) for case in cases)
        except ImportError as e:
            logger.error(f"❌ Could not import agent: {e}")
            return False
        except Exception as e:
            logger.error(f"❌ Agent file processing test failed: {e}")
            traceback.print_exc()
            return False

    def test_enhanced_search_paths(self):
        """Test that the file handler's search paths include the GAIA paths.

        Returns:
            ``True`` if every expected path is present in
            ``EnhancedFileHandler().base_paths``; ``False`` if any is missing
            or the handler cannot be imported/created.
        """
        logger.info("🔍 Testing enhanced search paths...")
        try:
            from utils.file_handler import EnhancedFileHandler

            # Create file handler
            handler = EnhancedFileHandler()

            # Check that GAIA-specific paths are included
            expected_paths = [
                self.DEPLOYMENT_DIR,
                "/app",
                "/data"
            ]
            # NOTE(review): the element type of base_paths is not visible
            # here; comparing normalised str() values lets both plain strings
            # and path objects match.
            configured = {os.path.normpath(str(path)) for path in handler.base_paths}

            missing = []
            for expected_path in expected_paths:
                if os.path.normpath(expected_path) in configured:
                    logger.info(f"✅ Found expected path: {expected_path}")
                else:
                    logger.warning(f"⚠️ Missing expected path: {expected_path}")
                    missing.append(expected_path)

            logger.info(f"📁 Total search paths: {len(handler.base_paths)}")
            if missing:
                # A check that cannot fail validates nothing: report it.
                logger.error(f"❌ Enhanced search paths test failed: missing {missing}")
                return False
            logger.info("✅ Enhanced search paths configured correctly")
            return True
        except Exception as e:
            logger.error(f"❌ Enhanced search paths test failed: {e}")
            return False

    def test_end_to_end_simulation(self):
        """Test end-to-end simulation of GAIA evaluation with files.

        Builds a question dict with a ``file_name``, extracts the file list,
        stages the Excel test file and calls ``DeploymentReadyGAIAAgent``
        the way the app workflow would.

        Returns:
            ``True`` if the answer contains no failure markers, ``False``
            otherwise or when any step raises.
        """
        logger.info("🎯 Testing end-to-end GAIA evaluation simulation...")
        try:
            # Simulate the app.py workflow
            from app import DeploymentReadyGAIAAgent

            # Create agent
            agent = DeploymentReadyGAIAAgent()

            # Simulate GAIA question data with file
            question_data = {
                "task_id": "test-excel-task",
                "question": "What is the total sales amount in the attached Excel file?",
                "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
                "Level": 1
            }

            # Extract data (simulating app.py logic)
            question_text = question_data.get("question", "")
            files = self._extract_files(question_data)

            # Copy test file to a location where it can be found
            _, target_path = self._stage_test_file('excel')
            try:
                # Call agent (simulating app.py workflow)
                if files:
                    submitted_answer = agent(question_text, files)
                else:
                    submitted_answer = agent(question_text)
                logger.info(f"🎯 End-to-end test response: {str(submitted_answer)[:100]}...")

                # Check for success indicators
                if self._response_indicates_failure(submitted_answer):
                    logger.warning(f"⚠️ End-to-end simulation may have issues: {submitted_answer}")
                    return False
                logger.info("✅ End-to-end simulation successful")
                return True
            finally:
                # Cleanup
                self._remove_staged_file(target_path)
        except Exception as e:
            logger.error(f"❌ End-to-end simulation failed: {e}")
            traceback.print_exc()
            return False

    def run_all_tests(self):
        """Run all validation tests and log a summary.

        Each step is run even if an earlier one failed; an exception raised
        by a step is logged and counted as a failure.

        Returns:
            ``True`` if every step passed, ``False`` otherwise.
        """
        logger.info("🚀 Starting GAIA File Handling Fix Validation...")
        tests = [
            ("Setup Test Files", self.setup_test_files),
            ("App File Extraction", self.test_app_file_extraction),
            ("Enhanced Search Paths", self.test_enhanced_search_paths),
            ("Agent File Processing", self.test_agent_file_processing),
            ("End-to-End Simulation", self.test_end_to_end_simulation),
        ]
        results = {}
        total_tests = len(tests)
        passed_tests = 0

        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"🧪 Running: {test_name}")
            logger.info(f"{'='*50}")
            try:
                result = test_func()
                results[test_name] = result
                if result:
                    passed_tests += 1
                    logger.info(f"✅ {test_name}: PASSED")
                else:
                    logger.error(f"❌ {test_name}: FAILED")
            except Exception as e:
                logger.error(f"❌ {test_name}: FAILED with exception: {e}")
                results[test_name] = False

        # Summary
        logger.info(f"\n{'='*60}")
        logger.info("📊 GAIA FILE HANDLING FIX VALIDATION SUMMARY")
        logger.info(f"{'='*60}")
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {total_tests - passed_tests}")
        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
        for test_name, result in results.items():
            status = "✅ PASSED" if result else "❌ FAILED"
            logger.info(f" {test_name}: {status}")

        if passed_tests == total_tests:
            logger.info("\n🎉 ALL TESTS PASSED! File handling fix is working correctly.")
            logger.info("🚀 The GAIA evaluation should now process file-based questions successfully.")
        else:
            logger.warning(f"\n⚠️ {total_tests - passed_tests} tests failed. File handling fix needs attention.")
        return passed_tests == total_tests

    def cleanup(self):
        """Clean up test files.

        Removes the temporary directory created in ``__init__``. Failures are
        logged as warnings and never raised, so cleanup cannot mask the test
        outcome.
        """
        try:
            import shutil

            shutil.rmtree(self.temp_dir)
            logger.info(f"🧹 Cleaned up test directory: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Could not clean up test directory: {e}")
def main():
    """Run the full validation suite and return a process exit status.

    Returns 0 when every test passed and 1 otherwise. The validator's
    temporary directory is removed whether or not the run succeeds.
    """
    validator = GAIAFileHandlingFixValidator()
    try:
        all_passed = validator.run_all_tests()
        if all_passed:
            return 0
        return 1
    finally:
        # Always remove the temporary test directory, even on an exception.
        validator.cleanup()
if __name__ == "__main__":
    # Hand the validation outcome to the shell as the process exit status.
    sys.exit(main())