#!/usr/bin/env python3
"""
GAIA File Handling Fix Validation Test

This test validates that the file handling fix correctly:
1. Extracts file_name from GAIA evaluation API responses
2. Passes files to the agent's __call__ method
3. Lets the agent process files correctly with enhanced search paths
4. Resolves the "Error file not found" issues

Expected Result: All file-based questions should now process successfully
"""
import json
import logging
import os
import sys
import tempfile
import traceback
from pathlib import Path

# Put the deployment-ready checkout first on the import path; the project
# modules imported later in this file are presumably resolved from there.
sys.path.insert(0, '/workspaces/gaia-agent-python/deployment-ready')

# Root logging: INFO and above, with timestamp and level on every line.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
class GAIAFileHandlingFixValidator:
    """Validates the GAIA file handling fix.

    The validator writes throw-away fixture files into a private temporary
    directory, copies them into ``DEPLOYMENT_DIR`` for the duration of each
    agent call, and reports one boolean result per check.  Call
    :meth:`cleanup` when finished to remove the temporary directory.
    """

    # Directory fixtures are copied into so an agent can be handed the bare
    # file name only.
    DEPLOYMENT_DIR = "/workspaces/gaia-agent-python/deployment-ready"

    # Lower-case substrings whose presence in an agent response marks the
    # run as unsuccessful.
    FAILURE_MARKERS = ("error", "file not found")

    def __init__(self):
        """Initialize the validator.

        Creates the temporary fixture directory and an empty fixture registry.
        """
        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
        # Maps fixture kind ('excel', 'python', 'image') to its path on disk.
        self.test_files = {}
        logger.info(f"π§ͺ Test directory: {self.temp_dir}")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_files(question_data):
        """Return ``[file_name]`` from a question payload, or ``None``.

        Mirrors the extraction the tests simulate: the ``file_name`` value is
        stripped of surrounding whitespace, and a missing, empty or blank
        value yields ``None``.
        """
        file_name = question_data.get("file_name", "")
        if file_name and file_name.strip():
            return [file_name.strip()]
        return None

    @classmethod
    def _response_indicates_success(cls, response):
        """Return True when *response* is a string free of failure markers.

        The comparison is case-insensitive.  A non-string response is treated
        as a failure.
        """
        if not isinstance(response, str):
            return False
        lowered = response.lower()
        return not any(marker in lowered for marker in cls.FAILURE_MARKERS)

    def _write_fixture(self, kind, file_name, content):
        """Write *content* to ``temp_dir/file_name`` and register it as *kind*.

        Returns:
            The path of the written file.
        """
        path = os.path.join(self.temp_dir, file_name)
        with open(path, 'w', encoding="utf-8") as f:
            f.write(content)
        self.test_files[kind] = path
        return path

    def _stage_fixture(self, kind):
        """Copy the fixture registered as *kind* into ``DEPLOYMENT_DIR``.

        Returns:
            A ``(file_name, staged_path)`` tuple.

        Raises:
            KeyError: if no fixture of that kind has been created.
            OSError: if the copy fails (e.g. the directory does not exist).
        """
        import shutil

        source_path = self.test_files[kind]
        file_name = os.path.basename(source_path)
        staged_path = os.path.join(self.DEPLOYMENT_DIR, file_name)
        shutil.copy2(source_path, staged_path)
        return file_name, staged_path

    @staticmethod
    def _remove_if_present(path):
        """Delete *path* if it exists."""
        if os.path.exists(path):
            os.remove(path)

    def _run_agent_on_fixture(self, agent, question, kind, label):
        """Stage one fixture, ask *agent* about it, and judge the response.

        Args:
            agent: callable invoked as ``agent(question, files=[file_name])``.
            question: question text passed to the agent.
            kind: key into ``self.test_files`` selecting the fixture.
            label: human-readable fixture name used in log messages.

        Returns:
            True if the agent returned a string without failure markers;
            False if it raised or its response contained a failure marker.
            Staging errors propagate to the caller.
        """
        file_name, staged_path = self._stage_fixture(kind)
        try:
            response = agent(question, files=[file_name])
            logger.info(f"π {label} file processing response: {response[:100]}...")
            if self._response_indicates_success(response):
                logger.info(f"β {label} file processed successfully")
                return True
            # A response that reports an error must fail the check; the
            # end-to-end simulation already treats it that way.
            logger.warning(f"β οΈ {label} file processing may have issues: {response}")
            return False
        except Exception as e:
            logger.error(f"β {label} file processing failed: {e}")
            return False
        finally:
            # Never leave the staged copy behind, whatever the outcome.
            self._remove_if_present(staged_path)

    # ------------------------------------------------------------------
    # Checks
    # ------------------------------------------------------------------

    def setup_test_files(self):
        """Create test files that simulate GAIA evaluation files.

        Writes three fixtures into ``self.temp_dir`` and records them in
        ``self.test_files`` under 'excel', 'python' and 'image'.

        Returns:
            True once all fixtures have been written.
        """
        logger.info("π Setting up test files...")

        # 1. Excel file (simulating GAIA Excel question)
        # NOTE(review): this is CSV text stored under an .xlsx name, not a
        # real workbook; a reader expecting a genuine XLSX container would
        # presumably reject it - confirm the agent tolerates this.
        excel_data = "\n".join([
            "Item,Category,Sales,Price",
            "Burger,Food,150,8.99",
            "Fries,Food,200,3.49",
            "Soda,Beverage,180,2.99",
            "Salad,Food,75,6.99",
            "Coffee,Beverage,120,4.49",
        ])
        excel_file = self._write_fixture(
            'excel', "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx", excel_data
        )
        logger.info(f"π Created Excel test file: {excel_file}")

        # 2. Python code file (simulating GAIA Python question)
        # Built line by line so the fixture keeps valid indentation no matter
        # how this method itself is indented.  The trailing "" yields the
        # final newline.
        python_code = "\n".join([
            "#!/usr/bin/env python3",
            "# Test Python code for GAIA evaluation",
            "import math",
            "def calculate_result():",
            "    x = 15",
            "    y = 8",
            "    result = x * y + math.sqrt(64)",
            "    return result",
            'if __name__ == "__main__":',
            "    final_result = calculate_result()",
            '    print(f"Final result: {final_result}")',
            "",
        ])
        python_file = self._write_fixture(
            'python', "f918266a-b3e0-4914-865d-4faa564f1aef.py", python_code
        )
        logger.info(f"π Created Python test file: {python_file}")

        # 3. PNG image file (simulating GAIA image question)
        # A plain text placeholder with a .png extension, not real image data.
        image_content = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"
        image_file = self._write_fixture(
            'image', "cca530fc-4052-43b2-b130-b30968d8aa44.png", image_content
        )
        logger.info(f"πΌοΈ Created PNG test file: {image_file}")
        return True

    def test_app_file_extraction(self):
        """Test that app.py correctly extracts file_name from question data.

        The extraction is simulated locally on a sample payload; app.py
        itself is not imported here.

        Returns:
            True when the extracted list matches the payload's file name.

        Raises:
            AssertionError: if the extraction result is wrong.  Raised
                explicitly so the checks still run under ``python -O``.
        """
        logger.info("π Testing app.py file extraction logic...")
        # Simulate GAIA question data structure
        test_question_data = {
            "task_id": "test-task-123",
            "question": "What is the total sales in the attached Excel file?",
            "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
            "Level": 1
        }
        files = self._extract_files(test_question_data)
        if files is None:
            raise AssertionError("File extraction failed")
        if len(files) != 1:
            raise AssertionError("Should extract exactly one file")
        if files[0] != "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx":
            raise AssertionError("File name mismatch")
        logger.info("β App.py file extraction logic works correctly")
        return True

    def test_agent_file_processing(self):
        """Test that the agent can process files with enhanced search paths.

        Runs the fixed agent against the Excel and Python fixtures in turn
        and stops at the first failure.

        Returns:
            True only if the agent could be imported and every fixture
            produced a response without failure markers; False otherwise.
        """
        logger.info("π€ Testing agent file processing...")
        try:
            # Import the fixed agent
            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

            agent = FixedGAIAAgent()
            logger.info("β Agent imported and initialized successfully")

            cases = [
                ("What is the total sales amount in the attached Excel file?",
                 'excel', "Excel"),
                ("What is the final numeric output from the attached Python code?",
                 'python', "Python"),
            ]
            for question, kind, label in cases:
                if not self._run_agent_on_fixture(agent, question, kind, label):
                    return False
            return True
        except ImportError as e:
            logger.error(f"β Could not import agent: {e}")
            return False
        except Exception as e:
            logger.error(f"β Agent file processing test failed: {e}")
            traceback.print_exc()
            return False

    def test_enhanced_search_paths(self):
        """Test that enhanced search paths work correctly.

        Logs, for each expected GAIA path, whether it appears in the file
        handler's ``base_paths``.

        Returns:
            True if the handler could be created and inspected, False if
            importing or inspecting it raised.
        """
        logger.info("π Testing enhanced search paths...")
        try:
            from utils.file_handler import EnhancedFileHandler

            handler = EnhancedFileHandler()
            # Check that GAIA-specific paths are included
            expected_paths = [
                "/workspaces/gaia-agent-python/deployment-ready",
                "/app",
                "/data"
            ]
            # Compare on text so str and path-like entries both match.
            known_paths = {str(path) for path in handler.base_paths}
            missing = []
            for expected_path in expected_paths:
                if expected_path in known_paths:
                    logger.info(f"β Found expected path: {expected_path}")
                else:
                    logger.warning(f"β οΈ Missing expected path: {expected_path}")
                    missing.append(expected_path)
            logger.info(f"π Total search paths: {len(handler.base_paths)}")
            # NOTE(review): a missing path only warns and the check still
            # passes - presumably because some of these directories exist
            # only in certain deployments.  Confirm whether it should fail.
            if not missing:
                logger.info("β Enhanced search paths configured correctly")
            return True
        except Exception as e:
            logger.error(f"β Enhanced search paths test failed: {e}")
            return False

    def test_end_to_end_simulation(self):
        """Test end-to-end simulation of GAIA evaluation with files.

        Builds a sample question payload, extracts its file list, stages the
        Excel fixture and calls the deployment agent the way the simulated
        app.py workflow does.

        Returns:
            True if the agent's answer contains no failure marker; False if
            it does or if any step raised.
        """
        logger.info("π― Testing end-to-end GAIA evaluation simulation...")
        try:
            # Simulate the app.py workflow
            from app import DeploymentReadyGAIAAgent

            agent = DeploymentReadyGAIAAgent()
            # Simulate GAIA question data with file
            question_data = {
                "task_id": "test-excel-task",
                "question": "What is the total sales amount in the attached Excel file?",
                "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
                "Level": 1
            }
            question_text = question_data.get("question", "")
            files = self._extract_files(question_data)

            # Copy test file to a location where it can be found
            _, staged_path = self._stage_fixture('excel')
            try:
                # Call agent (simulating app.py workflow)
                if files:
                    submitted_answer = agent(question_text, files)
                else:
                    submitted_answer = agent(question_text)
                logger.info(f"π― End-to-end test response: {submitted_answer[:100]}...")
                if self._response_indicates_success(submitted_answer):
                    logger.info("β End-to-end simulation successful")
                    return True
                logger.warning(f"β οΈ End-to-end simulation may have issues: {submitted_answer}")
                return False
            finally:
                self._remove_if_present(staged_path)
        except Exception as e:
            logger.error(f"β End-to-end simulation failed: {e}")
            traceback.print_exc()
            return False

    def run_all_tests(self):
        """Run all validation tests.

        Executes the checks in a fixed order (fixture setup first), treats
        any raised exception as a failure of that check, and logs a summary.

        Returns:
            True if every check passed, False otherwise.
        """
        logger.info("π Starting GAIA File Handling Fix Validation...")
        tests = [
            ("Setup Test Files", self.setup_test_files),
            ("App File Extraction", self.test_app_file_extraction),
            ("Enhanced Search Paths", self.test_enhanced_search_paths),
            ("Agent File Processing", self.test_agent_file_processing),
            ("End-to-End Simulation", self.test_end_to_end_simulation),
        ]
        results = {}
        total_tests = len(tests)
        passed_tests = 0
        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"π§ͺ Running: {test_name}")
            logger.info(f"{'='*50}")
            try:
                result = test_func()
                results[test_name] = result
                if result:
                    passed_tests += 1
                    logger.info(f"β {test_name}: PASSED")
                else:
                    logger.error(f"β {test_name}: FAILED")
            except Exception as e:
                logger.error(f"β {test_name}: FAILED with exception: {e}")
                results[test_name] = False

        # Summary
        failed_tests = total_tests - passed_tests
        logger.info(f"\n{'='*60}")
        logger.info("π GAIA FILE HANDLING FIX VALIDATION SUMMARY")
        logger.info(f"{'='*60}")
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {failed_tests}")
        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
        for test_name, result in results.items():
            status = "β PASSED" if result else "β FAILED"
            logger.info(f" {test_name}: {status}")
        if passed_tests == total_tests:
            logger.info("\nπ ALL TESTS PASSED! File handling fix is working correctly.")
            logger.info("π The GAIA evaluation should now process file-based questions successfully.")
        else:
            logger.warning(f"\nβ οΈ {failed_tests} tests failed. File handling fix needs attention.")
        return passed_tests == total_tests

    def cleanup(self):
        """Clean up test files.

        Removes the temporary fixture directory.  Best effort: a failure is
        logged as a warning and not raised.
        """
        try:
            import shutil

            shutil.rmtree(self.temp_dir)
            logger.info(f"π§Ή Cleaned up test directory: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"β οΈ Could not clean up test directory: {e}")
def main():
    """Run the validation suite and map its outcome to a process exit status.

    Returns:
        0 if every validation test passed, 1 otherwise.  The validator's
        temporary files are cleaned up in either case, including when the
        suite raises.
    """
    validator = GAIAFileHandlingFixValidator()
    try:
        if validator.run_all_tests():
            return 0
        return 1
    finally:
        validator.cleanup()
# Script entry point: exit with the status reported by main().
if __name__ == "__main__":
    sys.exit(main())