#!/usr/bin/env python3
"""
GAIA File Handling Fix Validation Test

This test validates that the file handling fix correctly:
1. Extracts file_name from GAIA evaluation API responses
2. Passes files to the agent's __call__ method
3. Agent processes files correctly with enhanced search paths
4. Resolves the "Error file not found" issues

Expected Result: All file-based questions should now process successfully
"""

import contextlib
import json
import logging
import os
import shutil
import sys
import tempfile
import traceback
from pathlib import Path
from typing import Callable, Dict, Iterator, List, Optional, Tuple

# Directory that holds the deployable agent code; test files are staged here
# so the agent's file search can find them.
DEPLOYMENT_DIR = '/workspaces/gaia-agent-python/deployment-ready'

# Add deployment-ready to path
sys.path.insert(0, DEPLOYMENT_DIR)

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# File names mirror the UUID-style attachment names used by the test questions.
_EXCEL_FILENAME = "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"
_PYTHON_FILENAME = "f918266a-b3e0-4914-865d-4faa564f1aef.py"
_IMAGE_FILENAME = "cca530fc-4052-43b2-b130-b30968d8aa44.png"

# NOTE: this is CSV text saved under an .xlsx name, not a real workbook.
# An agent that opens it with a strict Excel parser may reject it.
_EXCEL_DATA = """Item,Category,Sales,Price
Burger,Food,150,8.99
Fries,Food,200,3.49
Soda,Beverage,180,2.99
Salad,Food,75,6.99
Coffee,Beverage,120,4.49"""

_PYTHON_CODE = """#!/usr/bin/env python3
# Test Python code for GAIA evaluation

import math

def calculate_result():
    x = 15
    y = 8
    result = x * y + math.sqrt(64)
    return result

if __name__ == "__main__":
    final_result = calculate_result()
    print(f"Final result: {final_result}")
"""

# Plain-text placeholder with a .png extension (not a decodable image).
_IMAGE_CONTENT = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"


class GAIAFileHandlingFixValidator:
    """Validates the GAIA file handling fix.

    Creates placeholder attachment files in a private temporary directory,
    stages them into the deployment directory one at a time, and checks that
    the project's agents can be called with them.
    """

    def __init__(self, deployment_dir: str = DEPLOYMENT_DIR):
        """Initialize the validator.

        Args:
            deployment_dir: Directory into which test files are copied before
                the agent is invoked. Defaults to the module-level
                ``DEPLOYMENT_DIR`` so existing callers are unaffected.
        """
        self.deployment_dir = str(deployment_dir)
        # Keep imports resolvable when a non-default directory is supplied.
        if self.deployment_dir not in sys.path:
            sys.path.insert(0, self.deployment_dir)
        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
        self.test_files: Dict[str, str] = {}
        logger.info(f"🧪 Test directory: {self.temp_dir}")

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_files(question_data: dict) -> Optional[List[str]]:
        """Return ``[file_name]`` (stripped) from question data, or ``None``.

        ``None`` is returned when the key is absent, ``None``, or blank.
        """
        file_name = question_data.get("file_name") or ""
        stripped = file_name.strip()
        return [stripped] if stripped else None

    @staticmethod
    def _response_ok(response: object) -> bool:
        """Return True when *response* is text without failure indicators.

        A response counts as failed if it is not a string or if it contains
        "error" or "file not found" (case-insensitive).
        """
        if not isinstance(response, str):
            return False
        lowered = response.lower()
        return "error" not in lowered and "file not found" not in lowered

    @contextlib.contextmanager
    def _staged_file(self, key: str) -> Iterator[str]:
        """Copy ``self.test_files[key]`` into the deployment dir temporarily.

        Yields the bare file name. The staged copy is removed on exit, even if
        the body raises.
        """
        source = self.test_files[key]
        filename = os.path.basename(source)
        target_path = os.path.join(self.deployment_dir, filename)
        shutil.copy2(source, target_path)
        try:
            yield filename
        finally:
            # Cleanup
            if os.path.exists(target_path):
                os.remove(target_path)

    def _check_agent_on_file(self, agent: Callable, key: str, question: str,
                             label: str, icon: str) -> bool:
        """Stage one test file, call *agent* with it, and judge the response.

        Returns False when the agent raises or when its response contains a
        failure indicator (see ``_response_ok``).
        """
        with self._staged_file(key) as filename:
            try:
                response = agent(question, files=[filename])
            except Exception as e:
                logger.error(f"❌ {label} file processing failed: {e}")
                return False

            logger.info(f"{icon} {label} file processing response: {str(response)[:100]}...")

            # Check if response indicates successful file processing
            if self._response_ok(response):
                logger.info(f"✅ {label} file processed successfully")
                return True

            logger.warning(f"⚠️ {label} file processing may have issues: {response}")
            return False

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def setup_test_files(self):
        """Create test files that simulate GAIA evaluation files.

        Populates ``self.test_files`` with the keys ``'excel'``, ``'python'``
        and ``'image'`` mapped to absolute paths inside ``self.temp_dir``.

        Returns:
            True once all files have been written.
        """
        logger.info("📁 Setting up test files...")

        # (key, file name, content, log icon, log label)
        fixtures: List[Tuple[str, str, str, str, str]] = [
            ("excel", _EXCEL_FILENAME, _EXCEL_DATA, "📊", "Excel"),
            ("python", _PYTHON_FILENAME, _PYTHON_CODE, "🐍", "Python"),
            ("image", _IMAGE_FILENAME, _IMAGE_CONTENT, "🖼️", "PNG"),
        ]

        for key, filename, content, icon, label in fixtures:
            path = os.path.join(self.temp_dir, filename)
            with open(path, 'w', encoding='utf-8') as f:
                f.write(content)
            self.test_files[key] = path
            logger.info(f"{icon} Created {label} test file: {path}")

        return True

    def test_app_file_extraction(self):
        """Test that app.py correctly extracts file_name from question data.

        Returns:
            True when the extraction logic yields exactly the expected name.

        Raises:
            AssertionError: If extraction yields nothing, more than one file,
                or a different name.
        """
        logger.info("🔍 Testing app.py file extraction logic...")

        # Simulate GAIA question data structure
        test_question_data = {
            "task_id": "test-task-123",
            "question": "What is the total sales in the attached Excel file?",
            "file_name": _EXCEL_FILENAME,
            "Level": 1
        }

        # Test the file extraction logic
        files = self._extract_files(test_question_data)

        # Explicit raises rather than ``assert`` so the checks survive ``-O``.
        if files is None:
            raise AssertionError("File extraction failed")
        if len(files) != 1:
            raise AssertionError("Should extract exactly one file")
        if files[0] != _EXCEL_FILENAME:
            raise AssertionError("File name mismatch")

        logger.info("✅ App.py file extraction logic works correctly")
        return True

    def test_agent_file_processing(self):
        """Test that the agent can process files with enhanced search paths.

        Requires ``setup_test_files`` to have run. Both the Excel and the
        Python fixture are tried.

        Returns:
            True only if the agent imports, initializes, and returns a
            response without failure indicators for every fixture.
        """
        logger.info("🤖 Testing agent file processing...")

        try:
            # Import the fixed agent
            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

            # Create agent instance
            agent = FixedGAIAAgent()
            logger.info("✅ Agent imported and initialized successfully")

            cases = [
                ("excel", "What is the total sales amount in the attached Excel file?",
                 "Excel", "📊"),
                ("python", "What is the final numeric output from the attached Python code?",
                 "Python", "🐍"),
            ]
            # Run every case (no short-circuit) so all problems get logged.
            outcomes = [
                self._check_agent_on_file(agent, key, question, label, icon)
                for key, question, label, icon in cases
            ]
            return all(outcomes)

        except ImportError as e:
            logger.error(f"❌ Could not import agent: {e}")
            return False
        except Exception as e:
            logger.error(f"❌ Agent file processing test failed: {e}")
            traceback.print_exc()
            return False

    def test_enhanced_search_paths(self):
        """Test that enhanced search paths work correctly.

        Checks that the handler's ``base_paths`` contains the deployment
        directory plus ``/app`` and ``/data``.

        Returns:
            True if every expected path is present; False if any is missing or
            the handler cannot be imported/constructed.
        """
        logger.info("🔍 Testing enhanced search paths...")

        try:
            from utils.file_handler import EnhancedFileHandler

            # Create file handler
            handler = EnhancedFileHandler()

            # Check that GAIA-specific paths are included
            expected_paths = [
                self.deployment_dir,
                "/app",
                "/data"
            ]

            missing = []
            for expected_path in expected_paths:
                if expected_path in handler.base_paths:
                    logger.info(f"✅ Found expected path: {expected_path}")
                else:
                    logger.warning(f"⚠️ Missing expected path: {expected_path}")
                    missing.append(expected_path)

            logger.info(f"📁 Total search paths: {len(handler.base_paths)}")

            if missing:
                # Do not claim success when expected paths are absent.
                return False

            logger.info("✅ Enhanced search paths configured correctly")
            return True

        except Exception as e:
            logger.error(f"❌ Enhanced search paths test failed: {e}")
            return False

    def test_end_to_end_simulation(self):
        """Test end-to-end simulation of GAIA evaluation with files.

        Mimics the app.py flow: extract ``file_name`` from question data,
        stage the Excel fixture, and call ``DeploymentReadyGAIAAgent``.

        Returns:
            True if the agent's answer has no failure indicators; False on
            failure indicators or any exception.
        """
        logger.info("🎯 Testing end-to-end GAIA evaluation simulation...")

        try:
            # Simulate the app.py workflow
            from app import DeploymentReadyGAIAAgent

            # Create agent
            agent = DeploymentReadyGAIAAgent()

            # Simulate GAIA question data with file
            question_data = {
                "task_id": "test-excel-task",
                "question": "What is the total sales amount in the attached Excel file?",
                "file_name": _EXCEL_FILENAME,
                "Level": 1
            }

            # Extract data (simulating app.py logic)
            question_text = question_data.get("question", "")
            files = self._extract_files(question_data)

            # Copy test file to a location where it can be found
            with self._staged_file('excel'):
                # Call agent (simulating app.py workflow)
                if files:
                    submitted_answer = agent(question_text, files)
                else:
                    submitted_answer = agent(question_text)

                logger.info(f"🎯 End-to-end test response: {submitted_answer[:100]}...")

                # Check for success indicators
                if self._response_ok(submitted_answer):
                    logger.info("✅ End-to-end simulation successful")
                    return True

                logger.warning(f"⚠️ End-to-end simulation may have issues: {submitted_answer}")
                return False

        except Exception as e:
            logger.error(f"❌ End-to-end simulation failed: {e}")
            traceback.print_exc()
            return False

    def run_all_tests(self):
        """Run all validation tests.

        Each test runs even if an earlier one failed; an exception inside a
        test counts as a failure of that test.

        Returns:
            True if every test passed, otherwise False.
        """
        logger.info("🚀 Starting GAIA File Handling Fix Validation...")

        tests = [
            ("Setup Test Files", self.setup_test_files),
            ("App File Extraction", self.test_app_file_extraction),
            ("Enhanced Search Paths", self.test_enhanced_search_paths),
            ("Agent File Processing", self.test_agent_file_processing),
            ("End-to-End Simulation", self.test_end_to_end_simulation),
        ]

        results = {}
        total_tests = len(tests)
        passed_tests = 0

        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"🧪 Running: {test_name}")
            logger.info(f"{'='*50}")

            try:
                result = test_func()
                results[test_name] = result
                if result:
                    passed_tests += 1
                    logger.info(f"✅ {test_name}: PASSED")
                else:
                    logger.error(f"❌ {test_name}: FAILED")
            except Exception as e:
                logger.error(f"❌ {test_name}: FAILED with exception: {e}")
                results[test_name] = False

        # Summary
        logger.info(f"\n{'='*60}")
        logger.info("📊 GAIA FILE HANDLING FIX VALIDATION SUMMARY")
        logger.info(f"{'='*60}")
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {total_tests - passed_tests}")
        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")

        for test_name, result in results.items():
            status = "✅ PASSED" if result else "❌ FAILED"
            logger.info(f"  {test_name}: {status}")

        if passed_tests == total_tests:
            logger.info("\n🎉 ALL TESTS PASSED! File handling fix is working correctly.")
            logger.info("🚀 The GAIA evaluation should now process file-based questions successfully.")
        else:
            logger.warning(f"\n⚠️ {total_tests - passed_tests} tests failed. File handling fix needs attention.")

        return passed_tests == total_tests

    def cleanup(self):
        """Clean up test files.

        Removes ``self.temp_dir`` recursively. Failures are logged as
        warnings and never raised, so cleanup is safe in a ``finally``.
        """
        try:
            shutil.rmtree(self.temp_dir)
            logger.info(f"🧹 Cleaned up test directory: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Could not clean up test directory: {e}")


def main():
    """Main test execution.

    Returns:
        Process exit code: 0 if all tests passed, 1 otherwise.
    """
    validator = GAIAFileHandlingFixValidator()

    try:
        success = validator.run_all_tests()
        return 0 if success else 1
    finally:
        validator.cleanup()


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)