Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

File size: 14,059 Bytes

9a6a4dc

#!/usr/bin/env python3
"""
GAIA File Handling Fix Validation Test

This test validates that the file handling fix correctly:
1. Extracts file_name from GAIA evaluation API responses
2. Passes files to the agent's __call__ method
3. Lets the agent process files correctly using the enhanced search paths
4. Resolves the "Error file not found" issues

Expected Result: All file-based questions should now process successfully
"""

import os
import sys
import tempfile
import json
import logging
import traceback
from pathlib import Path

# Put the deployment-ready tree at the front of the import search path so the
# function-scope imports used by the validator resolve against it first.
sys.path.insert(0, '/workspaces/gaia-agent-python/deployment-ready')

# Root logging: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by every definition in this file.
logger = logging.getLogger(__name__)

class GAIAFileHandlingFixValidator:
    """Validates the GAIA file handling fix.

    The validator creates throw-away fixture files in a temporary directory,
    copies them into ``DEPLOYMENT_DIR`` while an agent is being exercised, and
    reports each validation step as passed or failed.

    Attributes are documented inline below; every ``test_*`` method returns
    ``True`` on success and ``False`` on failure so ``run_all_tests`` can
    tally them.
    """

    # Directory the fixtures are staged into while an agent is called.
    DEPLOYMENT_DIR = "/workspaces/gaia-agent-python/deployment-ready"

    # Lower-case substrings that mark an agent reply as a failed file lookup.
    FAILURE_MARKERS = ("error", "file not found")

    # Number of characters of an agent reply echoed to the log.
    PREVIEW_LENGTH = 100

    def __init__(self):
        """Initialize the validator with a fresh temporary fixture directory."""
        # Scratch directory that holds the generated fixtures until cleanup().
        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
        # Maps a fixture kind ('excel', 'python', 'image') to its absolute path.
        self.test_files = {}
        logger.info(f"🧪 Test directory: {self.temp_dir}")

    # ------------------------------------------------------------------
    # Pure helpers (no logging, no I/O)
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_files(question_data):
        """Return ``[file_name]`` for a question dict, or ``None`` if no file.

        Args:
            question_data: Mapping that may carry a ``"file_name"`` entry.

        Returns:
            A one-element list with the whitespace-stripped file name, or
            ``None`` when the entry is absent, empty, blank, or not a string.
        """
        file_name = question_data.get("file_name", "")
        # Non-string values (e.g. None) are treated as "no file attached"
        # instead of crashing on .strip().
        if isinstance(file_name, str) and file_name.strip():
            return [file_name.strip()]
        return None

    @classmethod
    def _response_indicates_success(cls, response):
        """Return True when *response* is text free of every failure marker.

        A non-string response can never count as a successful answer.
        """
        if not isinstance(response, str):
            return False
        lowered = response.lower()
        return not any(marker in lowered for marker in cls.FAILURE_MARKERS)

    @classmethod
    def _preview(cls, response):
        """Return the first ``PREVIEW_LENGTH`` characters of *response* as text."""
        return str(response)[:cls.PREVIEW_LENGTH]

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    def _write_fixture(self, kind, file_name, content):
        """Write *content* to ``temp_dir/file_name`` and register it under *kind*.

        Returns:
            The absolute path of the file that was written.
        """
        path = os.path.join(self.temp_dir, file_name)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        self.test_files[kind] = path
        return path

    def _stage_file(self, source_path):
        """Copy *source_path* into ``DEPLOYMENT_DIR`` and return the copy's path.

        Raises:
            OSError: If the copy fails (for example the directory is missing).
        """
        import shutil

        target_path = f"{self.DEPLOYMENT_DIR}/{os.path.basename(source_path)}"
        shutil.copy2(source_path, target_path)
        return target_path

    @staticmethod
    def _remove_staged_file(target_path):
        """Delete a staged copy; a file that is already gone is not an error."""
        try:
            os.remove(target_path)
        except FileNotFoundError:
            pass

    def _check_agent_on_file(self, agent, kind, label, icon, question):
        """Stage one fixture, ask *agent* about it, and judge the reply.

        Args:
            agent: Callable invoked as ``agent(question, files=[name])``.
            kind: Key into ``self.test_files`` selecting the fixture.
            label: Human-readable fixture name used in log messages.
            icon: Emoji prefix for the response log line.
            question: Question text passed to the agent.

        Returns:
            True when the agent returned text without a failure marker;
            False when it raised or the reply indicates a failure.

        Raises:
            OSError: If the fixture cannot be staged (left to the caller).
        """
        source_path = self.test_files[kind]
        file_name = os.path.basename(source_path)
        target_path = self._stage_file(source_path)

        try:
            response = agent(question, files=[file_name])
            logger.info(f"{icon} {label} file processing response: {self._preview(response)}...")

            if self._response_indicates_success(response):
                logger.info(f"✅ {label} file processed successfully")
                return True

            # A reply containing a failure marker means the file was not
            # handled; this must fail the check, as the end-to-end test does.
            logger.warning(f"⚠️ {label} file processing may have issues: {response}")
            return False
        except Exception as e:
            logger.error(f"❌ {label} file processing failed: {e}")
            return False
        finally:
            # The staged copy is removed whether or not the agent succeeded.
            self._remove_staged_file(target_path)

    # ------------------------------------------------------------------
    # Validation steps
    # ------------------------------------------------------------------

    def setup_test_files(self):
        """Create test files that simulate GAIA evaluation files.

        Returns:
            True once all three fixtures have been written.
        """
        logger.info("📁 Setting up test files...")

        # 1. Excel file (simulating GAIA Excel question)
        # NOTE(review): this is CSV text saved with an .xlsx extension, not a
        # real workbook; a reader that parses the xlsx format will reject it.
        excel_data = """Item,Category,Sales,Price
Burger,Food,150,8.99
Fries,Food,200,3.49
Soda,Beverage,180,2.99
Salad,Food,75,6.99
Coffee,Beverage,120,4.49"""

        excel_file = self._write_fixture(
            'excel', "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx", excel_data
        )
        logger.info(f"📊 Created Excel test file: {excel_file}")

        # 2. Python code file (simulating GAIA Python question)
        python_code = """#!/usr/bin/env python3
# Test Python code for GAIA evaluation
import math

def calculate_result():
    x = 15
    y = 8
    result = x * y + math.sqrt(64)
    return result

if __name__ == "__main__":
    final_result = calculate_result()
    print(f"Final result: {final_result}")
"""

        python_file = self._write_fixture(
            'python', "f918266a-b3e0-4914-865d-4faa564f1aef.py", python_code
        )
        logger.info(f"🐍 Created Python test file: {python_file}")

        # 3. PNG image file (simulating GAIA image question)
        # A plain-text placeholder with a .png extension is enough here: only
        # the file name is exercised by the checks in this class.
        image_content = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"
        image_file = self._write_fixture(
            'image', "cca530fc-4052-43b2-b130-b30968d8aa44.png", image_content
        )
        logger.info(f"🖼️ Created PNG test file: {image_file}")

        return True

    def test_app_file_extraction(self):
        """Test that app.py-style extraction of file_name from question data works.

        Returns:
            True when every extraction check holds.

        Raises:
            AssertionError: If an extraction check fails. Raised explicitly
                (not via ``assert``) so the checks survive ``python -O``.
        """
        logger.info("🔍 Testing app.py file extraction logic...")

        # Simulate GAIA question data structure
        test_question_data = {
            "task_id": "test-task-123",
            "question": "What is the total sales in the attached Excel file?",
            "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
            "Level": 1
        }

        files = self._extract_files(test_question_data)

        if files is None:
            raise AssertionError("File extraction failed")
        if len(files) != 1:
            raise AssertionError("Should extract exactly one file")
        if files[0] != "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx":
            raise AssertionError("File name mismatch")

        # Questions without an attachment must yield no file list at all.
        for no_file_data in ({}, {"file_name": ""}, {"file_name": "   "}):
            if self._extract_files(no_file_data) is not None:
                raise AssertionError("Question without file must yield no files")

        logger.info("✅ App.py file extraction logic works correctly")
        return True

    def test_agent_file_processing(self):
        """Test that the agent can process files with enhanced search paths.

        Both the Excel and the Python fixture are exercised so that every
        failing file is reported in a single run.

        Returns:
            True only when the agent imported, initialized, and answered both
            file questions without a failure marker in its reply.
        """
        logger.info("🤖 Testing agent file processing...")

        try:
            # Import the fixed agent
            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

            # Create agent instance
            agent = FixedGAIAAgent()
            logger.info("✅ Agent imported and initialized successfully")

            # (fixture kind, log label, log icon, question)
            cases = [
                ('excel', "Excel", "📊",
                 "What is the total sales amount in the attached Excel file?"),
                ('python', "Python", "🐍",
                 "What is the final numeric output from the attached Python code?"),
            ]

            # A list (not a generator) so every case runs even after a failure.
            outcomes = [
                self._check_agent_on_file(agent, kind, label, icon, question)
                for kind, label, icon, question in cases
            ]
            return all(outcomes)

        except ImportError as e:
            logger.error(f"❌ Could not import agent: {e}")
            return False
        except Exception as e:
            logger.error(f"❌ Agent file processing test failed: {e}")
            traceback.print_exc()
            return False

    def test_enhanced_search_paths(self):
        """Test that the file handler's search paths include the GAIA locations.

        This is a soft check: a missing path is logged as a warning and does
        not fail the step, because the code here cannot tell which of the
        expected locations a given environment is supposed to expose.

        Returns:
            True when the handler could be imported and inspected, False if
            importing, constructing, or inspecting it raised.
        """
        logger.info("🔍 Testing enhanced search paths...")

        try:
            from utils.file_handler import EnhancedFileHandler

            # Create file handler
            handler = EnhancedFileHandler()

            # Check that GAIA-specific paths are included
            expected_paths = [
                self.DEPLOYMENT_DIR,
                "/app",
                "/data"
            ]

            missing_paths = []
            for expected_path in expected_paths:
                if expected_path in handler.base_paths:
                    logger.info(f"✅ Found expected path: {expected_path}")
                else:
                    logger.warning(f"⚠️ Missing expected path: {expected_path}")
                    missing_paths.append(expected_path)

            logger.info(f"📁 Total search paths: {len(handler.base_paths)}")

            # Only claim a correct configuration when nothing was missing.
            if missing_paths:
                logger.warning(
                    f"⚠️ {len(missing_paths)} expected search path(s) missing: {missing_paths}"
                )
            else:
                logger.info("✅ Enhanced search paths configured correctly")
            return True

        except Exception as e:
            logger.error(f"❌ Enhanced search paths test failed: {e}")
            return False

    def test_end_to_end_simulation(self):
        """Test end-to-end simulation of GAIA evaluation with files.

        Returns:
            True when the agent's answer is text without a failure marker,
            False when it is not or when any step raised.
        """
        logger.info("🎯 Testing end-to-end GAIA evaluation simulation...")

        try:
            # Simulate the app.py workflow
            from app import DeploymentReadyGAIAAgent

            # Create agent
            agent = DeploymentReadyGAIAAgent()

            # Simulate GAIA question data with file
            question_data = {
                "task_id": "test-excel-task",
                "question": "What is the total sales amount in the attached Excel file?",
                "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
                "Level": 1
            }

            # Extract data (simulating app.py logic)
            question_text = question_data.get("question", "")
            files = self._extract_files(question_data)

            # Copy test file to a location where it can be found
            target_path = self._stage_file(self.test_files['excel'])

            try:
                # Call agent (simulating app.py workflow)
                if files:
                    submitted_answer = agent(question_text, files)
                else:
                    submitted_answer = agent(question_text)

                logger.info(f"🎯 End-to-end test response: {self._preview(submitted_answer)}...")

                # Check for success indicators
                if self._response_indicates_success(submitted_answer):
                    logger.info("✅ End-to-end simulation successful")
                    return True

                logger.warning(f"⚠️ End-to-end simulation may have issues: {submitted_answer}")
                return False

            finally:
                # Cleanup
                self._remove_staged_file(target_path)

        except Exception as e:
            logger.error(f"❌ End-to-end simulation failed: {e}")
            traceback.print_exc()
            return False

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def run_all_tests(self):
        """Run all validation tests and log a summary.

        A step counts as failed when it returns a falsy value or raises.

        Returns:
            True when every step passed, False otherwise.
        """
        logger.info("🚀 Starting GAIA File Handling Fix Validation...")

        tests = [
            ("Setup Test Files", self.setup_test_files),
            ("App File Extraction", self.test_app_file_extraction),
            ("Enhanced Search Paths", self.test_enhanced_search_paths),
            ("Agent File Processing", self.test_agent_file_processing),
            ("End-to-End Simulation", self.test_end_to_end_simulation),
        ]

        results = {}
        total_tests = len(tests)
        passed_tests = 0

        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"🧪 Running: {test_name}")
            logger.info(f"{'='*50}")

            try:
                result = test_func()
                results[test_name] = result
                if result:
                    passed_tests += 1
                    logger.info(f"✅ {test_name}: PASSED")
                else:
                    logger.error(f"❌ {test_name}: FAILED")
            except Exception as e:
                # One crashing step must not prevent the remaining steps.
                logger.error(f"❌ {test_name}: FAILED with exception: {e}")
                results[test_name] = False

        # Summary
        logger.info(f"\n{'='*60}")
        logger.info("📊 GAIA FILE HANDLING FIX VALIDATION SUMMARY")
        logger.info(f"{'='*60}")
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {total_tests - passed_tests}")
        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")

        for test_name, result in results.items():
            status = "✅ PASSED" if result else "❌ FAILED"
            logger.info(f"  {test_name}: {status}")

        if passed_tests == total_tests:
            logger.info("\n🎉 ALL TESTS PASSED! File handling fix is working correctly.")
            logger.info("🚀 The GAIA evaluation should now process file-based questions successfully.")
        else:
            logger.warning(f"\n⚠️ {total_tests - passed_tests} tests failed. File handling fix needs attention.")

        return passed_tests == total_tests

    def cleanup(self):
        """Remove the temporary fixture directory; failures are only logged."""
        try:
            import shutil
            shutil.rmtree(self.temp_dir)
            logger.info(f"🧹 Cleaned up test directory: {self.temp_dir}")
        except Exception as e:
            # Best effort: a leftover temp directory must not fail the run.
            logger.warning(f"⚠️ Could not clean up test directory: {e}")

def main():
    """Run the validation suite and map its outcome to a process exit code.

    The validator's temporary files are cleaned up whether the run finishes
    normally or raises.

    Returns:
        0 when every validation step passed, 1 otherwise.
    """
    validator = GAIAFileHandlingFixValidator()

    try:
        all_passed = validator.run_all_tests()
    finally:
        validator.cleanup()

    if all_passed:
        return 0
    return 1

# Script entry point: the suite's result becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())