File size: 14,059 Bytes
9a6a4dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#!/usr/bin/env python3
"""
GAIA File Handling Fix Validation Test

This test validates that the file handling fix correctly:
1. Extracts file_name from GAIA evaluation API responses
2. Passes files to the agent's __call__ method
3. Agent processes files correctly with enhanced search paths
4. Resolves the "Error file not found" issues

Expected Result: All file-based questions should now process successfully
"""

import os
import sys
import tempfile
import json
import logging
import traceback
from pathlib import Path

# Put the deployment-ready directory at the front of the import search path.
# NOTE(review): the lazy imports of `agents`, `utils` and `app` inside the
# test methods presumably resolve through this entry - confirm the path is
# valid in the environment where this script runs.
sys.path.insert(0, '/workspaces/gaia-agent-python/deployment-ready')

# Configure root logging once for the whole script: INFO and above, each
# record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by every function and method in this file.
logger = logging.getLogger(__name__)

class GAIAFileHandlingFixValidator:
    """Validates the GAIA file handling fix.

    The validator owns a private temporary directory in which it writes
    small fixture files. Individual checks copy a fixture into
    ``deployment_dir`` (where the agent is expected to look for it), call
    the agent, and remove the copy again. Every ``test_*`` method returns
    ``True`` on success and ``False`` on failure so that
    :meth:`run_all_tests` can tally the results.

    Attributes:
        deployment_dir: Directory fixtures are copied into before an agent
            is called.
        temp_dir: Temporary directory created by ``tempfile.mkdtemp`` that
            holds the fixture files; removed by :meth:`cleanup`.
        test_files: Maps a fixture key (``'excel'``, ``'python'``,
            ``'image'``) to the absolute path of the fixture file.
    """

    # Default staging directory; kept as a class constant so the path is
    # written once instead of being repeated in every check.
    DEFAULT_DEPLOYMENT_DIR = "/workspaces/gaia-agent-python/deployment-ready"

    def __init__(self, deployment_dir=DEFAULT_DEPLOYMENT_DIR):
        """Initialize the validator.

        Args:
            deployment_dir: Directory into which fixtures are staged for the
                agent. Defaults to :attr:`DEFAULT_DEPLOYMENT_DIR`.
        """
        self.deployment_dir = deployment_dir
        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
        self.test_files = {}
        logger.info(f"πŸ§ͺ Test directory: {self.temp_dir}")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _write_fixture(self, key, filename, content):
        """Write *content* to *filename* in ``temp_dir`` and register it under *key*.

        Returns:
            The absolute path of the file that was written.
        """
        path = os.path.join(self.temp_dir, filename)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        self.test_files[key] = path
        return path

    def _stage_in_deployment_dir(self, source_path):
        """Copy *source_path* into ``deployment_dir``.

        Returns:
            A ``(filename, target_path)`` tuple: the bare file name handed to
            the agent and the full path of the staged copy.
        """
        import shutil

        filename = os.path.basename(source_path)
        target_path = os.path.join(self.deployment_dir, filename)
        shutil.copy2(source_path, target_path)
        return filename, target_path

    @staticmethod
    def _remove_staged(target_path):
        """Delete a staged copy if it is still present."""
        if os.path.exists(target_path):
            os.remove(target_path)

    @staticmethod
    def _response_indicates_success(response):
        """Return True when *response* mentions neither "error" nor "file not found".

        The comparison is case-insensitive.
        """
        lowered = response.lower()
        return "error" not in lowered and "file not found" not in lowered

    def _run_agent_on_file(self, agent, question, source_path, label, icon):
        """Stage one fixture, ask *agent* about it and judge the response.

        Args:
            agent: Callable invoked as ``agent(question, files=[filename])``.
            question: Question text passed to the agent.
            source_path: Path of the fixture file to stage.
            label: Human-readable file kind used in log messages.
            icon: Prefix used for the response log line.

        Returns:
            ``True`` only if the agent returned without raising and its
            response passes :meth:`_response_indicates_success`.
        """
        filename, target_path = self._stage_in_deployment_dir(source_path)
        try:
            response = agent(question, files=[filename])
            logger.info(f"{icon} {label} file processing response: {response[:100]}...")

            if self._response_indicates_success(response):
                logger.info(f"βœ… {label} file processed successfully")
                return True

            # An error-looking response is a failure: this is exactly the
            # symptom the validation exists to detect.
            logger.warning(f"⚠️ {label} file processing may have issues: {response}")
            return False
        except Exception as e:
            logger.error(f"❌ {label} file processing failed: {e}")
            return False
        finally:
            # Always remove the staged copy, even when the agent raised.
            self._remove_staged(target_path)

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    def setup_test_files(self):
        """Create test files that simulate GAIA evaluation files.

        Writes three fixtures into ``temp_dir`` and records their paths in
        ``test_files`` under the keys ``'excel'``, ``'python'`` and
        ``'image'``.

        Returns:
            ``True`` once all files have been written.
        """
        logger.info("πŸ“ Setting up test files...")

        # 1. Excel file (simulating GAIA Excel question)
        # NOTE(review): the content is CSV text saved under an .xlsx name, so
        # a real spreadsheet parser may reject it - confirm the agent copes.
        excel_data = """Item,Category,Sales,Price
Burger,Food,150,8.99
Fries,Food,200,3.49
Soda,Beverage,180,2.99
Salad,Food,75,6.99
Coffee,Beverage,120,4.49"""

        excel_file = self._write_fixture(
            'excel', "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx", excel_data
        )
        logger.info(f"πŸ“Š Created Excel test file: {excel_file}")

        # 2. Python code file (simulating GAIA Python question)
        python_code = """#!/usr/bin/env python3
# Test Python code for GAIA evaluation
import math

def calculate_result():
    x = 15
    y = 8
    result = x * y + math.sqrt(64)
    return result

if __name__ == "__main__":
    final_result = calculate_result()
    print(f"Final result: {final_result}")
"""

        python_file = self._write_fixture(
            'python', "f918266a-b3e0-4914-865d-4faa564f1aef.py", python_code
        )
        logger.info(f"🐍 Created Python test file: {python_file}")

        # 3. PNG image file (simulating GAIA image question)
        # A plain text placeholder with a .png extension; not a real image.
        image_content = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"
        image_file = self._write_fixture(
            'image', "cca530fc-4052-43b2-b130-b30968d8aa44.png", image_content
        )
        logger.info(f"πŸ–ΌοΈ Created PNG test file: {image_file}")

        return True

    # ------------------------------------------------------------------
    # Checks
    # ------------------------------------------------------------------

    def test_app_file_extraction(self):
        """Test that app.py correctly extracts file_name from question data.

        The extraction logic is re-implemented inline on a sample question
        dict; app.py itself is not imported here.

        Returns:
            ``True`` if the extracted file list is as expected.

        Raises:
            AssertionError: If the extracted file list is wrong.
        """
        logger.info("πŸ” Testing app.py file extraction logic...")

        # Simulate GAIA question data structure
        test_question_data = {
            "task_id": "test-task-123",
            "question": "What is the total sales in the attached Excel file?",
            "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
            "Level": 1
        }

        # Test the file extraction logic
        file_name = test_question_data.get("file_name", "")
        files = None
        if file_name and file_name.strip():
            files = [file_name.strip()]

        assert files is not None, "File extraction failed"
        assert len(files) == 1, "Should extract exactly one file"
        assert files[0] == "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx", "File name mismatch"

        logger.info("βœ… App.py file extraction logic works correctly")
        return True

    def test_agent_file_processing(self):
        """Test that the agent can process files with enhanced search paths.

        Imports and instantiates ``FixedGAIAAgent``, then runs it against the
        Excel and Python fixtures. Both fixtures are always attempted so the
        log shows the outcome of each.

        Returns:
            ``True`` only if every fixture was processed and no response
            contained "error" or "file not found"; ``False`` otherwise,
            including when the agent cannot be imported.
        """
        logger.info("πŸ€– Testing agent file processing...")

        try:
            # Import the fixed agent
            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

            # Create agent instance
            agent = FixedGAIAAgent()
            logger.info("βœ… Agent imported and initialized successfully")

            cases = [
                (
                    "What is the total sales amount in the attached Excel file?",
                    'excel', "Excel", "πŸ“Š",
                ),
                (
                    "What is the final numeric output from the attached Python code?",
                    'python', "Python", "🐍",
                ),
            ]

            outcomes = [
                self._run_agent_on_file(agent, question, self.test_files[key], label, icon)
                for question, key, label, icon in cases
            ]
            return all(outcomes)

        except ImportError as e:
            logger.error(f"❌ Could not import agent: {e}")
            return False
        except Exception as e:
            logger.error(f"❌ Agent file processing test failed: {e}")
            traceback.print_exc()
            return False

    def test_enhanced_search_paths(self):
        """Test that enhanced search paths work correctly.

        Instantiates ``EnhancedFileHandler`` and checks that its
        ``base_paths`` contains every expected GAIA-specific path.

        Returns:
            ``True`` if all expected paths are present; ``False`` if any is
            missing or the handler cannot be imported or created.
        """
        logger.info("πŸ” Testing enhanced search paths...")

        try:
            from utils.file_handler import EnhancedFileHandler

            # Create file handler
            handler = EnhancedFileHandler()

            # Check that GAIA-specific paths are included
            expected_paths = [
                self.DEFAULT_DEPLOYMENT_DIR,
                "/app",
                "/data"
            ]

            # Compare as strings so entries stored as path objects still match.
            configured = {str(path) for path in handler.base_paths}

            missing = []
            for expected_path in expected_paths:
                if expected_path in configured:
                    logger.info(f"βœ… Found expected path: {expected_path}")
                else:
                    logger.warning(f"⚠️ Missing expected path: {expected_path}")
                    missing.append(expected_path)

            logger.info(f"πŸ“ Total search paths: {len(handler.base_paths)}")

            if missing:
                # Do not claim success when a required path is absent.
                logger.error(f"❌ Enhanced search paths test failed: missing {missing}")
                return False

            logger.info("βœ… Enhanced search paths configured correctly")
            return True

        except Exception as e:
            logger.error(f"❌ Enhanced search paths test failed: {e}")
            return False

    def test_end_to_end_simulation(self):
        """Test end-to-end simulation of GAIA evaluation with files.

        Mirrors the app.py workflow: build the file list from a question
        dict, stage the Excel fixture, and call ``DeploymentReadyGAIAAgent``.

        Returns:
            ``True`` if the agent's answer contains neither "error" nor
            "file not found"; ``False`` otherwise or on any exception.
        """
        logger.info("🎯 Testing end-to-end GAIA evaluation simulation...")

        try:
            # Simulate the app.py workflow
            from app import DeploymentReadyGAIAAgent

            # Create agent
            agent = DeploymentReadyGAIAAgent()

            # Simulate GAIA question data with file
            question_data = {
                "task_id": "test-excel-task",
                "question": "What is the total sales amount in the attached Excel file?",
                "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
                "Level": 1
            }

            # Extract data (simulating app.py logic)
            question_text = question_data.get("question", "")
            file_name = question_data.get("file_name", "")

            # Prepare files list
            files = None
            if file_name and file_name.strip():
                files = [file_name.strip()]

            # Copy test file to a location where it can be found
            _, target_path = self._stage_in_deployment_dir(self.test_files['excel'])

            try:
                # Call agent (simulating app.py workflow)
                if files:
                    submitted_answer = agent(question_text, files)
                else:
                    submitted_answer = agent(question_text)

                logger.info(f"🎯 End-to-end test response: {submitted_answer[:100]}...")

                # Check for success indicators
                if self._response_indicates_success(submitted_answer):
                    logger.info("βœ… End-to-end simulation successful")
                    return True

                logger.warning(f"⚠️ End-to-end simulation may have issues: {submitted_answer}")
                return False

            finally:
                # Cleanup
                self._remove_staged(target_path)

        except Exception as e:
            logger.error(f"❌ End-to-end simulation failed: {e}")
            traceback.print_exc()
            return False

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def run_all_tests(self):
        """Run all validation tests.

        Runs fixture setup followed by every check, in a fixed order. A check
        that raises is recorded as failed rather than aborting the run.

        Returns:
            ``True`` if every step passed, ``False`` otherwise.
        """
        logger.info("πŸš€ Starting GAIA File Handling Fix Validation...")

        tests = [
            ("Setup Test Files", self.setup_test_files),
            ("App File Extraction", self.test_app_file_extraction),
            ("Enhanced Search Paths", self.test_enhanced_search_paths),
            ("Agent File Processing", self.test_agent_file_processing),
            ("End-to-End Simulation", self.test_end_to_end_simulation),
        ]

        results = {}
        total_tests = len(tests)
        passed_tests = 0

        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"πŸ§ͺ Running: {test_name}")
            logger.info(f"{'='*50}")

            try:
                result = test_func()
                results[test_name] = result
                if result:
                    passed_tests += 1
                    logger.info(f"βœ… {test_name}: PASSED")
                else:
                    logger.error(f"❌ {test_name}: FAILED")
            except Exception as e:
                logger.error(f"❌ {test_name}: FAILED with exception: {e}")
                results[test_name] = False

        # Summary
        logger.info(f"\n{'='*60}")
        logger.info("πŸ“Š GAIA FILE HANDLING FIX VALIDATION SUMMARY")
        logger.info(f"{'='*60}")
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {total_tests - passed_tests}")
        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")

        for test_name, result in results.items():
            status = "βœ… PASSED" if result else "❌ FAILED"
            logger.info(f"  {test_name}: {status}")

        if passed_tests == total_tests:
            logger.info("\nπŸŽ‰ ALL TESTS PASSED! File handling fix is working correctly.")
            logger.info("πŸš€ The GAIA evaluation should now process file-based questions successfully.")
        else:
            logger.warning(f"\n⚠️ {total_tests - passed_tests} tests failed. File handling fix needs attention.")

        return passed_tests == total_tests

    def cleanup(self):
        """Clean up test files.

        Removes ``temp_dir`` and everything in it. This is best-effort: a
        failure is logged as a warning and not re-raised.
        """
        try:
            import shutil
            shutil.rmtree(self.temp_dir)
            logger.info(f"🧹 Cleaned up test directory: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Could not clean up test directory: {e}")

def main():
    """Run the full validation suite and report it as a process exit code.

    The validator's temporary directory is removed whether or not the run
    completes normally.

    Returns:
        ``0`` when every test passed, ``1`` otherwise.
    """
    runner = GAIAFileHandlingFixValidator()

    try:
        all_passed = runner.run_all_tests()
    finally:
        # Runs on both the normal path and when run_all_tests() raises.
        runner.cleanup()

    if all_passed:
        return 0
    return 1

if __name__ == "__main__":
    # Propagate main()'s result to the shell as the process exit status.
    sys.exit(main())