#!/usr/bin/env python3
# Hosting-page snapshot metadata: Spaces (status: Running), file size 14,059 bytes, revision 9a6a4dc.
"""
GAIA File Handling Fix Validation Test
This test validates that the file handling fix correctly:
1. Extracts file_name from GAIA evaluation API responses
2. Passes files to the agent's __call__ method
3. Agent processes files correctly with enhanced search paths
4. Resolves the "Error file not found" issues
Expected Result: All file-based questions should now process successfully
"""
import os
import sys
import tempfile
import json
import logging
import traceback
from pathlib import Path
# Put the deployment-ready tree at the front of the import search path.
_DEPLOYMENT_READY_DIR = '/workspaces/gaia-agent-python/deployment-ready'
sys.path[:0] = [_DEPLOYMENT_READY_DIR]

# Logging setup: INFO and above, formatted as "time - LEVEL - message".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)
class GAIAFileHandlingFixValidator:
    """Validates the GAIA file handling fix.

    The validator writes small fixture files into a private temporary
    directory, copies them into ``DEPLOYMENT_DIR`` while an agent is being
    queried, and reports a boolean pass/fail result for every check.
    """

    # Directory the fixtures are copied into so they can be found by bare file name.
    DEPLOYMENT_DIR = "/workspaces/gaia-agent-python/deployment-ready"

    # Lower-case substrings whose presence in a response is treated as a failure.
    FAILURE_MARKERS = ("error", "file not found")

    # File names of the generated fixtures.
    EXCEL_FILE_NAME = "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"
    PYTHON_FILE_NAME = "f918266a-b3e0-4914-865d-4faa564f1aef.py"
    IMAGE_FILE_NAME = "cca530fc-4052-43b2-b130-b30968d8aa44.png"

    def __init__(self):
        """Initialize the validator."""
        # Private scratch directory for the fixtures; removed by cleanup().
        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
        # Maps a fixture kind ('excel', 'python', 'image') to the fixture's path.
        self.test_files = {}
        logger.info(f"🧪 Test directory: {self.temp_dir}")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _write_fixture(self, kind, file_name, content):
        """Write *content* to *file_name* in the temp dir and register it under *kind*.

        Returns:
            The path of the file that was written.
        """
        path = os.path.join(self.temp_dir, file_name)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        self.test_files[kind] = path
        return path

    @staticmethod
    def _extract_files(question_data):
        """Return ``[file_name]`` for a question dict, or ``None`` if it names no file.

        A missing, ``None``, empty or whitespace-only ``file_name`` yields
        ``None``; surrounding whitespace is stripped from a real name.
        """
        file_name = question_data.get("file_name", "")
        if file_name and file_name.strip():
            return [file_name.strip()]
        return None

    @classmethod
    def _looks_successful(cls, response):
        """Return True when *response* contains none of ``FAILURE_MARKERS``."""
        text = str(response).lower()
        return not any(marker in text for marker in cls.FAILURE_MARKERS)

    def _stage_fixture(self, kind):
        """Copy the fixture registered under *kind* into ``DEPLOYMENT_DIR``.

        A file that already exists under the same name is moved into the temp
        directory first, so that staging never destroys it.

        Returns:
            ``(file_name, target_path, backup_path)``; ``backup_path`` is
            ``None`` when nothing had to be moved aside.

        Raises:
            KeyError: if no fixture is registered under *kind*.
            OSError: if the copy fails (e.g. ``DEPLOYMENT_DIR`` does not exist).
        """
        import shutil

        source = self.test_files[kind]
        file_name = os.path.basename(source)
        target_path = os.path.join(self.DEPLOYMENT_DIR, file_name)

        backup_path = None
        if os.path.exists(target_path):
            backup_path = os.path.join(self.temp_dir, f"preexisting_{file_name}")
            shutil.move(target_path, backup_path)

        try:
            shutil.copy2(source, target_path)
        except Exception:
            # Put the displaced file back before reporting the failure.
            if backup_path is not None:
                shutil.move(backup_path, target_path)
            raise
        return file_name, target_path, backup_path

    @staticmethod
    def _unstage_fixture(target_path, backup_path):
        """Remove a staged fixture and restore the file it displaced, if any."""
        import shutil

        if os.path.exists(target_path):
            os.remove(target_path)
        if backup_path is not None:
            shutil.move(backup_path, target_path)

    # ------------------------------------------------------------------
    # Checks
    # ------------------------------------------------------------------

    def setup_test_files(self):
        """Create test files that simulate GAIA evaluation files.

        Returns:
            True once all three fixtures have been written.
        """
        logger.info("📁 Setting up test files...")

        # 1. Excel file (simulating GAIA Excel question)
        # NOTE(review): this is CSV text stored under an .xlsx name, not a real
        # workbook - confirm the agent's Excel handling tolerates that.
        excel_data = """Item,Category,Sales,Price
Burger,Food,150,8.99
Fries,Food,200,3.49
Soda,Beverage,180,2.99
Salad,Food,75,6.99
Coffee,Beverage,120,4.49"""
        excel_file = self._write_fixture('excel', self.EXCEL_FILE_NAME, excel_data)
        logger.info(f"📊 Created Excel test file: {excel_file}")

        # 2. Python code file (simulating GAIA Python question)
        python_code = """#!/usr/bin/env python3
# Test Python code for GAIA evaluation
import math

def calculate_result():
    x = 15
    y = 8
    result = x * y + math.sqrt(64)
    return result

if __name__ == "__main__":
    final_result = calculate_result()
    print(f"Final result: {final_result}")
"""
        python_file = self._write_fixture('python', self.PYTHON_FILE_NAME, python_code)
        logger.info(f"🐍 Created Python test file: {python_file}")

        # 3. PNG image file (simulating GAIA image question)
        # A plain text placeholder with a .png extension is enough for testing.
        image_content = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"
        image_file = self._write_fixture('image', self.IMAGE_FILE_NAME, image_content)
        logger.info(f"🖼️ Created PNG test file: {image_file}")

        return True

    def test_app_file_extraction(self):
        """Test that app.py correctly extracts file_name from question data.

        Returns:
            True when the extraction yields exactly the expected file name.

        Raises:
            AssertionError: if the extracted list is wrong. Raised explicitly
                so that the check still runs under ``python -O``.
        """
        logger.info("🔍 Testing app.py file extraction logic...")

        # Simulate GAIA question data structure
        test_question_data = {
            "task_id": "test-task-123",
            "question": "What is the total sales in the attached Excel file?",
            "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
            "Level": 1
        }

        files = self._extract_files(test_question_data)

        if files is None:
            raise AssertionError("File extraction failed")
        if len(files) != 1:
            raise AssertionError("Should extract exactly one file")
        if files[0] != "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx":
            raise AssertionError("File name mismatch")

        logger.info("✅ App.py file extraction logic works correctly")
        return True

    def test_agent_file_processing(self):
        """Test that the agent can process files with enhanced search paths.

        Each fixture is staged in ``DEPLOYMENT_DIR``, the agent is called with
        the bare file name, and the response is scanned for failure markers.

        Returns:
            True only if every file was processed without an exception and
            without a failure marker in the response; False otherwise
            (including when the agent cannot be imported).
        """
        logger.info("🤖 Testing agent file processing...")

        # (label, fixture kind, log icon, question)
        cases = [
            ("Excel", "excel", "📊",
             "What is the total sales amount in the attached Excel file?"),
            ("Python", "python", "🐍",
             "What is the final numeric output from the attached Python code?"),
        ]

        try:
            # Import the fixed agent
            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

            agent = FixedGAIAAgent()
            logger.info("✅ Agent imported and initialized successfully")

            all_ok = True
            for label, kind, icon, question in cases:
                file_name, target_path, backup_path = self._stage_fixture(kind)
                try:
                    response = agent(question, files=[file_name])
                    logger.info(f"{icon} {label} file processing response: {str(response)[:100]}...")

                    if self._looks_successful(response):
                        logger.info(f"✅ {label} file processed successfully")
                    else:
                        logger.warning(f"⚠️ {label} file processing may have issues: {response}")
                        # A failure marker means the fix is not validated.
                        all_ok = False
                except Exception as e:
                    logger.error(f"❌ {label} file processing failed: {e}")
                    return False
                finally:
                    self._unstage_fixture(target_path, backup_path)

            return all_ok

        except ImportError as e:
            logger.error(f"❌ Could not import agent: {e}")
            return False
        except Exception as e:
            logger.error(f"❌ Agent file processing test failed: {e}")
            traceback.print_exc()
            return False

    def test_enhanced_search_paths(self):
        """Test that enhanced search paths work correctly.

        Returns:
            True only if every expected path is present in the handler's
            ``base_paths``; False if any is missing or the handler cannot be
            imported or constructed.
        """
        logger.info("🔍 Testing enhanced search paths...")

        try:
            from utils.file_handler import EnhancedFileHandler

            handler = EnhancedFileHandler()

            # Check that GAIA-specific paths are included
            expected_paths = [
                self.DEPLOYMENT_DIR,
                "/app",
                "/data"
            ]

            # Compare as strings so that path-like entries match as well.
            configured = {str(path) for path in handler.base_paths}

            missing = []
            for expected_path in expected_paths:
                if expected_path in configured:
                    logger.info(f"✅ Found expected path: {expected_path}")
                else:
                    logger.warning(f"⚠️ Missing expected path: {expected_path}")
                    missing.append(expected_path)

            logger.info(f"📊 Total search paths: {len(handler.base_paths)}")

            if missing:
                return False

            logger.info("✅ Enhanced search paths configured correctly")
            return True

        except Exception as e:
            logger.error(f"❌ Enhanced search paths test failed: {e}")
            return False

    def test_end_to_end_simulation(self):
        """Test end-to-end simulation of GAIA evaluation with files.

        Mirrors the app.py workflow: extract the file list from a question
        dict, stage the Excel fixture, call the agent and inspect the answer.

        Returns:
            True if the answer contains no failure marker, False otherwise
            (including on any exception).
        """
        logger.info("🎯 Testing end-to-end GAIA evaluation simulation...")

        try:
            # Simulate the app.py workflow
            from app import DeploymentReadyGAIAAgent

            agent = DeploymentReadyGAIAAgent()

            # Simulate GAIA question data with file
            question_data = {
                "task_id": "test-excel-task",
                "question": "What is the total sales amount in the attached Excel file?",
                "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
                "Level": 1
            }

            # Extract data (simulating app.py logic)
            question_text = question_data.get("question", "")
            files = self._extract_files(question_data)

            # Copy test file to a location where it can be found
            _, target_path, backup_path = self._stage_fixture('excel')
            try:
                # Call agent (simulating app.py workflow)
                if files:
                    submitted_answer = agent(question_text, files)
                else:
                    submitted_answer = agent(question_text)

                logger.info(f"🎯 End-to-end test response: {str(submitted_answer)[:100]}...")

                if self._looks_successful(submitted_answer):
                    logger.info("✅ End-to-end simulation successful")
                    return True

                logger.warning(f"⚠️ End-to-end simulation may have issues: {submitted_answer}")
                return False
            finally:
                self._unstage_fixture(target_path, backup_path)

        except Exception as e:
            logger.error(f"❌ End-to-end simulation failed: {e}")
            traceback.print_exc()
            return False

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def run_all_tests(self):
        """Run all validation tests.

        Every check runs even if an earlier one fails; an exception escaping a
        check is logged and counted as a failure.

        Returns:
            True if every check passed, False otherwise.
        """
        logger.info("🚀 Starting GAIA File Handling Fix Validation...")

        tests = [
            ("Setup Test Files", self.setup_test_files),
            ("App File Extraction", self.test_app_file_extraction),
            ("Enhanced Search Paths", self.test_enhanced_search_paths),
            ("Agent File Processing", self.test_agent_file_processing),
            ("End-to-End Simulation", self.test_end_to_end_simulation),
        ]

        results = {}
        total_tests = len(tests)
        passed_tests = 0

        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"🧪 Running: {test_name}")
            logger.info(f"{'='*50}")

            try:
                result = test_func()
                results[test_name] = result
                if result:
                    passed_tests += 1
                    logger.info(f"✅ {test_name}: PASSED")
                else:
                    logger.error(f"❌ {test_name}: FAILED")
            except Exception as e:
                logger.error(f"❌ {test_name}: FAILED with exception: {e}")
                results[test_name] = False

        # Summary
        logger.info(f"\n{'='*60}")
        logger.info("📊 GAIA FILE HANDLING FIX VALIDATION SUMMARY")
        logger.info(f"{'='*60}")
        logger.info(f"Total Tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {total_tests - passed_tests}")
        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")

        for test_name, result in results.items():
            status = "✅ PASSED" if result else "❌ FAILED"
            logger.info(f" {test_name}: {status}")

        if passed_tests == total_tests:
            logger.info("\n🎉 ALL TESTS PASSED! File handling fix is working correctly.")
            logger.info("🚀 The GAIA evaluation should now process file-based questions successfully.")
        else:
            logger.warning(f"\n⚠️ {total_tests - passed_tests} tests failed. File handling fix needs attention.")

        return passed_tests == total_tests

    def cleanup(self):
        """Clean up test files.

        Best effort: a failure to remove the temporary directory is logged as
        a warning and not raised.
        """
        try:
            import shutil
            shutil.rmtree(self.temp_dir)
            logger.info(f"🧹 Cleaned up test directory: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Could not clean up test directory: {e}")
def main():
    """Run the validation suite and map its outcome to a process exit status.

    Returns:
        0 if ``run_all_tests()`` reported success, 1 otherwise. The
        validator's ``cleanup()`` is invoked on the way out in either case,
        and also when the run raises.
    """
    validator = GAIAFileHandlingFixValidator()
    try:
        if validator.run_all_tests():
            return 0
        return 1
    finally:
        validator.cleanup()
if __name__ == "__main__":
    # Hand the suite result to the shell: main() returns 0 on success, 1 on failure.
    exit_code = main()
    sys.exit(exit_code)