Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / test_gaia_file_handling_fix.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc about 2 months ago

14.1 kB

	#!/usr/bin/env python3
	"""
	GAIA File Handling Fix Validation Test

	This test validates that the file handling fix correctly:
	1. Extracts file_name from GAIA evaluation API responses
	2. Passes files to the agent's __call__ method
	3. Agent processes files correctly with enhanced search paths
	4. Resolves the "Error file not found" issues

	Expected Result: All file-based questions should now process successfully
	"""

	import contextlib
	import json
	import logging
	import os
	import shutil
	import sys
	import tempfile
	import traceback
	from pathlib import Path

	# Put the deployment-ready tree first on the import path so the function-level
	# imports of `agents`, `utils` and `app` used by the tests resolve against it.
	sys.path.insert(0, '/workspaces/gaia-agent-python/deployment-ready')

	# Timestamped INFO-level logging for the whole script, plus the module logger.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	class GAIAFileHandlingFixValidator:
	    """Validate the GAIA file handling fix.

	    The validator writes a few fixture files into a scratch directory, stages
	    them one at a time in ``DEPLOYMENT_DIR`` and checks that the agents answer
	    questions about them without reporting an error.

	    Every ``test_*`` method returns True on success and False on failure;
	    ``run_all_tests`` aggregates them and ``cleanup`` removes the scratch
	    directory.
	    """

	    # Directory the fixture files are copied into before the agent is called
	    # with the bare file name. Override on a subclass or instance to run the
	    # validator outside the original workspace.
	    DEPLOYMENT_DIR = "/workspaces/gaia-agent-python/deployment-ready"

	    def __init__(self):
	        """Create the scratch directory that holds the generated fixtures."""
	        self.temp_dir = tempfile.mkdtemp(prefix="gaia_fix_test_")
	        # Maps a fixture kind ('excel', 'python', 'image') to its absolute path.
	        self.test_files = {}
	        logger.info(f"🧪 Test directory: {self.temp_dir}")

	    # ------------------------------------------------------------------
	    # Private helpers
	    # ------------------------------------------------------------------

	    @staticmethod
	    def _extract_files(question_data):
	        """Return ``[file_name]`` for a question dict, or None without a file.

	        Mirrors the extraction logic this script attributes to app.py: the
	        ``file_name`` value is stripped, and a missing, ``None`` or blank
	        value means "no attachment".
	        """
	        file_name = question_data.get("file_name", "")
	        if file_name and file_name.strip():
	            return [file_name.strip()]
	        return None

	    @staticmethod
	    def _response_ok(response):
	        """Return True when *response* contains no error marker.

	        The check is a case-insensitive substring match on "error" and
	        "file not found"; *response* must be a string.
	        """
	        lowered = response.lower()
	        return "error" not in lowered and "file not found" not in lowered

	    def _write_fixture(self, filename, content):
	        """Write *content* to *filename* inside the scratch dir; return the path."""
	        path = os.path.join(self.temp_dir, filename)
	        with open(path, 'w', encoding='utf-8') as f:
	            f.write(content)
	        return path

	    @contextlib.contextmanager
	    def _staged(self, kind):
	        """Copy fixture *kind* into ``DEPLOYMENT_DIR`` for the ``with`` body.

	        Yields the bare file name to hand to the agent. The staged copy is
	        removed when the block exits, whether normally, by ``return`` or by
	        an exception. A failing copy raises before the block is entered.
	        """
	        source = self.test_files[kind]
	        filename = os.path.basename(source)
	        target_path = f"{self.DEPLOYMENT_DIR}/{filename}"
	        shutil.copy2(source, target_path)
	        try:
	            yield filename
	        finally:
	            if os.path.exists(target_path):
	                os.remove(target_path)

	    # ------------------------------------------------------------------
	    # Tests
	    # ------------------------------------------------------------------

	    def setup_test_files(self):
	        """Create test files that simulate GAIA evaluation files.

	        Populates ``self.test_files`` with the keys 'excel', 'python' and
	        'image' and returns True. The fixture text is assembled line by line
	        so its content does not depend on how this file is indented.
	        """
	        logger.info("📁 Setting up test files...")

	        # 1. Excel file (simulating GAIA Excel question)
	        # NOTE(review): this is CSV text stored under an .xlsx name, not a real
	        # workbook; a strict spreadsheet reader may reject it - confirm that
	        # the agent's file handler tolerates this.
	        excel_data = "\n".join([
	            "Item,Category,Sales,Price",
	            "Burger,Food,150,8.99",
	            "Fries,Food,200,3.49",
	            "Soda,Beverage,180,2.99",
	            "Salad,Food,75,6.99",
	            "Coffee,Beverage,120,4.49",
	        ])
	        excel_file = self._write_fixture(
	            "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx", excel_data
	        )
	        self.test_files['excel'] = excel_file
	        logger.info(f"📊 Created Excel test file: {excel_file}")

	        # 2. Python code file (simulating GAIA Python question)
	        python_code = "\n".join([
	            "#!/usr/bin/env python3",
	            "# Test Python code for GAIA evaluation",
	            "import math",
	            "",
	            "def calculate_result():",
	            "    x = 15",
	            "    y = 8",
	            "    result = x * y + math.sqrt(64)",
	            "    return result",
	            "",
	            'if __name__ == "__main__":',
	            "    final_result = calculate_result()",
	            '    print(f"Final result: {final_result}")',
	            "",
	        ])
	        python_file = self._write_fixture(
	            "f918266a-b3e0-4914-865d-4faa564f1aef.py", python_code
	        )
	        self.test_files['python'] = python_file
	        logger.info(f"🐍 Created Python test file: {python_file}")

	        # 3. PNG image file (simulating GAIA image question)
	        # NOTE(review): a plain-text placeholder with a .png extension, not a
	        # decodable image; it is created here but not used by any test below.
	        image_content = "PNG_IMAGE_PLACEHOLDER_FOR_TESTING"
	        image_file = self._write_fixture(
	            "cca530fc-4052-43b2-b130-b30968d8aa44.png", image_content
	        )
	        self.test_files['image'] = image_file
	        logger.info(f"🖼️ Created PNG test file: {image_file}")

	        return True

	    def test_app_file_extraction(self):
	        """Check that ``file_name`` is extracted from GAIA question data.

	        Returns True on success.

	        Raises:
	            AssertionError: if the extracted file list is not exactly the
	                expected single name. Raised explicitly rather than through
	                ``assert`` so the check still runs under ``python -O``.
	        """
	        logger.info("🔍 Testing app.py file extraction logic...")

	        # Simulate GAIA question data structure
	        test_question_data = {
	            "task_id": "test-task-123",
	            "question": "What is the total sales in the attached Excel file?",
	            "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
	            "Level": 1,
	        }

	        files = self._extract_files(test_question_data)

	        if files is None:
	            raise AssertionError("File extraction failed")
	        if len(files) != 1:
	            raise AssertionError("Should extract exactly one file")
	        if files[0] != "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx":
	            raise AssertionError("File name mismatch")

	        logger.info("✅ App.py file extraction logic works correctly")
	        return True

	    def test_agent_file_processing(self):
	        """Check that ``FixedGAIAAgent`` answers questions about staged files.

	        The Excel and Python fixtures are each staged in ``DEPLOYMENT_DIR``
	        and passed to the agent by bare file name. Returns False when the
	        agent cannot be imported, when a call raises, or when any answer
	        contains an error marker; True otherwise. A flagged answer does not
	        stop the second fixture from being tried.
	        """
	        logger.info("🤖 Testing agent file processing...")

	        # (fixture kind, log icon, log label, question) per file to process.
	        cases = [
	            ("excel", "📊", "Excel",
	             "What is the total sales amount in the attached Excel file?"),
	            ("python", "🐍", "Python",
	             "What is the final numeric output from the attached Python code?"),
	        ]

	        try:
	            # Import the fixed agent
	            from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

	            agent = FixedGAIAAgent()
	            logger.info("✅ Agent imported and initialized successfully")

	            all_ok = True
	            for kind, icon, label, question in cases:
	                with self._staged(kind) as filename:
	                    try:
	                        response = agent(question, files=[filename])
	                        logger.info(f"{icon} {label} file processing response: {response[:100]}...")

	                        # A reply carrying an error marker is the failure this
	                        # validator exists to detect, so it fails the test.
	                        if self._response_ok(response):
	                            logger.info(f"✅ {label} file processed successfully")
	                        else:
	                            logger.warning(f"⚠️ {label} file processing may have issues: {response}")
	                            all_ok = False
	                    except Exception as e:
	                        logger.error(f"❌ {label} file processing failed: {e}")
	                        return False

	            return all_ok

	        except ImportError as e:
	            logger.error(f"❌ Could not import agent: {e}")
	            return False
	        except Exception as e:
	            logger.error(f"❌ Agent file processing test failed: {e}")
	            traceback.print_exc()
	            return False

	    def test_enhanced_search_paths(self):
	        """Report whether the file handler's search paths include the GAIA ones.

	        Returns False only when ``EnhancedFileHandler`` cannot be imported or
	        inspected. Missing paths are logged as warnings but do not fail the
	        test, as in the original behaviour.
	        NOTE(review): decide whether a missing path should fail the run.
	        """
	        logger.info("🔍 Testing enhanced search paths...")

	        try:
	            from utils.file_handler import EnhancedFileHandler

	            handler = EnhancedFileHandler()

	            # GAIA-specific paths expected among the handler's base paths.
	            expected_paths = [
	                self.DEPLOYMENT_DIR,
	                "/app",
	                "/data",
	            ]

	            missing = []
	            for expected_path in expected_paths:
	                if expected_path in handler.base_paths:
	                    logger.info(f"✅ Found expected path: {expected_path}")
	                else:
	                    logger.warning(f"⚠️ Missing expected path: {expected_path}")
	                    missing.append(expected_path)

	            logger.info(f"📁 Total search paths: {len(handler.base_paths)}")
	            # Only claim success when every expected path was actually found.
	            if missing:
	                logger.warning(f"⚠️ Enhanced search paths are missing {len(missing)} expected path(s)")
	            else:
	                logger.info("✅ Enhanced search paths configured correctly")
	            return True

	        except Exception as e:
	            logger.error(f"❌ Enhanced search paths test failed: {e}")
	            return False

	    def test_end_to_end_simulation(self):
	        """Simulate the app.py workflow for one file-based GAIA question.

	        Builds a question dict, extracts its file list, stages the Excel
	        fixture and calls ``DeploymentReadyGAIAAgent``. Returns True when the
	        answer carries no error marker, False otherwise or on any exception.
	        """
	        logger.info("🎯 Testing end-to-end GAIA evaluation simulation...")

	        try:
	            # Simulate the app.py workflow
	            from app import DeploymentReadyGAIAAgent

	            agent = DeploymentReadyGAIAAgent()

	            # Simulate GAIA question data with file
	            question_data = {
	                "task_id": "test-excel-task",
	                "question": "What is the total sales amount in the attached Excel file?",
	                "file_name": "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
	                "Level": 1,
	            }

	            # Extract data (simulating app.py logic)
	            question_text = question_data.get("question", "")
	            files = self._extract_files(question_data)

	            # Stage the fixture where the agent is expected to look for it.
	            with self._staged('excel'):
	                # Call agent (simulating app.py workflow)
	                if files:
	                    submitted_answer = agent(question_text, files)
	                else:
	                    submitted_answer = agent(question_text)

	                logger.info(f"🎯 End-to-end test response: {submitted_answer[:100]}...")

	                # Check for success indicators
	                if self._response_ok(submitted_answer):
	                    logger.info("✅ End-to-end simulation successful")
	                    return True

	                logger.warning(f"⚠️ End-to-end simulation may have issues: {submitted_answer}")
	                return False

	        except Exception as e:
	            logger.error(f"❌ End-to-end simulation failed: {e}")
	            traceback.print_exc()
	            return False

	    # ------------------------------------------------------------------
	    # Orchestration
	    # ------------------------------------------------------------------

	    def run_all_tests(self):
	        """Run every validation step in order and log a summary.

	        A step that raises is recorded as failed rather than aborting the
	        run. Returns True only when all steps passed.
	        """
	        logger.info("🚀 Starting GAIA File Handling Fix Validation...")

	        tests = [
	            ("Setup Test Files", self.setup_test_files),
	            ("App File Extraction", self.test_app_file_extraction),
	            ("Enhanced Search Paths", self.test_enhanced_search_paths),
	            ("Agent File Processing", self.test_agent_file_processing),
	            ("End-to-End Simulation", self.test_end_to_end_simulation),
	        ]

	        results = {}
	        total_tests = len(tests)
	        passed_tests = 0

	        for test_name, test_func in tests:
	            logger.info(f"\n{'='*50}")
	            logger.info(f"🧪 Running: {test_name}")
	            logger.info(f"{'='*50}")

	            try:
	                result = test_func()
	                results[test_name] = result
	                if result:
	                    passed_tests += 1
	                    logger.info(f"✅ {test_name}: PASSED")
	                else:
	                    logger.error(f"❌ {test_name}: FAILED")
	            except Exception as e:
	                logger.error(f"❌ {test_name}: FAILED with exception: {e}")
	                results[test_name] = False

	        # Summary
	        logger.info(f"\n{'='*60}")
	        logger.info("📊 GAIA FILE HANDLING FIX VALIDATION SUMMARY")
	        logger.info(f"{'='*60}")
	        logger.info(f"Total Tests: {total_tests}")
	        logger.info(f"Passed: {passed_tests}")
	        logger.info(f"Failed: {total_tests - passed_tests}")
	        logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")

	        for test_name, result in results.items():
	            status = "✅ PASSED" if result else "❌ FAILED"
	            logger.info(f" {test_name}: {status}")

	        if passed_tests == total_tests:
	            logger.info("\n🎉 ALL TESTS PASSED! File handling fix is working correctly.")
	            logger.info("🚀 The GAIA evaluation should now process file-based questions successfully.")
	        else:
	            logger.warning(f"\n⚠️ {total_tests - passed_tests} tests failed. File handling fix needs attention.")

	        return passed_tests == total_tests

	    def cleanup(self):
	        """Remove the scratch directory; a failure is logged, never raised."""
	        try:
	            shutil.rmtree(self.temp_dir)
	            logger.info(f"🧹 Cleaned up test directory: {self.temp_dir}")
	        except OSError as e:
	            logger.warning(f"⚠️ Could not clean up test directory: {e}")

	def main():
	    """Run the full validation and return a process exit code.

	    Returns 0 when every test passed and 1 otherwise. The validator's
	    cleanup always runs, including when the test run raises.
	    """
	    validator = GAIAFileHandlingFixValidator()

	    try:
	        all_passed = validator.run_all_tests()
	        exit_status = 0 if all_passed else 1
	    finally:
	        validator.cleanup()

	    return exit_status

	# Script entry point: the exit status is whatever main() returns.
	if __name__ == "__main__":
	    sys.exit(main())