Spaces:
Running
Running
#!/usr/bin/env python3
"""
Excel File Processing Debug Test
Tests the specific "Could not resolve file path" issue for Excel files
"""
import os | |
import sys | |
import logging | |
import tempfile | |
import pandas as pd | |
from pathlib import Path | |
# Add the deployment-ready directory to Python path | |
sys.path.insert(0, '/workspaces/gaia-agent-python/deployment-ready') | |
from utils.file_handler import EnhancedFileHandler, FileType, FileFormat | |
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent | |
# Configure logging: INFO-level records formatted with timestamp, logger
# name and level, plus the module-level logger used by every test below.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def create_test_excel_file():
    """Write a small sales workbook to a temporary .xlsx file.

    The sheet has three columns (Item, Category, Sales) and eight rows of
    sample fast-food sales data.

    Returns:
        tuple: ``(path, frame)`` where ``path`` is the name of the temporary
        file that was written and ``frame`` is the DataFrame stored in it.
        The file is created with ``delete=False``, so the caller is
        responsible for removing it.
    """
    # Sample sales rows: (item, category, sales)
    rows = [
        ('Burger', 'Food', 1250.50),
        ('Fries', 'Food', 875.25),
        ('Soda', 'Drink', 450.75),
        ('Chicken Sandwich', 'Food', 980.00),
        ('Water', 'Drink', 125.50),
        ('Salad', 'Food', 675.25),
        ('Coffee', 'Drink', 325.00),
        ('Juice', 'Drink', 275.25),
    ]
    sales_frame = pd.DataFrame(rows, columns=['Item', 'Category', 'Sales'])

    # Reserve a unique .xlsx name; the handle is closed on leaving the
    # ``with`` block so the workbook can be written by path afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as handle:
        workbook_path = handle.name

    # Write to Excel without the index column
    sales_frame.to_excel(workbook_path, index=False)

    logger.info(f"π Created test Excel file: {workbook_path}")
    logger.info(f"π Data preview:\n{sales_frame}")
    return workbook_path, sales_frame
def test_file_handler_excel_processing():
    """Exercise EnhancedFileHandler on a freshly generated Excel file.

    Four steps run in order: path resolution, type/format detection,
    validation and processing. The first failing step is logged and ends
    the test.

    Returns:
        bool: True when every step succeeds; False on the first failing
        step or when any exception is raised. The temporary workbook is
        removed in all cases.
    """
    logger.info("π§ͺ Testing File Handler Excel Processing...")

    # Only the path is needed here; the accompanying DataFrame is ignored.
    workbook_path, _ = create_test_excel_file()

    try:
        handler = EnhancedFileHandler()

        # Step 1: the handler must resolve the path to something truthy.
        logger.info("π Test 1: File path resolution")
        resolved = handler.resolve_file_path(workbook_path)
        if not resolved:
            logger.error(f"β Could not resolve file path: {workbook_path}")
            return False
        logger.info(f"β File path resolved: {resolved}")

        # Step 2: the file must be detected as DATA / XLSX.
        logger.info("π Test 2: File type detection")
        detected_type, detected_format = handler.detect_file_type(workbook_path)
        logger.info(f"π Detected type: {detected_type}, format: {detected_format}")
        type_ok = detected_type == FileType.DATA and detected_format == FileFormat.XLSX
        if not type_ok:
            logger.error(f"β Incorrect file type detection. Expected: DATA/XLSX, Got: {detected_type}/{detected_format}")
            return False

        # Step 3: validation must report success.
        logger.info("π Test 3: File validation")
        valid, validation_error = handler.validate_file(workbook_path)
        if not valid:
            logger.error(f"β File validation failed: {validation_error}")
            return False
        logger.info("β File validation passed")

        # Step 4: processing must not record an error on the file info.
        logger.info("π Test 4: File processing")
        processed = handler.process_file_input(workbook_path)
        if processed.info.error:
            logger.error(f"β File processing failed: {processed.info.error}")
            return False
        logger.info("β File processing succeeded")
        logger.info(f"π File info: {processed.info}")
        return True

    except Exception as exc:
        logger.error(f"β File handler test failed: {exc}")
        return False

    finally:
        # Cleanup: remove the temporary workbook if it is still present.
        if os.path.exists(workbook_path):
            os.unlink(workbook_path)
def test_excel_data_analysis():
    """Read the generated workbook with pandas and verify the food total.

    The sheet is loaded with ``pd.read_excel``, sales are summed for the
    'Food' and 'Drink' categories and overall, and the food total is
    compared with the sum of the four food rows within a 0.01 tolerance.

    Returns:
        bool: True when the food total matches; False when it does not or
        when any exception is raised. The temporary workbook is removed in
        all cases.
    """
    logger.info("π§ͺ Testing Excel Data Analysis...")

    # Only the path is needed here; the accompanying DataFrame is ignored.
    workbook_path, _ = create_test_excel_file()

    try:
        # Load the sheet back with pandas
        logger.info("π Testing pandas Excel reading")
        sales_df = pd.read_excel(workbook_path)
        logger.info(f"π Successfully read Excel file with shape: {sales_df.shape}")
        logger.info(f"π Columns: {list(sales_df.columns)}")

        # Split the sales by category
        logger.info("π Testing food vs drink filtering")
        is_food = sales_df['Category'] == 'Food'
        is_drink = sales_df['Category'] == 'Drink'
        food_sales = sales_df.loc[is_food, 'Sales'].sum()
        drink_sales = sales_df.loc[is_drink, 'Sales'].sum()
        total_sales = sales_df['Sales'].sum()

        logger.info(f"π Food sales: ${food_sales:.2f}")
        logger.info(f"π₯€ Drink sales: ${drink_sales:.2f}")
        logger.info(f"π° Total sales: ${total_sales:.2f}")

        # Burger + Fries + Chicken Sandwich + Salad = 3781.00
        expected_food_sales = 1250.50 + 875.25 + 980.00 + 675.25
        within_tolerance = abs(food_sales - expected_food_sales) < 0.01
        if not within_tolerance:
            logger.error(f"β Food sales calculation incorrect. Expected: {expected_food_sales}, Got: {food_sales}")
            return False

        logger.info("β Food sales calculation correct")
        return True

    except Exception as exc:
        logger.error(f"β Excel data analysis test failed: {exc}")
        return False

    finally:
        # Cleanup: remove the temporary workbook if it is still present.
        if os.path.exists(workbook_path):
            os.unlink(workbook_path)
def test_agent_excel_processing():
    """Ask the agent a GAIA-style question about the generated workbook.

    The agent is called with the question and the workbook path in
    ``files``. Its answer passes when any number found in the answer text
    equals the expected food total (3781.00) to within half a cent.
    Comparing numerically accepts "3781", "$3781.00" and "3,781.00", and
    rejects look-alikes such as "13781" that a substring test would accept.

    Returns:
        bool: True when the answer contains the expected amount; False when
        the agent reports itself unavailable, the answer is wrong or
        missing, or any exception is raised. The temporary workbook is
        removed in all cases.
    """
    import re  # local import: only the answer check below needs it

    logger.info("π§ͺ Testing Agent Excel Processing...")

    # Only the path is needed here; the accompanying DataFrame is ignored.
    excel_path, _ = create_test_excel_file()

    try:
        # Initialize agent
        logger.info("π€ Initializing GAIA Agent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            logger.error("β Agent not available - skipping agent test")
            return False

        # Test question similar to GAIA evaluation
        question = (
            "The attached Excel file contains the sales of menu items for a local "
            "fast-food chain. What were the total sales that the chain made from "
            "food (not including drinks)?"
        )

        logger.info(f"β Question: {question}")
        logger.info(f"π Attached file: {excel_path}")

        # Process with agent
        answer = agent(question, files=[excel_path])
        logger.info(f"π― Agent answer: '{answer}'")

        # Expected answer is $3781.00 (sum of food items)
        expected_answer = "3781.00"
        expected_value = float(expected_answer)

        # A missing answer becomes an empty string so the check fails with
        # the "answer incorrect" message rather than a TypeError.
        answer_text = "" if answer is None else str(answer)

        # Pull out every number (thousands separators allowed) and compare
        # by value rather than by substring.
        number_tokens = re.findall(r"\d[\d,]*(?:\.\d+)?", answer_text)
        answer_matches = any(
            abs(float(token.replace(",", "")) - expected_value) < 0.005
            for token in number_tokens
        )

        if answer_matches:
            logger.info("β Agent provided correct answer")
            return True

        logger.error(f"β Agent answer incorrect. Expected: {expected_answer}, Got: {answer}")
        return False

    except Exception as e:
        logger.error(f"β Agent Excel processing test failed: {e}")
        return False

    finally:
        # Cleanup: remove the temporary workbook if it is still present.
        if os.path.exists(excel_path):
            os.unlink(excel_path)
def test_file_path_variations():
    """Check path resolution for absolute, relative, "./" and missing paths.

    The generated workbook is copied into the current working directory so
    that the relative-path scenarios refer to a real file. Each scenario
    carries its own expectation: the three existing paths should resolve,
    and the non-existent file should not.

    Returns:
        dict: Maps each scenario name to True when the handler behaved as
        expected for that scenario (so a missing file that stays unresolved
        counts as True). An empty dict is returned when the test itself
        raised. Both the temporary workbook and the working-directory copy
        are removed in all cases.
    """
    import shutil

    logger.info("π§ͺ Testing File Path Variations...")

    # Only the path is needed here; the accompanying DataFrame is ignored.
    excel_path, _ = create_test_excel_file()
    current_dir_path = None
    copied_to_cwd = False

    try:
        file_handler = EnhancedFileHandler()
        file_name = os.path.basename(excel_path)

        # (scenario name, path handed to the handler, should it resolve?)
        test_cases = [
            ("Absolute path", excel_path, True),
            ("Relative path", file_name, True),
            ("Path with ./", f"./{file_name}", True),
            ("Non-existent file", "non_existent_file.xlsx", False),
        ]

        # Copy file to current directory for relative path tests. Skip the
        # copy when the workbook already lives there: copy2 would raise
        # SameFileError and the cleanup below must not treat it as a copy.
        current_dir_path = os.path.join(os.getcwd(), file_name)
        if os.path.abspath(current_dir_path) != os.path.abspath(excel_path):
            shutil.copy2(excel_path, current_dir_path)
            copied_to_cwd = True

        results = {}
        for test_name, test_path, should_resolve in test_cases:
            logger.info(f"π Testing {test_name}: {test_path}")
            resolved = file_handler.resolve_file_path(test_path)

            # Truthiness is used for both the log message and the recorded
            # outcome, so the two can never disagree.
            was_resolved = bool(resolved)
            results[test_name] = was_resolved == should_resolve

            if was_resolved and should_resolve:
                logger.info(f"β {test_name} resolved to: {resolved}")
            elif was_resolved:
                logger.error(f"{test_name} unexpectedly resolved to: {resolved}")
            elif should_resolve:
                logger.warning(f"β {test_name} could not be resolved")
            else:
                logger.info(f"{test_name} was not resolved, as expected")

        return results

    except Exception as e:
        logger.error(f"β File path variation test failed: {e}")
        return {}

    finally:
        # Cleanup runs on every exit path so neither file is left behind.
        if copied_to_cwd and os.path.exists(current_dir_path):
            os.unlink(current_dir_path)
        if os.path.exists(excel_path):
            os.unlink(excel_path)
def main():
    """Run all Excel file processing debug tests and log a summary.

    pandas and openpyxl are checked first; if either import fails the
    function logs an error and returns without running any test. Each test
    returns either a bool or a dict of ``{sub_test: bool}``. A dict counts
    as passed only when it is non-empty and all of its values are truthy;
    an empty dict is reported as a failure.
    """
    logger.info("π Starting Excel File Processing Debug Tests")

    # Check pandas availability
    try:
        import pandas as pd
        logger.info(f"β Pandas available: {pd.__version__}")
    except ImportError:
        logger.error("β Pandas not available - Excel processing will fail")
        return

    # Check openpyxl availability (required for Excel)
    try:
        import openpyxl
        logger.info(f"β OpenPyXL available: {openpyxl.__version__}")
    except ImportError:
        logger.error("β OpenPyXL not available - Excel processing will fail")
        return

    def _passed(result):
        """Collapse a bool or a {sub_test: bool} mapping into one verdict."""
        if isinstance(result, dict):
            # all() of an empty iterable is True, so require at least one
            # recorded outcome before calling the group a pass.
            return bool(result) and all(result.values())
        return bool(result)

    # Run tests (dict literals evaluate in order, so the tests run in the
    # order listed here).
    test_results = {
        "File Handler Excel Processing": test_file_handler_excel_processing(),
        "Excel Data Analysis": test_excel_data_analysis(),
        "File Path Variations": test_file_path_variations(),
        "Agent Excel Processing": test_agent_excel_processing(),
    }

    # Summary
    logger.info("π Test Results Summary:")
    for test_name, result in test_results.items():
        if isinstance(result, dict) and result:
            logger.info(f" {test_name}:")
            for sub_test, sub_result in result.items():
                status = "β PASS" if sub_result else "β FAIL"
                logger.info(f" {sub_test}: {status}")
        else:
            # Plain results, and empty groups that recorded no outcome.
            status = "β PASS" if _passed(result) else "β FAIL"
            logger.info(f" {test_name}: {status}")

    # Overall result
    all_passed = all(_passed(result) for result in test_results.values())

    if all_passed:
        logger.info("π All tests passed! Excel file processing is working correctly.")
    else:
        logger.error("π₯ Some tests failed. Excel file processing needs fixes.")
# Script entry point: run the debug suite only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()