Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
GAIA Agent Tool Integration Debugging Script | |
Phase 2: Tool Integration Validation | |
This script systematically tests and debugs each of the 11 tools in the GAIA Agent | |
to identify and resolve the issues causing evaluation failures. | |
Critical Issues to Debug: | |
1. Image Processing Failures: "I'm sorry, I am unable to process the image at the moment" | |
2. File Handling Issues: Missing file references and incorrect file path handling | |
3. Tool Selection Logic: Inappropriate tool selection for specific question types | |
4. API Integration: Ensure all API keys and endpoints are working correctly | |
""" | |
import os | |
import sys | |
import logging | |
import traceback | |
import tempfile | |
import json | |
from pathlib import Path | |
from typing import Dict, Any, List, Optional | |
import requests | |
from PIL import Image | |
import io | |
# Add deployment-ready to path
# Make sibling packages (e.g. agents.*) importable when this file runs as a script.
sys.path.insert(0, str(Path(__file__).parent))
# Configure logging
# Root-logger setup: timestamped, named, leveled records at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
class GAIAToolDebugger:
    """Comprehensive debugger for GAIA Agent tools."""

    def __init__(self):
        """Set up the debugger: env vars, empty result buckets, test fixtures."""
        logger.info("π Initializing GAIA Tool Debugger...")
        self._load_env_file()  # pull API keys from .env before anything else

        # One result bucket per debug phase; filled in by the debug_* methods.
        phases = (
            'tool_initialization',
            'tool_functionality',
            'api_integrations',
            'multimodal_capabilities',
            'error_handling',
        )
        self.test_results = {phase: {} for phase in phases}
        self.test_results['overall_status'] = 'UNKNOWN'

        # Temp files and canned questions consumed by the functionality tests.
        self.test_data = self._prepare_test_data()
        logger.info("β GAIA Tool Debugger initialized")
def _load_env_file(self): | |
"""Load environment variables from .env file.""" | |
env_file = Path('.env') | |
if env_file.exists(): | |
with open(env_file, 'r') as f: | |
for line in f: | |
line = line.strip() | |
if line and not line.startswith('#') and '=' in line: | |
key, value = line.split('=', 1) | |
os.environ[key.strip()] = value.strip() | |
logger.info("β Environment variables loaded from .env") | |
else: | |
logger.warning("β οΈ No .env file found") | |
def _prepare_test_data(self) -> Dict[str, Any]: | |
"""Prepare test data for debugging.""" | |
# Create a simple test image | |
test_image = Image.new('RGB', (100, 100), color='red') | |
test_image_path = tempfile.mktemp(suffix='.png') | |
test_image.save(test_image_path) | |
# Create test audio file (placeholder) | |
test_audio_path = tempfile.mktemp(suffix='.wav') | |
with open(test_audio_path, 'wb') as f: | |
f.write(b'RIFF\x24\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x44\xac\x00\x00\x88X\x01\x00\x02\x00\x10\x00data\x00\x00\x00\x00') | |
# Create test document | |
test_document = "This is a test document. It contains information about testing. The answer to the test question is 42." | |
return { | |
'image_path': test_image_path, | |
'audio_path': test_audio_path, | |
'document_text': test_document, | |
'test_questions': { | |
'math': "What is 25 * 17?", | |
'python': "Calculate the sum of numbers from 1 to 10", | |
'wikipedia': "What is the capital of France?", | |
'arxiv': "Find papers about machine learning", | |
'web_search': "What is the current weather?", | |
'file_operation': "List files in current directory", | |
'shell_command': "echo 'Hello World'", | |
'image_analysis': "What do you see in this image?", | |
'audio_transcription': "Transcribe this audio", | |
'document_qa': "What is the answer mentioned in the document?" | |
} | |
} | |
def debug_tool_initialization(self) -> Dict[str, Any]: | |
"""Debug tool initialization process.""" | |
logger.info("π§ Debugging tool initialization...") | |
results = {} | |
# Test core AGNO tools | |
core_tools = [ | |
('calculator', 'agno.tools.calculator', 'CalculatorTools'), | |
('python', 'agno.tools.python', 'PythonTools'), | |
('wikipedia', 'agno.tools.wikipedia', 'WikipediaTools'), | |
('arxiv', 'agno.tools.arxiv', 'ArxivTools'), | |
('file', 'agno.tools.file', 'FileTools'), | |
('shell', 'agno.tools.shell', 'ShellTools'), | |
] | |
# Test API-dependent tools | |
api_tools = [ | |
('firecrawl', 'agno.tools.firecrawl', 'FirecrawlTools', 'FIRECRAWL_API_KEY'), | |
('exa', 'agno.tools.exa', 'ExaTools', 'EXA_API_KEY'), | |
] | |
# Test core tools | |
for tool_name, module_path, class_name in core_tools: | |
results[tool_name] = self._test_tool_initialization( | |
tool_name, module_path, class_name | |
) | |
# Test API tools | |
for tool_name, module_path, class_name, api_key in api_tools: | |
results[tool_name] = self._test_tool_initialization( | |
tool_name, module_path, class_name, api_key | |
) | |
# Test multimodal tools | |
results['multimodal'] = self._test_multimodal_initialization() | |
self.test_results['tool_initialization'] = results | |
return results | |
def _test_tool_initialization(self, tool_name: str, module_path: str, | |
class_name: str, required_api_key: str = None) -> Dict[str, Any]: | |
"""Test individual tool initialization.""" | |
result = { | |
'status': 'UNKNOWN', | |
'error': None, | |
'api_key_present': None, | |
'instance_created': False | |
} | |
try: | |
# Check API key if required | |
if required_api_key: | |
api_key_value = os.getenv(required_api_key) | |
result['api_key_present'] = bool(api_key_value) | |
if not api_key_value: | |
result['status'] = 'MISSING_API_KEY' | |
result['error'] = f"Missing {required_api_key}" | |
return result | |
# Try to import and instantiate | |
module = __import__(module_path, fromlist=[class_name]) | |
tool_class = getattr(module, class_name) | |
# Initialize with appropriate parameters | |
if tool_name == 'exa': | |
tool_instance = tool_class(api_key=os.getenv('EXA_API_KEY')) | |
elif tool_name == 'firecrawl': | |
tool_instance = tool_class(api_key=os.getenv('FIRECRAWL_API_KEY')) | |
else: | |
tool_instance = tool_class() | |
result['instance_created'] = True | |
result['status'] = 'SUCCESS' | |
logger.info(f"β {tool_name} initialized successfully") | |
except ImportError as e: | |
result['status'] = 'IMPORT_ERROR' | |
result['error'] = str(e) | |
logger.error(f"β {tool_name} import failed: {e}") | |
except Exception as e: | |
result['status'] = 'INITIALIZATION_ERROR' | |
result['error'] = str(e) | |
logger.error(f"β {tool_name} initialization failed: {e}") | |
return result | |
    def _test_multimodal_initialization(self) -> Dict[str, Any]:
        """Test multimodal tools initialization.

        Probes, in order: the Mistral client (two import layouts, legacy SDK
        first), transformers, and Faster-Whisper; then attempts to build
        OpenSourceMultimodalTools and snapshot its capabilities. Any failure
        in that final step downgrades status to ERROR.
        """
        result = {
            'status': 'UNKNOWN',
            'error': None,
            'mistral_available': False,
            'transformers_available': False,
            'whisper_available': False,
            'capabilities': {}
        }
        try:
            # Test Mistral availability
            try:
                # Legacy SDK layout exposes MistralClient under .client.
                from mistralai.client import MistralClient
                result['mistral_available'] = True
                logger.info("β Mistral client available")
            except ImportError:
                try:
                    # Newer SDK exposes `Mistral` at the package root instead.
                    from mistralai import Mistral as MistralClient
                    result['mistral_available'] = True
                    logger.info("β Mistral client available (alternative import)")
                except ImportError:
                    logger.warning("β οΈ Mistral client not available")
            # Test transformers availability
            try:
                from transformers import pipeline
                result['transformers_available'] = True
                logger.info("β Transformers available")
            except ImportError:
                logger.warning("β οΈ Transformers not available")
            # Test Faster-Whisper availability
            try:
                import faster_whisper
                result['whisper_available'] = True
                logger.info("β Faster-Whisper available")
            except ImportError:
                logger.warning("β οΈ Faster-Whisper not available")
            # Try to initialize multimodal tools
            from agents.mistral_multimodal_agent import OpenSourceMultimodalTools
            multimodal_tools = OpenSourceMultimodalTools()
            result['capabilities'] = multimodal_tools.get_capabilities_status()
            result['status'] = 'SUCCESS'
            logger.info("β Multimodal tools initialized")
        except Exception as e:
            result['status'] = 'ERROR'
            result['error'] = str(e)
            logger.error(f"β Multimodal tools initialization failed: {e}")
        return result
def debug_tool_functionality(self) -> Dict[str, Any]: | |
"""Debug individual tool functionality.""" | |
logger.info("π§ͺ Debugging tool functionality...") | |
results = {} | |
# Test each tool with appropriate test cases | |
test_cases = [ | |
('calculator', self._test_calculator), | |
('python', self._test_python), | |
('wikipedia', self._test_wikipedia), | |
('arxiv', self._test_arxiv), | |
('firecrawl', self._test_firecrawl), | |
('exa', self._test_exa), | |
('file', self._test_file), | |
('shell', self._test_shell), | |
('image_analysis', self._test_image_analysis), | |
('audio_transcription', self._test_audio_transcription), | |
('document_analysis', self._test_document_analysis), | |
] | |
for tool_name, test_func in test_cases: | |
try: | |
results[tool_name] = test_func() | |
except Exception as e: | |
results[tool_name] = { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
logger.error(f"β {tool_name} test failed: {e}") | |
self.test_results['tool_functionality'] = results | |
return results | |
def _test_calculator(self) -> Dict[str, Any]: | |
"""Test calculator tool.""" | |
try: | |
from agno.tools.calculator import CalculatorTools | |
calc = CalculatorTools() | |
# Test basic calculation using correct AGNO method | |
result = calc.multiply(25, 17) | |
expected = 425 | |
# Extract result from JSON response if needed | |
actual_result = result | |
if isinstance(result, dict) and 'result' in result: | |
actual_result = result['result'] | |
elif isinstance(result, str) and 'result' in result: | |
import json | |
try: | |
parsed = json.loads(result) | |
actual_result = parsed.get('result', result) | |
except: | |
actual_result = result | |
return { | |
'status': 'SUCCESS' if actual_result == expected else 'FAILED', | |
'test_input': "multiply(25, 17)", | |
'expected': expected, | |
'actual': actual_result, | |
'raw_result': result, | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_python(self) -> Dict[str, Any]: | |
"""Test Python tool.""" | |
try: | |
from agno.tools.python import PythonTools | |
python_tool = PythonTools() | |
# Test simple Python execution | |
code = "result = sum(range(1, 11))\nprint(result)" | |
result = python_tool.run_python_code(code) | |
# Extract actual output from result | |
if isinstance(result, dict): | |
# If result is a dict, look for output or stdout keys | |
actual_output = result.get('output', result.get('stdout', str(result))) | |
elif isinstance(result, str): | |
if "successfully" in result.lower() and "55" not in result: | |
# If it's just a success message, indicate that execution worked | |
actual_output = "Python execution completed successfully (output may be captured elsewhere)" | |
else: | |
actual_output = result | |
else: | |
actual_output = str(result) | |
return { | |
'status': 'SUCCESS' if '55' in str(result) or '55' in str(actual_output) else 'FAILED', | |
'test_input': code, | |
'expected': "55", | |
'actual': actual_output, | |
'raw_result': result, | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_wikipedia(self) -> Dict[str, Any]: | |
"""Test Wikipedia tool.""" | |
try: | |
from agno.tools.wikipedia import WikipediaTools | |
wiki = WikipediaTools() | |
# Test Wikipedia search | |
result = wiki.search_wikipedia("Paris France capital") | |
return { | |
'status': 'SUCCESS' if 'Paris' in str(result) else 'FAILED', | |
'test_input': "Paris France capital", | |
'actual': str(result)[:200] + "..." if len(str(result)) > 200 else str(result), | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_arxiv(self) -> Dict[str, Any]: | |
"""Test ArXiv tool.""" | |
try: | |
from agno.tools.arxiv import ArxivTools | |
arxiv = ArxivTools() | |
# Test ArXiv search using correct AGNO method (without max_results parameter) | |
result = arxiv.search_arxiv_and_return_articles("machine learning") | |
return { | |
'status': 'SUCCESS' if result and len(str(result)) > 10 else 'FAILED', | |
'test_input': "machine learning", | |
'actual': str(result)[:200] + "..." if len(str(result)) > 200 else str(result), | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_firecrawl(self) -> Dict[str, Any]: | |
"""Test Firecrawl tool.""" | |
api_key = os.getenv('FIRECRAWL_API_KEY') | |
if not api_key: | |
return { | |
'status': 'SKIPPED', | |
'error': 'FIRECRAWL_API_KEY not found' | |
} | |
try: | |
from agno.tools.firecrawl import FirecrawlTools | |
firecrawl = FirecrawlTools(api_key=api_key) | |
# Test simple web scraping using correct AGNO method | |
result = firecrawl.scrape_website("https://httpbin.org/json") | |
return { | |
'status': 'SUCCESS' if result and len(str(result)) > 10 else 'FAILED', | |
'test_input': "https://httpbin.org/json", | |
'actual': str(result)[:200] + "..." if len(str(result)) > 200 else str(result), | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_exa(self) -> Dict[str, Any]: | |
"""Test Exa tool.""" | |
api_key = os.getenv('EXA_API_KEY') | |
if not api_key: | |
return { | |
'status': 'SKIPPED', | |
'error': 'EXA_API_KEY not found' | |
} | |
try: | |
from agno.tools.exa import ExaTools | |
exa = ExaTools(api_key=api_key) | |
# Test search using correct AGNO method | |
result = exa.search_exa("Python programming", num_results=1) | |
return { | |
'status': 'SUCCESS' if result and len(str(result)) > 10 else 'FAILED', | |
'test_input': "Python programming", | |
'actual': str(result)[:200] + "..." if len(str(result)) > 200 else str(result), | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_file(self) -> Dict[str, Any]: | |
"""Test File tool.""" | |
try: | |
from agno.tools.file import FileTools | |
file_tool = FileTools() | |
# Test file listing (without parameters - check if method accepts no args) | |
result = file_tool.list_files() | |
return { | |
'status': 'SUCCESS' if result and len(str(result)) > 10 else 'FAILED', | |
'test_input': "current directory", | |
'actual': str(result)[:200] + "..." if len(str(result)) > 200 else str(result), | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_shell(self) -> Dict[str, Any]: | |
"""Test Shell tool.""" | |
try: | |
from agno.tools.shell import ShellTools | |
shell = ShellTools() | |
# Test simple command | |
result = shell.run_shell_command("echo 'Hello World'") | |
return { | |
'status': 'SUCCESS' if 'Hello World' in str(result) else 'FAILED', | |
'test_input': "echo 'Hello World'", | |
'expected': "Hello World", | |
'actual': result, | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_image_analysis(self) -> Dict[str, Any]: | |
"""Test image analysis capability.""" | |
try: | |
from agents.mistral_multimodal_agent import OpenSourceMultimodalTools | |
multimodal = OpenSourceMultimodalTools() | |
# Test with our test image | |
result = multimodal.analyze_image( | |
self.test_data['image_path'], | |
"What color is this image?" | |
) | |
# Check if we get a proper response (not an error message) | |
is_error = any(error_word in result.lower() for error_word in [ | |
'unable', 'cannot', 'error', 'failed', 'sorry' | |
]) | |
return { | |
'status': 'FAILED' if is_error else 'SUCCESS', | |
'test_input': "Red color image analysis", | |
'actual': result, | |
'error': result if is_error else None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_audio_transcription(self) -> Dict[str, Any]: | |
"""Test audio transcription capability.""" | |
try: | |
from agents.mistral_multimodal_agent import OpenSourceMultimodalTools | |
multimodal = OpenSourceMultimodalTools() | |
# Test with our test audio file | |
result = multimodal.transcribe_audio(self.test_data['audio_path']) | |
# Check if we get a proper response (not an error message) | |
is_error = any(error_word in result.lower() for error_word in [ | |
'unable', 'cannot', 'error', 'failed', 'sorry', 'not available' | |
]) | |
return { | |
'status': 'FAILED' if is_error else 'SUCCESS', | |
'test_input': "Test audio file", | |
'actual': result, | |
'error': result if is_error else None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def _test_document_analysis(self) -> Dict[str, Any]: | |
"""Test document analysis capability.""" | |
try: | |
from agents.mistral_multimodal_agent import OpenSourceMultimodalTools | |
multimodal = OpenSourceMultimodalTools() | |
# Test document Q&A | |
result = multimodal.analyze_document( | |
self.test_data['document_text'], | |
"What is the answer mentioned in the document?" | |
) | |
# Check if we get a proper response | |
is_error = any(error_word in result.lower() for error_word in [ | |
'unable', 'cannot', 'error', 'failed', 'sorry' | |
]) | |
return { | |
'status': 'FAILED' if is_error else 'SUCCESS', | |
'test_input': "Document Q&A about answer 42", | |
'expected': "42", | |
'actual': result, | |
'error': result if is_error else None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def debug_api_integrations(self) -> Dict[str, Any]: | |
"""Debug API integrations.""" | |
logger.info("π Debugging API integrations...") | |
results = {} | |
# Check API keys | |
api_keys = { | |
'MISTRAL_API_KEY': os.getenv('MISTRAL_API_KEY'), | |
'EXA_API_KEY': os.getenv('EXA_API_KEY'), | |
'FIRECRAWL_API_KEY': os.getenv('FIRECRAWL_API_KEY'), | |
} | |
for key_name, key_value in api_keys.items(): | |
results[key_name] = { | |
'present': bool(key_value), | |
'length': len(key_value) if key_value else 0, | |
'valid_format': self._validate_api_key_format(key_name, key_value) | |
} | |
# Test API connectivity | |
results['mistral_connectivity'] = self._test_mistral_api() | |
results['exa_connectivity'] = self._test_exa_api() | |
results['firecrawl_connectivity'] = self._test_firecrawl_api() | |
self.test_results['api_integrations'] = results | |
return results | |
def _validate_api_key_format(self, key_name: str, key_value: str) -> bool: | |
"""Validate API key format.""" | |
if not key_value: | |
return False | |
# Basic format validation | |
if key_name == 'MISTRAL_API_KEY': | |
return len(key_value) > 20 and key_value.startswith(('sk-', 'ms-')) | |
elif key_name == 'EXA_API_KEY': | |
return len(key_value) > 10 | |
elif key_name == 'FIRECRAWL_API_KEY': | |
return len(key_value) > 10 | |
return True | |
def _test_mistral_api(self) -> Dict[str, Any]: | |
"""Test Mistral API connectivity.""" | |
api_key = os.getenv('MISTRAL_API_KEY') | |
if not api_key: | |
return {'status': 'SKIPPED', 'error': 'API key not found'} | |
try: | |
from mistralai.client import MistralClient | |
client = MistralClient(api_key=api_key) | |
# Simple test call | |
response = client.chat( | |
model="mistral-large-latest", | |
messages=[{"role": "user", "content": "Hello"}], | |
max_tokens=10 | |
) | |
return { | |
'status': 'SUCCESS', | |
'response_length': len(str(response)), | |
'error': None | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e) | |
} | |
def _test_exa_api(self) -> Dict[str, Any]: | |
"""Test Exa API connectivity.""" | |
api_key = os.getenv('EXA_API_KEY') | |
if not api_key: | |
return {'status': 'SKIPPED', 'error': 'API key not found'} | |
try: | |
# Simple HTTP test to Exa API | |
headers = {'Authorization': f'Bearer {api_key}'} | |
response = requests.get( | |
'https://api.exa.ai/search', | |
headers=headers, | |
params={'query': 'test', 'num_results': 1}, | |
timeout=10 | |
) | |
return { | |
'status': 'SUCCESS' if response.status_code == 200 else 'FAILED', | |
'status_code': response.status_code, | |
'error': None if response.status_code == 200 else response.text | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e) | |
} | |
def _test_firecrawl_api(self) -> Dict[str, Any]: | |
"""Test Firecrawl API connectivity.""" | |
api_key = os.getenv('FIRECRAWL_API_KEY') | |
if not api_key: | |
return {'status': 'SKIPPED', 'error': 'API key not found'} | |
try: | |
# Simple HTTP test to Firecrawl API | |
headers = {'Authorization': f'Bearer {api_key}'} | |
response = requests.post( | |
'https://api.firecrawl.dev/v0/scrape', | |
headers=headers, | |
json={'url': 'https://httpbin.org/json'}, | |
timeout=10 | |
) | |
return { | |
'status': 'SUCCESS' if response.status_code == 200 else 'FAILED', | |
'status_code': response.status_code, | |
'error': None if response.status_code == 200 else response.text | |
} | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e) | |
} | |
def debug_agent_integration(self) -> Dict[str, Any]: | |
"""Debug the full agent integration.""" | |
logger.info("π€ Debugging full agent integration...") | |
try: | |
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent | |
# Initialize agent | |
agent = FixedGAIAAgent() | |
# Test agent status | |
status = agent.get_tool_status() | |
# Test with sample questions | |
test_questions = [ | |
"What is 25 * 17?", | |
"What is the capital of France?", | |
] | |
results = { | |
'agent_status': status, | |
'test_responses': {} | |
} | |
for question in test_questions: | |
try: | |
response = agent(question) | |
results['test_responses'][question] = { | |
'response': response, | |
'status': 'SUCCESS' if response != 'unknown' else 'FAILED' | |
} | |
except Exception as e: | |
results['test_responses'][question] = { | |
'response': None, | |
'status': 'ERROR', | |
'error': str(e) | |
} | |
return results | |
except Exception as e: | |
return { | |
'status': 'ERROR', | |
'error': str(e), | |
'traceback': traceback.format_exc() | |
} | |
def run_comprehensive_debug(self) -> Dict[str, Any]: | |
"""Run comprehensive debugging of all tools.""" | |
logger.info("π Starting comprehensive GAIA Agent debugging...") | |
# Run all debug phases | |
self.debug_tool_initialization() | |
self.debug_tool_functionality() | |
self.debug_api_integrations() | |
# Test full agent integration | |
self.test_results['agent_integration'] = self.debug_agent_integration() | |
# Determine overall status | |
self._determine_overall_status() | |
# Generate summary | |
self._generate_debug_summary() | |
return self.test_results | |
def _determine_overall_status(self): | |
"""Determine overall debugging status.""" | |
failed_tools = [] | |
error_tools = [] | |
# Check tool functionality | |
for tool_name, result in self.test_results['tool_functionality'].items(): | |
if result.get('status') == 'FAILED': | |
failed_tools.append(tool_name) | |
elif result.get('status') == 'ERROR': | |
error_tools.append(tool_name) | |
# Check API integrations | |
api_issues = [] | |
for api_name, result in self.test_results['api_integrations'].items(): | |
if isinstance(result, dict) and result.get('status') in ['FAILED', 'ERROR']: | |
api_issues.append(api_name) | |
if error_tools or api_issues: | |
self.test_results['overall_status'] = 'CRITICAL_ISSUES' | |
elif failed_tools: | |
self.test_results['overall_status'] = 'SOME_ISSUES' | |
else: | |
self.test_results['overall_status'] = 'HEALTHY' | |
def _generate_debug_summary(self): | |
"""Generate debugging summary.""" | |
summary = { | |
'total_tools_tested': len(self.test_results['tool_functionality']), | |
'successful_tools': [], | |
'failed_tools': [], | |
'error_tools': [], | |
'api_status': {}, | |
'critical_issues': [], | |
'recommendations': [] | |
} | |
# Analyze tool results | |
for tool_name, result in self.test_results['tool_functionality'].items(): | |
status = result.get('status', 'UNKNOWN') | |
if status == 'SUCCESS': | |
summary['successful_tools'].append(tool_name) | |
elif status == 'FAILED': | |
summary['failed_tools'].append(tool_name) | |
elif status == 'ERROR': | |
summary['error_tools'].append(tool_name) | |
# Analyze API status | |
for api_name, result in self.test_results['api_integrations'].items(): | |
if isinstance(result, dict): | |
summary['api_status'][api_name] = result.get('status', 'UNKNOWN') | |
# Identify critical issues | |
if 'image_analysis' in summary['failed_tools']: | |
summary['critical_issues'].append("Image processing failures - multimodal capabilities compromised") | |
if 'audio_transcription' in summary['failed_tools']: | |
summary['critical_issues'].append("Audio transcription failures - multimodal capabilities compromised") | |
if any('ERROR' in str(result) for result in self.test_results['api_integrations'].values()): | |
summary['critical_issues'].append("API integration failures - external service access compromised") | |
# Generate recommendations | |
if summary['failed_tools']: | |
summary['recommendations'].append(f"Fix failed tools: {', '.join(summary['failed_tools'])}") | |
if summary['error_tools']: | |
summary['recommendations'].append(f"Debug error tools: {', '.join(summary['error_tools'])}") | |
if 'image_analysis' in summary['failed_tools'] or 'audio_transcription' in summary['failed_tools']: | |
summary['recommendations'].append("Install missing multimodal dependencies (transformers, faster-whisper)") | |
self.test_results['debug_summary'] = summary | |
def save_results(self, output_file: str = "debug_results.json"): | |
"""Save debugging results to file.""" | |
with open(output_file, 'w') as f: | |
json.dump(self.test_results, f, indent=2, default=str) | |
logger.info(f"π Debug results saved to {output_file}") | |
def cleanup(self): | |
"""Clean up test files.""" | |
try: | |
if os.path.exists(self.test_data['image_path']): | |
os.unlink(self.test_data['image_path']) | |
if os.path.exists(self.test_data['audio_path']): | |
os.unlink(self.test_data['audio_path']) | |
except Exception as e: | |
logger.warning(f"β οΈ Cleanup failed: {e}") | |
def main():
    """Entry point: run the full debug suite, persist and print the summary."""
    debugger = GAIAToolDebugger()
    try:
        results = debugger.run_comprehensive_debug()
        debugger.save_results("debug_results.json")

        banner = "=" * 80
        print("\n" + banner)
        print("π GAIA AGENT TOOL DEBUGGING SUMMARY")
        print(banner)

        summary = results.get('debug_summary', {})
        print(f"π Overall Status: {results['overall_status']}")
        print(f"π§ Total Tools Tested: {summary.get('total_tools_tested', 0)}")
        print(f"β Successful Tools: {len(summary.get('successful_tools', []))}")
        print(f"β Failed Tools: {len(summary.get('failed_tools', []))}")
        print(f"π¨ Error Tools: {len(summary.get('error_tools', []))}")

        if summary.get('failed_tools'):
            print(f"\nβ Failed Tools: {', '.join(summary['failed_tools'])}")
        if summary.get('error_tools'):
            print(f"\nπ¨ Error Tools: {', '.join(summary['error_tools'])}")
        if summary.get('critical_issues'):
            print(f"\nπ¨ Critical Issues:")
            for issue in summary['critical_issues']:
                print(f" - {issue}")
        if summary.get('recommendations'):
            print(f"\nπ‘ Recommendations:")
            for rec in summary['recommendations']:
                print(f" - {rec}")
        print("\n" + banner)
    except Exception as e:
        logger.error(f"β Debugging failed: {e}")
        traceback.print_exc()
    finally:
        # Always remove temp fixtures, even when debugging fails.
        debugger.cleanup()


if __name__ == "__main__":
    main()