Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / agents /enhanced_unified_agno_agent.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc 9 days ago

17.9 kB

	"""
	GAIA Agent - Simplified Working Version
	Complete AGNO Tools with Basic Multimodal Integration

	This agent provides comprehensive GAIA evaluation capabilities using:
	- All AGNO tools (calculator, python, wikipedia, arxiv, firecrawl, exa, file, shell, and optionally youtube)
	- Basic multimodal tools (Mistral Vision when available)
	- Simple, reliable answer formatting
	- No complex dependencies that cause import failures

	Advantages:
	- Single agent for all GAIA tasks (text, math, multimodal)
	- AGNO's native orchestration handles tool selection
	- Simple, reliable architecture that works in HuggingFace Space
	- Consistent error handling and response formatting
	- No complex import dependencies
	"""

	import os
	import logging
	from typing import Dict, Any, List, Optional
	from pathlib import Path

	from agno.agent import Agent
	from agno.models.mistral import MistralChat

	# Import European open-source multimodal tools
	try:
	from .mistral_multimodal_agent import OpenSourceMultimodalTools
	MULTIMODAL_AVAILABLE = True
	except ImportError:
	try:
	from mistral_multimodal_agent import OpenSourceMultimodalTools
	MULTIMODAL_AVAILABLE = True
	except ImportError:
	OpenSourceMultimodalTools = None
	MULTIMODAL_AVAILABLE = False

	# Simple answer formatting without complex dependencies
	class SimpleAnswerFormatter:
	    """Reduce a raw model response to a short answer string for GAIA evaluation.

	    The formatter strips well-known lead-in phrases, removes markdown emphasis
	    characters, and returns the first line that looks like content rather than
	    a markdown heading or bullet.
	    """

	    # Lead-in phrases removed (case-insensitively) from the start of a response.
	    # No entry is a prefix of another, so at most one can match at a time.
	    _PREFIXES = (
	        "The answer is:",
	        "Answer:",
	        "Final answer:",
	        "The final answer is:",
	        "Based on my analysis,",
	        "According to my research,",
	    )

	    @staticmethod
	    def _is_markup_line(line: str) -> bool:
	        """Return True for markdown headings and '-' bullets/rules.

	        A line such as "-5" or "-.5" is a negative number, not a bullet, and
	        must remain eligible as the final answer.
	        """
	        if line.startswith('#'):
	            return True
	        if not line.startswith('-'):
	            return False
	        looks_numeric = line[1:2].isdigit() or (
	            line[1:2] == '.' and line[2:3].isdigit()
	        )
	        return not looks_numeric

	    def format_answer(self, response: str, question: Optional[str] = None) -> str:
	        """Format response for GAIA evaluation.

	        Args:
	            response: Raw text produced by the agent. ``None`` or an empty
	                string yields ``""``.
	            question: The original question. Accepted for interface
	                compatibility; it is not used by the formatter.

	        Returns:
	            The first non-empty line that is not a markdown heading or bullet,
	            after prefix and markdown stripping. If no such line exists, the
	            whole cleaned response is returned.
	        """
	        if not response:
	            return ""

	        answer = response.strip()

	        # Strip lead-in phrases until none is left, so stacked phrases such as
	        # "Final answer: The answer is: 42" are fully removed regardless of the
	        # order in which the prefixes are listed.
	        stripped = True
	        while stripped:
	            stripped = False
	            lowered = answer.lower()
	            for prefix in self._PREFIXES:
	                if lowered.startswith(prefix.lower()):
	                    answer = answer[len(prefix):].strip()
	                    stripped = True
	                    break

	        # Remove markdown emphasis and inline-code markers. The second call used
	        # to replace the empty string (a no-op); a backtick is presumably what
	        # was intended there.
	        answer = answer.replace("*", "").replace("`", "")

	        # Return the first line that looks like an actual answer.
	        for raw_line in answer.split('\n'):
	            line = raw_line.strip()
	            if line and not self._is_markup_line(line):
	                return line

	        return answer

	# Load environment variables from .env file
	def load_env_file(env_path: str = '.env') -> None:
	    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

	    Blank lines, lines starting with ``#`` and lines without ``=`` are ignored.
	    Keys and values are whitespace-stripped, and one pair of matching single or
	    double quotes around a value is removed. Values from the file overwrite
	    variables that are already set in the environment.

	    Args:
	        env_path: Path of the file to read. Defaults to ``.env`` in the current
	            working directory. A missing file is silently ignored.
	    """
	    env_file = Path(env_path)
	    if not env_file.is_file():
	        return

	    with env_file.open('r', encoding='utf-8') as handle:
	        for raw_line in handle:
	            line = raw_line.strip()
	            if not line or line.startswith('#') or '=' not in line:
	                continue

	            key, _, value = line.partition('=')
	            key = key.strip()
	            value = value.strip()

	            # An empty variable name cannot be set in the environment; skip
	            # malformed lines such as "=value" instead of raising.
	            if not key:
	                continue

	            # KEY="value" / KEY='value' should not keep the quotes, otherwise
	            # quoted API keys are stored with the quote characters included.
	            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
	                value = value[1:-1]

	            os.environ[key] = value

	# Populate os.environ from a local .env file (if one exists) at import time,
	# so that code further down in this module sees the values via os.getenv().
	load_env_file()

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)


	class GAIAAgent:
	    """
	    GAIA Agent with comprehensive AGNO tools and basic multimodal capabilities.

	    This agent combines all AGNO tools with basic multimodal processing,
	    providing a single interface for all GAIA evaluation tasks including:
	    - Text and mathematical reasoning
	    - Basic image analysis using Mistral Vision
	    - Web research and content extraction
	    - Simple, reliable answer formatting

	    Instances are callable: ``answer = agent(question)``.

	    Attributes:
	        response_formatter: ``SimpleAnswerFormatter`` used to post-process answers.
	        tools: Instantiated tool objects handed to the AGNO agent.
	        multimodal_tools: ``OpenSourceMultimodalTools`` instance, or ``None``.
	        mistral_api_key: Value of ``MISTRAL_API_KEY`` (``None`` if unset).
	        agent: Underlying AGNO ``Agent``, or ``None`` if it was not created.
	        available: ``True`` only when ``agent`` was created successfully.
	    """

	    def __init__(self):
	        """Initialize the unified AGNO agent.

	        A missing ``MISTRAL_API_KEY`` does not raise: the instance is created
	        with ``agent = None`` and ``available = False`` so that callers can
	        still query ``get_tool_status()``.
	        """
	        logger.info("🚀 Initializing Unified AGNO Agent...")

	        # Initialize simple answer formatter
	        self.response_formatter = SimpleAnswerFormatter()

	        # Initialize all AGNO tools
	        self.tools = self._init_all_agno_tools()

	        # Initialize European open-source multimodal tools
	        self.multimodal_tools = self._init_multimodal_tools()
	        if self.multimodal_tools:
	            # Tolerate a bundle that exposes no (or an empty) ``tools`` list
	            # rather than failing construction of the whole agent.
	            self.tools.extend(getattr(self.multimodal_tools, 'tools', None) or [])

	        # Check for required API key
	        self.mistral_api_key = os.getenv("MISTRAL_API_KEY")
	        if not self.mistral_api_key:
	            logger.error("❌ MISTRAL_API_KEY not found - AGNO agent requires this for orchestration")
	            self.agent = None
	            self.available = False
	            return

	        # Create the unified AGNO agent
	        self.agent = self._create_agno_agent()

	        # Set availability flag
	        self.available = self.agent is not None

	        if self.available:
	            logger.info("✅ Unified AGNO Agent initialized successfully")
	            logger.info(f"📊 Available tools: {len(self.tools)}")
	        else:
	            logger.error("❌ Unified AGNO Agent initialization failed")

	    def _init_all_agno_tools(self) -> List[Any]:
	        """Initialize all available AGNO tools.

	        Each configured tool is imported and instantiated independently; a tool
	        whose required environment variable is missing, whose module cannot be
	        imported, or whose constructor raises is skipped with a warning.

	        Returns:
	            The list of tool instances that were created successfully (possibly
	            empty).
	        """
	        # Imported locally so this method does not depend on a module-level import.
	        import importlib

	        tools = []
	        tool_status = {}

	        # Define all AGNO tools with their requirements
	        tools_config = [
	            # Core computational tools
	            {
	                'name': 'calculator',
	                'module': 'agno.tools.calculator',
	                'class': 'CalculatorTools',
	                'required_env': None,
	                'description': 'Mathematical calculations and operations'
	            },
	            {
	                'name': 'python',
	                'module': 'agno.tools.python',
	                'class': 'PythonTools',
	                'required_env': None,
	                'description': 'Python code execution and analysis'
	            },

	            # Knowledge and research tools
	            {
	                'name': 'wikipedia',
	                'module': 'agno.tools.wikipedia',
	                'class': 'WikipediaTools',
	                'required_env': None,
	                'description': 'Wikipedia knowledge retrieval'
	            },
	            {
	                'name': 'arxiv',
	                'module': 'agno.tools.arxiv',
	                'class': 'ArxivTools',
	                'required_env': None,
	                'description': 'Academic research via ArXiv'
	            },

	            # Web tools
	            {
	                'name': 'firecrawl',
	                'module': 'agno.tools.firecrawl',
	                'class': 'FirecrawlTools',
	                'required_env': 'FIRECRAWL_API_KEY',
	                'description': 'Web content extraction'
	            },
	            {
	                'name': 'exa',
	                'module': 'agno.tools.exa',
	                'class': 'ExaTools',
	                'required_env': 'EXA_API_KEY',
	                'description': 'Advanced web search'
	            },

	            # System tools
	            {
	                'name': 'file',
	                'module': 'agno.tools.file',
	                'class': 'FileTools',
	                'required_env': None,
	                'description': 'File operations and management'
	            },
	            {
	                'name': 'shell',
	                'module': 'agno.tools.shell',
	                'class': 'ShellTools',
	                'required_env': None,
	                'description': 'System shell operations'
	            },

	            # Optional multimodal tools
	            {
	                'name': 'youtube',
	                'module': 'agno.tools.youtube',
	                'class': 'YouTubeTools',
	                'required_env': None,
	                'description': 'YouTube video transcription and analysis',
	                'optional_deps': ['youtube_transcript_api']
	            },
	        ]

	        for tool_config in tools_config:
	            tool_name = tool_config['name']
	            module_path = tool_config['module']
	            class_name = tool_config['class']
	            required_env = tool_config['required_env']
	            description = tool_config['description']
	            optional_deps = tool_config.get('optional_deps', [])

	            try:
	                # Check if required environment variable is available
	                env_value = os.getenv(required_env) if required_env else None
	                if required_env and not env_value:
	                    logger.warning(f"⚠️ {required_env} not found, {tool_name} tool unavailable")
	                    tool_status[tool_name] = f"Missing {required_env}"
	                    continue

	                # Import and instantiate the tool
	                module = importlib.import_module(module_path)
	                tool_class = getattr(module, class_name)

	                # Tools that declare a required environment variable (exa,
	                # firecrawl) receive its value as ``api_key``; all others are
	                # constructed without arguments.
	                if required_env:
	                    tool_instance = tool_class(api_key=env_value)
	                else:
	                    tool_instance = tool_class()

	                tools.append(tool_instance)
	                tool_status[tool_name] = "✅ Available"
	                logger.info(f"✅ {class_name} initialized: {description}")

	            except ImportError as e:
	                if optional_deps and any(dep in str(e) for dep in optional_deps):
	                    logger.warning(f"⚠️ {class_name} not available: missing optional dependency")
	                    tool_status[tool_name] = "Missing optional dependency"
	                else:
	                    logger.warning(f"⚠️ {class_name} not available: {e}")
	                    tool_status[tool_name] = f"Import error: {str(e)[:50]}"
	            except Exception as e:
	                # Best effort: one broken tool must not prevent the others
	                # from being loaded.
	                logger.warning(f"⚠️ {class_name} not available: {e}")
	                tool_status[tool_name] = f"Error: {str(e)[:50]}"

	        # Log tool availability summary
	        logger.info("📊 AGNO Tools Status:")
	        for tool_name, status in tool_status.items():
	            logger.info(f" {tool_name}: {status}")

	        return tools

	    def _init_multimodal_tools(self) -> Optional[Any]:
	        """Initialize European open-source multimodal tools.

	        Returns:
	            An ``OpenSourceMultimodalTools`` instance, or ``None`` when the
	            module could not be imported or its constructor raised.
	        """
	        if not MULTIMODAL_AVAILABLE:
	            logger.warning("⚠️ European open-source multimodal tools not available")
	            return None

	        try:
	            multimodal_tools = OpenSourceMultimodalTools()
	            logger.info("✅ European open-source multimodal tools initialized")
	            logger.info("🇪🇺 Features: Image analysis (BLIP-2/Mistral Vision), Audio transcription (Faster-Whisper), Document analysis")
	            return multimodal_tools
	        except Exception as e:
	            logger.warning(f"⚠️ Failed to initialize multimodal tools: {e}")
	            return None

	    def _create_agno_agent(self) -> Optional[Agent]:
	        """Create the unified AGNO agent with all available tools.

	        Returns:
	            The configured ``Agent``, or ``None`` if model or agent
	            construction raised.
	        """
	        if not self.tools:
	            logger.warning("⚠️ No AGNO tools available, creating agent without tools")

	        try:
	            # Create Mistral model for the agent
	            model = MistralChat(
	                api_key=self.mistral_api_key,
	                id="mistral-large-latest",  # Use latest large model for better function calling
	                temperature=0.1,  # Low temperature for factual accuracy
	                max_tokens=2000
	            )

	            # Create the unified agent with all available tools
	            agent = Agent(
	                model=model,
	                tools=self.tools,
	                instructions=self._get_agent_instructions(),
	                show_tool_calls=True,  # Enable tool call visibility for debugging
	                markdown=True,
	                debug_mode=True  # Enable debug mode to see tool usage
	            )

	            logger.info(f"✅ Unified AGNO Agent created with {len(self.tools)} tools")
	            return agent

	        except Exception as e:
	            logger.error(f"❌ Failed to create AGNO agent: {e}")
	            return None

	    def _get_agent_instructions(self) -> str:
	        """Get comprehensive instructions for the unified AGNO agent.

	        Returns:
	            The static prompt passed to ``Agent`` as ``instructions``.
	        """
	        return """You are a GAIA evaluation agent with access to comprehensive AGNO tools.

	CRITICAL GAIA EVALUATION REQUIREMENTS:
	1. EXACT ANSWER MATCHING: Your final answer must match the expected answer EXACTLY
	2. NO EXPLANATIONS: Provide only the final answer, no reasoning or explanations
	3. PRECISE FORMAT: Follow the exact format expected (number, text, etc.)
	4. FACTUAL ACCURACY: Use tools to verify all information before answering

	AVAILABLE TOOLS AND WHEN TO USE THEM:

	CORE COMPUTATIONAL TOOLS:
	1. CALCULATOR TOOLS - Use for:
	- Mathematical calculations and operations
	- Unit conversions and numerical computations
	- Complex mathematical expressions

	2. PYTHON TOOLS - Use for:
	- Code execution and analysis
	- Data processing and calculations
	- Algorithm implementation

	KNOWLEDGE AND RESEARCH TOOLS:
	3. WIKIPEDIA TOOLS - Use ONLY when:
	- Wikipedia is explicitly mentioned in the question
	- Question specifically asks about Wikipedia content
	- Question references "according to Wikipedia" or similar

	4. ARXIV TOOLS - Use for:
	- Academic research and scientific papers
	- Technical and research-oriented questions
	- Latest scientific developments

	WEB RESEARCH TOOLS:
	5. EXA TOOLS - Use for:
	- General web search and research
	- Finding current information and recent developments
	- Biographical information and general knowledge queries
	- Any web-based fact-checking and information gathering

	6. FIRECRAWL TOOLS - Use for:
	- Web content extraction from specific URLs provided in the question
	- Detailed webpage analysis when URL is given
	- Content scraping when specific URLs need to be processed

	SYSTEM TOOLS:
	7. FILE TOOLS - Use for:
	- File operations and management
	- Reading and processing local files
	- File system operations

	8. SHELL TOOLS - Use for:
	- System operations and commands
	- Environment queries
	- System-level information gathering

	9. YOUTUBE TOOLS - Use for:
	- YouTube video transcription
	- Video content analysis via transcripts
	- Understanding video content without watching

	MULTIMODAL TOOLS (European Open-Source):
	10. IMAGE ANALYSIS - Use for:
	- Analyzing images using BLIP-2 or Mistral Vision
	- Answering questions about image content
	- Visual reasoning and description

	11. AUDIO TRANSCRIPTION - Use for:
	- Transcribing audio files using Faster-Whisper (European community-driven)
	- Converting speech to text for analysis
	- Processing audio content

	12. DOCUMENT ANALYSIS - Use for:
	- Analyzing document content and answering questions
	- Text-based document processing
	- Document question-answering using DistilBERT

	GENERAL STRATEGY:
	1. Analyze the question to determine the most appropriate tool(s)
	2. Use tools systematically to gather accurate information
	3. Synthesize findings into a precise, compliant answer
	4. Always prioritize accuracy and factual correctness
	5. Use multiple tools if needed for verification

	ANSWER FORMAT:
	- Provide ONLY the final answer
	- No explanations, reasoning, or additional text
	- Match the expected format exactly (number, text, date, etc.)
	- Ensure factual accuracy through tool verification"""

	    def __call__(self, question: str) -> str:
	        """Process a question using the unified AGNO agent.

	        Args:
	            question: The question text to answer.

	        Returns:
	            The formatted answer. ``"Agent not available"`` is returned when
	            the agent was not created, and ``"Error: <message>"`` when
	            processing raised; this method itself does not raise.
	        """
	        if not self.available:
	            logger.error("❌ Unified AGNO Agent not available - check MISTRAL_API_KEY")
	            return "Agent not available"

	        try:
	            logger.info(f"🤔 Processing question with Unified AGNO Agent: {question[:100]}...")

	            # Use AGNO agent to process the question with full orchestration
	            response = self.agent.run(question)

	            # Extract the response content. The content may be None or a
	            # non-string object; normalise it to text so that formatting and
	            # the truncated log line below cannot fail on it.
	            content = response.content if hasattr(response, 'content') else response
	            raw_answer = "" if content is None else str(content)

	            # Format the response for GAIA evaluation
	            formatted_answer = self.response_formatter.format_answer(raw_answer, question)

	            logger.info("✅ Question processed successfully")
	            logger.info(f"📝 Raw answer: {raw_answer[:200]}...")
	            logger.info(f"🎯 Formatted answer: {formatted_answer}")

	            return formatted_answer

	        except Exception as e:
	            # Top-level boundary: report the failure as the answer text.
	            logger.error(f"❌ Error processing question: {e}")
	            return f"Error: {str(e)}"

	    def get_tool_status(self) -> Dict[str, Any]:
	        """Get the current status of all tools.

	        Returns:
	            A dict with the keys ``available``, ``tools_count``,
	            ``mistral_api_key_present``, ``agent_created``,
	            ``multimodal_tools_available`` and ``multimodal_status``.
	        """
	        multimodal_status = {}
	        if hasattr(self, 'multimodal_tools') and self.multimodal_tools:
	            multimodal_status = self.multimodal_tools.get_capabilities_status()

	        return {
	            'available': self.available,
	            'tools_count': len(self.tools) if self.tools else 0,
	            'mistral_api_key_present': bool(self.mistral_api_key),
	            'agent_created': self.agent is not None,
	            'multimodal_tools_available': MULTIMODAL_AVAILABLE,
	            'multimodal_status': multimodal_status
	        }


	# Create the shared, module-level agent instance at import time. When
	# MISTRAL_API_KEY is missing the instance is still created but is marked
	# unavailable (see GAIAAgent.__init__).
	gaia_agent = GAIAAgent()


	def process_question(question: str) -> str:
	    """Answer *question* by delegating to the module-level ``gaia_agent``.

	    Args:
	        question: The question text to answer.

	    Returns:
	        Whatever string the shared agent returns for the question.
	    """
	    answer = gaia_agent(question)
	    return answer


	def get_agent_status() -> Dict[str, Any]:
	    """Report the status of the module-level ``gaia_agent``.

	    Returns:
	        The dict produced by the shared agent's ``get_tool_status()``.
	    """
	    status = gaia_agent.get_tool_status()
	    return status