import os import gradio as gr import requests import pandas as pd from smolagents import Tool, CodeAgent, Model # Import internal modules from config import ( DEFAULT_API_URL, USE_LLAMACPP, LLAMACPP_CONFIG ) from tools.tool_manager import ToolManager from utils.llama_cpp_model import LlamaCppModel class GaiaToolCallingAgent: """Tool-calling agent specifically designed for the GAIA system.""" def __init__(self, local_model=None): print("GaiaToolCallingAgent initialized.") self.tool_manager = ToolManager() self.name = "tool_agent" # Add required name attribute for smolagents integration self.description = "A specialized agent that uses various tools to answer questions" # Required by smolagents # Use local model if provided, or create a simpler one self.local_model = local_model if not self.local_model: try: from utils.llama_cpp_model import LlamaCppModel self.local_model = LlamaCppModel( max_tokens=512 ) except Exception as e: print(f"Couldn't initialize local model in tool agent: {e}") self.local_model = None def run(self, query: str) -> str: """Process a query and return a response using available tools.""" print(f"Processing query: {query}") tools = self.tool_manager.get_tools() # For each tool, try to get relevant information context_info = [] for tool in tools: try: if self._should_use_tool(tool, query): print(f"Using tool: {tool.name}") result = tool.forward(query) if result: context_info.append(f"{tool.name} Results:\n{result}") except Exception as e: print(f"Error using {tool.name}: {e}") # Combine all context information full_context = "\n\n".join(context_info) if context_info else "" # If we have context and a local model, generate a proper response if full_context and self.local_model: try: prompt = f""" Based on the following information, please provide a comprehensive answer to the question: "{query}" CONTEXT INFORMATION: {full_context} Answer: """ response = self.local_model.generate(prompt) return response except Exception as e: print(f"Error generating response with local model: {e}") # Fall back to returning just the context return full_context else: # No context or no model, return whatever we have if not full_context: return "I couldn't find any relevant information to answer your question." return full_context def __call__(self, query: str) -> str: """Make the agent callable so it can be used directly by CodeAgent.""" print(f"Tool agent received query: {query}") return self.run(query) def _should_use_tool(self, tool: Tool, query: str) -> bool: """Determine if a specific tool should be used for the query.""" query_lower = query.lower() # Tool-specific patterns patterns = { "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"], "web_content": ["content", "webpage", "website", "page"], "youtube_video": ["youtube.com", "youtu.be"], "wikipedia_search": ["wikipedia", "wiki", "article"], "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"] } # Use all tools if patterns dict doesn't have the tool name if tool.name not in patterns: return True return any(pattern in query_lower for pattern in patterns.get(tool.name, [])) def create_manager_agent() -> CodeAgent: """Create and configure the main GAIA agent.""" try: # Import config for model from config import LOCAL_MODEL_CONFIG, USE_LLAMACPP, LLAMACPP_CONFIG # Use llama-cpp-python model (no PyTorch dependency) if USE_LLAMACPP: # Initialize llama-cpp model model = LlamaCppModel( model_path=LLAMACPP_CONFIG.get("model_path"), model_url=LLAMACPP_CONFIG.get("model_url"), n_ctx=LLAMACPP_CONFIG.get("n_ctx", 2048), n_gpu_layers=LLAMACPP_CONFIG.get("n_gpu_layers", 0), max_tokens=LLAMACPP_CONFIG.get("max_tokens", 512), temperature=LLAMACPP_CONFIG.get("temperature", 0.7) ) print(f"Using LlamaCpp model") else: # Use a simpler stub model if needed from smolagents import StubModel model = StubModel() print("Using StubModel as fallback") except Exception as e: print(f"Error setting up model: {e}") # Use a simplified configuration as fallback try: # Simple fallback with default params model = LlamaCppModel() print("Using fallback LlamaCpp model configuration") except Exception as e2: # Last resort fallback from smolagents import StubModel model = StubModel() print(f"Using StubModel due to error: {e2}") # Initialize the managed tool-calling agent, sharing the model tool_agent = GaiaToolCallingAgent(local_model=model) # Create the manager agent manager_agent = CodeAgent( model=model, tools=[], # No direct tools for manager managed_agents=[tool_agent], additional_authorized_imports=[ "json", "pandas", "numpy", "re", "requests", "bs4" ], planning_interval=3, verbosity_level=2, max_steps=10 ) print("Manager agent created with local model") return manager_agent def create_agent(): """Create the GAIA agent system.""" try: print("Initializing GAIA agent system...") return create_manager_agent() except Exception as e: print(f"Error creating GAIA agent: {e}") return None def run_and_submit_all(profile: gr.OAuthProfile | None): """ Fetches all questions, runs the GAIA agent on them, submits all answers, and displays the results. """ # --- Determine HF Space Runtime URL and Repo URL --- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code if profile: username = f"{profile.username}" print(f"User logged in: {username}") else: print("User not logged in.") return "Please Login to Hugging Face with the button.", None api_url = DEFAULT_API_URL questions_url = f"{api_url}/questions" submit_url = f"{api_url}/submit" # 1. Initialize Agent try: print("Initializing GAIA agent system...") agent = create_agent() if not agent: return "Error: Could not initialize agent.", None print("GAIA agent initialization complete.") except Exception as e: print(f"Error initializing agent: {e}") return f"Error initializing agent: {e}", None # 2. Fetch Questions print(f"Fetching questions from: {questions_url}") try: response = requests.get(questions_url, timeout=15) response.raise_for_status() questions_data = response.json() if not questions_data: print("Fetched questions list is empty.") return "Fetched questions list is empty or invalid format.", None print(f"Fetched {len(questions_data)} questions.") except requests.exceptions.RequestException as e: print(f"Error fetching questions: {e}") return f"Error fetching questions: {e}", None except Exception as e: print(f"An unexpected error occurred fetching questions: {e}") return f"An unexpected error occurred fetching questions: {e}", None # 3. Run Agent on Questions results_log = [] answers_payload = [] print(f"Running agent on {len(questions_data)} questions...") for item in questions_data: task_id = item.get("task_id") question_text = item.get("question") if not task_id or question_text is None: print(f"Skipping item with missing task_id or question: {item}") continue try: # Run the agent and get the response response = agent.run(f"Answer this question concisely: {question_text}") # Clean up the response if needed if isinstance(response, dict): submitted_answer = response.get("answer", str(response)) else: submitted_answer = str(response) # Add to submission payload answers_payload.append({ "task_id": task_id, "submitted_answer": submitted_answer }) # Log the result results_log.append({ "Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer }) except Exception as e: print(f"Error running agent on task {task_id}: {e}") results_log.append({ "Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}" }) if not answers_payload: print("Agent did not produce any answers to submit.") return "Agent did not produce any answers to submit.", pd.DataFrame(results_log) # 4. Prepare Submission agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" submission_data = { "username": username.strip(), "agent_code": agent_code, "answers": answers_payload } # 5. Submit print(f"Submitting {len(answers_payload)} answers to API...") try: response = requests.post(submit_url, json=submission_data, timeout=60) response.raise_for_status() result_data = response.json() status_message = ( f"Submission Successful!\n" f"User: {result_data.get('username')}\n" f"Overall Score: {result_data.get('score', 'N/A')}% " f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n" f"Message: {result_data.get('message', 'No message received.')}" ) print("Submission successful.") return status_message, pd.DataFrame(results_log) except Exception as e: status_message = f"Submission Failed: {str(e)}" print(f"Error during submission: {e}") return status_message, pd.DataFrame(results_log) # --- Build Gradio Interface using Blocks --- with gr.Blocks() as demo: gr.Markdown("# GAIA Agent Evaluation Runner") gr.Markdown( """ **Instructions:** 1. Log in to your Hugging Face account using the button below. 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and see the score. The agent uses a managed tool-calling architecture and the smolagents framework for reliable answers. """ ) gr.LoginButton() run_button = gr.Button("Run Evaluation & Submit All Answers") status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False) results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) run_button.click( fn=run_and_submit_all, outputs=[status_output, results_table] ) if __name__ == "__main__": print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30) demo.launch(debug=True, share=False)