Commit 326479a · download model
1 Parent(s): 66d6d1f
app.py
CHANGED
@@ -19,10 +19,9 @@ class GaiaToolCallingAgent:
     def __init__(self, local_model=None):
         print("GaiaToolCallingAgent initialized.")
         self.tool_manager = ToolManager()
         self.name = "tool_agent"
         self.description = "A specialized agent that uses various tools to answer questions"
 
-        # Use local model if provided, or create a simpler one
         self.local_model = local_model
         if not self.local_model:
             try:
@@ -35,13 +34,10 @@ class GaiaToolCallingAgent:
                 self.local_model = None
 
     def run(self, query: str) -> str:
-        """Process a query and return a response using available tools."""
         print(f"Processing query: {query}")
         tools = self.tool_manager.get_tools()
 
-        # For each tool, try to get relevant information
         context_info = []
-
         for tool in tools:
             try:
                 if self._should_use_tool(tool, query):
@@ -52,10 +48,8 @@ class GaiaToolCallingAgent:
             except Exception as e:
                 print(f"Error using {tool.name}: {e}")
 
-        # Combine all context information
         full_context = "\n\n".join(context_info) if context_info else ""
 
-        # If we have context and a local model, generate a proper response
         if full_context and self.local_model:
             try:
                 prompt = f"""
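Note: run() reduces to gather-then-summarize — query every applicable tool, join the collected context, and only then let the local model phrase the answer. A minimal standalone sketch of that pattern; DummyTool and gather_context are hypothetical names, not part of this repo:

    # Gather context from every tool that opts in, tolerating per-tool failures.
    class DummyTool:
        def __init__(self, name, payload):
            self.name = name
            self.payload = payload

        def forward(self, query):
            return f"{self.name}: {self.payload} (for {query!r})"

    def gather_context(tools, query, should_use):
        context_info = []
        for tool in tools:
            try:
                if should_use(tool, query):
                    context_info.append(tool.forward(query))
            except Exception as e:
                print(f"Error using {tool.name}: {e}")
        return "\n\n".join(context_info)

    tools = [DummyTool("web_search", "result A"), DummyTool("wikipedia_search", "result B")]
    print(gather_context(tools, "what is GAIA?", lambda tool, query: True))

If the join comes back empty, run() skips the model call and falls through to a canned "couldn't find any relevant information" reply, as the next hunk shows.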
@@ -71,24 +65,18 @@ class GaiaToolCallingAgent:
                 return response
             except Exception as e:
                 print(f"Error generating response with local model: {e}")
-                # Fall back to returning just the context
                 return full_context
         else:
-            # No context or no model, return whatever we have
             if not full_context:
                 return "I couldn't find any relevant information to answer your question."
             return full_context
 
     def __call__(self, query: str) -> str:
-        """Make the agent callable so it can be used directly by CodeAgent."""
         print(f"Tool agent received query: {query}")
         return self.run(query)
 
     def _should_use_tool(self, tool: Tool, query: str) -> bool:
-        """Determine if a specific tool should be used for the query."""
         query_lower = query.lower()
-
-        # Tool-specific patterns
         patterns = {
             "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
             "web_content": ["content", "webpage", "website", "page"],
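Note: the patterns table is split across this hunk boundary; its remaining entries open the next hunk. The routing rule is plain keyword containment with a permissive default, as in this toy standalone version (trimmed table, hypothetical should_use_tool name):

    # Keyword router: a tool with no entry in the table is always tried.
    patterns = {
        "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
        "web_content": ["content", "webpage", "website", "page"],
    }

    def should_use_tool(tool_name, query):
        query_lower = query.lower()
        if tool_name not in patterns:
            return True
        return any(p in query_lower for p in patterns[tool_name])

    assert should_use_tool("web_search", "Who won the latest race?")
    assert not should_use_tool("web_content", "2 + 2 = ?")
    assert should_use_tool("unlisted_tool", "anything")

Because question words like "what" and "how" appear in almost every query, web_search effectively always fires; the default-True branch does the same for any tool missing from the table.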
@@ -96,77 +84,70 @@ class GaiaToolCallingAgent:
             "wikipedia_search": ["wikipedia", "wiki", "article"],
             "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"]
         }
-
-        # Use all tools if patterns dict doesn't have the tool name
         if tool.name not in patterns:
             return True
-
         return any(pattern in query_lower for pattern in patterns.get(tool.name, []))
 
+def download_model_if_needed(model_path, model_url):
+    if not os.path.exists(model_path):
+        print(f"Downloading model from {model_url}...")
+        os.makedirs(os.path.dirname(model_path), exist_ok=True)
+        with requests.get(model_url, stream=True) as response:
+            response.raise_for_status()
+            with open(model_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        print("Download complete.")
+
 def create_manager_agent() -> CodeAgent:
-    """Create and configure the main GAIA agent."""
-
     try:
-
-        from config import LOCAL_MODEL_CONFIG, USE_LLAMACPP, LLAMACPP_CONFIG
+        from config import USE_LLAMACPP, LLAMACPP_CONFIG
 
-        # Use llama-cpp-python model (no PyTorch dependency)
         if USE_LLAMACPP:
-            #
+            # Use TheBloke's model with auto-download
+            model_path = LLAMACPP_CONFIG.get("model_path") or "./models/llama-2-7b.Q4_0.gguf"
+            model_url = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf"
+
+            download_model_if_needed(model_path, model_url)
+
             model = LlamaCppModel(
-                model_path=LLAMACPP_CONFIG.get("model_path"),
-                model_url=LLAMACPP_CONFIG.get("model_url"),
+                model_path=model_path,
                 n_ctx=LLAMACPP_CONFIG.get("n_ctx", 2048),
                 n_gpu_layers=LLAMACPP_CONFIG.get("n_gpu_layers", 0),
-                max_tokens=LLAMACPP_CONFIG.get("max_tokens", 512),
                 temperature=LLAMACPP_CONFIG.get("temperature", 0.7)
             )
-            print(f"Using LlamaCpp model")
+            print(f"Using LlamaCpp model from {model_path}")
         else:
-            # Use a simpler stub model if needed
             from smolagents import StubModel
             model = StubModel()
             print("Using StubModel as fallback")
-
     except Exception as e:
         print(f"Error setting up model: {e}")
-        # Use a simplified configuration as fallback
         try:
-            # Simple fallback with default params
             model = LlamaCppModel()
             print("Using fallback LlamaCpp model configuration")
         except Exception as e2:
-            # Last resort fallback
            from smolagents import StubModel
            model = StubModel()
            print(f"Using StubModel due to error: {e2}")
 
-    # Initialize the managed tool-calling agent, sharing the model
     tool_agent = GaiaToolCallingAgent(local_model=model)
 
-    # Create the manager agent
     manager_agent = CodeAgent(
         model=model,
         tools=[],
         managed_agents=[tool_agent],
         additional_authorized_imports=[
-            "json",
-            "pandas",
-            "numpy",
-            "re",
-            "requests",
-            "bs4"
+            "json", "pandas", "numpy", "re", "requests", "bs4"
         ],
         planning_interval=3,
         verbosity_level=2,
         max_steps=10
     )
-
     print("Manager agent created with local model")
     return manager_agent
 
 def create_agent():
-    """Create the GAIA agent system."""
     try:
         print("Initializing GAIA agent system...")
         return create_manager_agent()
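Note: download_model_if_needed streams straight into its final path, so a connection drop leaves a partial file that the os.path.exists check will later mistake for a complete model. A hedged sketch of a temp-file-and-rename variant (download_model_atomic is a hypothetical name, not in this commit):

    import os
    import requests

    def download_model_atomic(model_path, model_url):
        # Download to a side file and rename only on success, so a crashed
        # run can never leave a truncated file at model_path.
        if os.path.exists(model_path):
            return
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        tmp_path = model_path + ".part"
        with requests.get(model_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(tmp_path, model_path)  # atomic rename on POSIX

huggingface_hub.hf_hub_download(repo_id="TheBloke/Llama-2-7B-GGUF", filename="llama-2-7b.Q4_0.gguf") would achieve much the same with resumable, cached downloads, at the cost of one more dependency.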
@@ -175,13 +156,7 @@ def create_agent():
         return None
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the GAIA agent on them, submits all answers,
-    and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
-
+    space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
@@ -193,7 +168,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Initialize Agent
     try:
         print("Initializing GAIA agent system...")
         agent = create_agent()
@@ -204,7 +178,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Error initializing agent: {e}")
         return f"Error initializing agent: {e}", None
 
-    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
@@ -221,7 +194,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
 
-    # 3. Run Agent on Questions
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
@@ -232,28 +204,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            # Run the agent and get the response
             response = agent.run(f"Answer this question concisely: {question_text}")
-
-            # Clean up the response if needed
             if isinstance(response, dict):
                 submitted_answer = response.get("answer", str(response))
             else:
                 submitted_answer = str(response)
-
-            # Add to submission payload
             answers_payload.append({
                 "task_id": task_id,
                 "submitted_answer": submitted_answer
             })
-
-            # Log the result
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text,
                 "Submitted Answer": submitted_answer
             })
-
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({
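Note: agent frameworks may hand back a dict, a string, or an arbitrary object, and the loop above flattens all of them to a string before building the payload. The same rule in isolation (normalize_answer is a hypothetical name):

    def normalize_answer(response):
        # Dicts may carry the answer under an "answer" key; everything else
        # is stringified as-is.
        if isinstance(response, dict):
            return response.get("answer", str(response))
        return str(response)

    assert normalize_answer({"answer": "42"}) == "42"
    assert normalize_answer(42) == "42"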
@@ -266,19 +230,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
+
     print(f"Submitting {len(answers_payload)} answers to API...")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
-
         status_message = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
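Note: for reference, the body POSTed to the submit endpoint is the three-field shape assembled above; an illustrative instance (all values made up):

    submission_data = {
        "username": "alice",
        "agent_code": "https://huggingface.co/spaces/alice/gaia-agent/tree/main",
        "answers": [
            {"task_id": "task-001", "submitted_answer": "42"},
        ],
    }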
@@ -293,29 +256,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Error during submission: {e}")
     return status_message, pd.DataFrame(results_log)
 
-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
     gr.Markdown("# GAIA Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        …
-        The agent uses a managed tool-calling architecture and the smolagents framework for reliable answers.
-        """
-    )
-
+    gr.Markdown("""
+    **Instructions:**
+    1. Log in to your Hugging Face account using the button below.
+    2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and see the score.
+    """)
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
     print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)
     demo.launch(debug=True, share=False)
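Note: run_button.click wires outputs but no inputs, yet run_and_submit_all takes a profile argument. Gradio fills parameters type-hinted as gr.OAuthProfile itself when a LoginButton is present, passing None when nobody is logged in. A minimal sketch of that wiring (greet is a hypothetical example; OAuth only resolves when hosted on a Space):

    import gradio as gr

    def greet(profile: gr.OAuthProfile | None):
        # Not an input component: Gradio injects the profile by type hint.
        if profile is None:
            return "Not logged in."
        return f"Hello, {profile.username}!"

    with gr.Blocks() as demo:
        gr.LoginButton()
        btn = gr.Button("Greet")
        out = gr.Textbox()
        btn.click(fn=greet, outputs=[out])  # no inputs needed for profile

    # demo.launch()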
config.py
CHANGED
@@ -12,12 +12,21 @@ USE_LOCAL_MODEL = True  # Set to False to use remote API model instead
 USE_LLAMACPP = True  # Set to True to use llama-cpp-python instead of transformers
 
 # Configuration for llama-cpp-python model
+# LLAMACPP_CONFIG = {
+#     "model_path": None,  # Will use a default small model if None
+#     # Using a smaller GGUF model to avoid download issues
+#     "model_url": "https://huggingface.co/eachadea/ggml-gridlocked-alpha-3b/resolve/main/ggml-gridlocked-3b-q4_0.bin",
+#     "n_ctx": 2048,
+#     "n_gpu_layers": 0,  # Use 0 for CPU-only
+#     "max_tokens": 1024,
+#     "temperature": 0.7
+# }
+
 LLAMACPP_CONFIG = {
-    "model_path": None,  # Will use a default small model if None
-    # Using a smaller GGUF model to avoid download issues
-    "model_url": "https://huggingface.co/eachadea/ggml-gridlocked-alpha-3b/resolve/main/ggml-gridlocked-3b-q4_0.bin",
+    "model_path": None,  # Use a default or provide a local path manually
+    "model_url": "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf",
     "n_ctx": 2048,
-    "n_gpu_layers": 0,  # Use 0 for CPU-only
+    "n_gpu_layers": 0,  # CPU-only, adjust as needed for GPU
     "max_tokens": 1024,
     "temperature": 0.7
 }
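Note: a sketch of how this config plausibly maps onto llama-cpp-python, assuming LlamaCppModel wraps llama_cpp.Llama: n_ctx and n_gpu_layers belong to the constructor, while max_tokens and temperature are per-call sampling options. The Q4_0 7B GGUF referenced by model_url is a roughly 4 GB download, so with "model_path": None the auto-download in app.py does real work on first boot.

    from llama_cpp import Llama

    from config import LLAMACPP_CONFIG

    llm = Llama(
        model_path="./models/llama-2-7b.Q4_0.gguf",  # assumed local path
        n_ctx=LLAMACPP_CONFIG["n_ctx"],
        n_gpu_layers=LLAMACPP_CONFIG["n_gpu_layers"],
    )
    out = llm(
        "Q: What does GAIA benchmark? A:",
        max_tokens=LLAMACPP_CONFIG["max_tokens"],
        temperature=LLAMACPP_CONFIG["temperature"],
    )
    print(out["choices"][0]["text"])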