Spaces:

davidgturner
/

GaiaAgentEvaluator

Sleeping

App Files Files Community

davidgturner committed on May 18

Commit

c1db1fc

1 Parent(s): e305927

- changes to get agent working

Browse files

Files changed (2) hide show

app.py +480 -6
requirements.txt +7 -1

app.py CHANGED Viewed

@@ -6,17 +6,455 @@ import pandas as pd
 import time
 import json
 from typing import Dict, List, Union, Optional
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         # Initialize the Hugging Face API client
         self.hf_api_url = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
         self.hf_api_token = os.getenv("HF_API_TOKEN")
         if not self.hf_api_token:
@@ -150,14 +588,50 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
     # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)

 import time
 import json
 from typing import Dict, List, Union, Optional
+import re
+from bs4 import BeautifulSoup
+from duckduckgo_search import DDGS
+from smolagents import Tool, CodeAgent, InferenceClientModel
+import random
+from smolagents import CodeAgent, InferenceClientModel
+# Import our custom tools from their modules
+# from smolagents.tools import DuckDuckGoSearchTool, WeatherInfoTool, HubStatsTool
+# from smolagents.tools import WebPageVisitTool, WebpageContentExtractorTool
+from smolagents import CodeAgent, InferenceClientModel, load_tool
+# Import necessary libraries
+import random
+from smolagents import CodeAgent, InferenceClientModel
+# Import our custom tools from their modules
+# from tools import DuckDuckGoSearchTool, WeatherInfoTool, HubStatsTool
+# from retriever import load_guest_dataset
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.retrievers import BM25Retriever
+import functools
+# Create a knowledge base for the agent
+GAIA_KNOWLEDGE = """
+### AI and Agent Concepts
+- An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals.
+- GAIA (General AI Assistant) is a framework for creating and evaluating AI assistants that can perform a wide range of tasks.
+- The agent loop consists of perception, reasoning, and action.
+- RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with generation capabilities of language models.
+- An LLM (Large Language Model) is a neural network trained on vast amounts of text data to understand and generate human language.
+### Agent Capabilities
+- Tool use refers to an agent's ability to employ external tools like search engines, APIs, or specialized algorithms.
+- An effective agent should be able to decompose complex problems into manageable parts.
+- Chain-of-thought reasoning allows agents to break down problem-solving steps to improve accuracy.
+- Agents should apply appropriate reasoning strategies based on the type of question (factual, analytical, etc.)
+- Self-reflection helps agents identify and correct errors in their reasoning.
+### Evaluation Criteria
+- Agent responses should be accurate, relevant, and factually correct.
+- Effective agents provide concise yet comprehensive answers.
+- Agents should acknowledge limitations and uncertainties when appropriate.
+- Good agents can follow multi-step instructions and fulfill all requirements.
+- Reasoning transparency helps users understand how the agent arrived at its conclusions.
+"""
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+LLAMA_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
+MAX_RETRIES = 3
+RETRY_DELAY = 2  # seconds
+# Create knowledge base documents
+def create_knowledge_documents():
+    """Split ``GAIA_KNOWLEDGE`` into text chunks and wrap each chunk in a ``Document``.
+
+    Returns:
+        A list with one ``Document`` per chunk, in the order the splitter produced them.
+    """
+    # Candidate split points, from paragraph breaks down to the empty string.
+    split_points = ["\n\n", "\n", ".", "!", "?", ",", " ", ""]
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=50,
+        separators=split_points
+    )
+    return [Document(page_content=piece) for piece in splitter.split_text(GAIA_KNOWLEDGE)]
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+# --- Tools ---
+class WebSearchTool(Tool):
+    """Tool that runs a DuckDuckGo text search and formats the hits as plain text.
+
+    ``max_results`` (set in ``__init__``) caps how many hits are requested.
+    """
+    name = "web_search"
+    description = "Search the web for information about a query using DuckDuckGo."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The search query."
+        }
+    }
+    output_type = "string"
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.max_results = 3
+    def forward(self, query: str) -> str:
+        """Search for ``query`` and return a numbered, human-readable result list.
+
+        Args:
+            query: The search query.
+
+        Returns:
+            The formatted results, ``"No web search results found."`` when the
+            search yields nothing, or an error message if the search raised.
+
+        Raises:
+            TypeError: If ``query`` is not a string.
+        """
+        # Explicit check instead of ``assert`` so validation survives ``python -O``.
+        if not isinstance(query, str):
+            raise TypeError("Query must be a string.")
+        try:
+            with DDGS() as ddgs:
+                hits = list(ddgs.text(query, max_results=self.max_results))
+            if not hits:
+                return "No web search results found."
+            entries = [
+                f"\n{i}. {hit['title']}\n   {hit['body']}\n   Source: {hit['href']}\n"
+                for i, hit in enumerate(hits, 1)
+            ]
+            return "\nWeb Search Results:\n" + "".join(entries)
+        except Exception as e:
+            # Best-effort tool: report the failure as text instead of raising.
+            print(f"Error in web search: {str(e)}")
+            return f"Error performing web search: {str(e)}"
+class WebContentTool(Tool):
+    """Tool that downloads a web page and returns its visible text, truncated.
+
+    ``max_chars`` and ``request_timeout`` are class attributes so they can be
+    overridden per subclass or per instance without editing ``forward``.
+    """
+    name = "web_content"
+    description = "Fetch and extract content from a specific webpage."
+    inputs = {
+        "url": {
+            "type": "string",
+            "description": "The URL of the webpage to fetch content from."
+        }
+    }
+    output_type = "string"
+    # Maximum number of characters of page text returned before truncation.
+    max_chars = 2000
+    # Timeout, in seconds, passed to ``requests.get``.
+    request_timeout = 10
+    def forward(self, url: str) -> str:
+        """Fetch ``url`` and return its text content with scripts and styles removed.
+
+        Args:
+            url: The URL of the page to fetch.
+
+        Returns:
+            ``"Content from <url>:"`` followed by the cleaned text (truncated to
+            ``max_chars``), or an error message if fetching or parsing raised.
+
+        Raises:
+            TypeError: If ``url`` is not a string.
+        """
+        # Explicit check instead of ``assert`` so validation survives ``python -O``.
+        if not isinstance(url, str):
+            raise TypeError("URL must be a string.")
+        # NOTE(review): the URL can originate from search results; nothing here
+        # restricts which hosts are fetched - confirm that is acceptable.
+        try:
+            headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+            }
+            response = requests.get(url, headers=headers, timeout=self.request_timeout)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, 'html.parser')
+            # Remove script and style elements so only visible text remains
+            for element in soup(["script", "style"]):
+                element.extract()
+            text = soup.get_text(separator='\n')
+            # Strip each line, split on double spaces, and drop empty fragments
+            lines = (line.strip() for line in text.splitlines())
+            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+            text = '\n'.join(chunk for chunk in chunks if chunk)
+            if len(text) > self.max_chars:
+                text = text[:self.max_chars] + "... [content truncated]"
+            return f"Content from {url}:\n\n{text}"
+        except Exception as e:
+            # Best-effort tool: report the failure as text instead of raising.
+            print(f"Error fetching web content: {str(e)}")
+            return f"Error fetching content from {url}: {str(e)}"
+class GaiaRetrieverTool(Tool):
+    """Tool that looks up knowledge-base documents relevant to a query via ``BM25Retriever``."""
+    name = "gaia_retriever"
+    # NOTE(review): retrieval below is BM25-based; "Semantic search" in the
+    # description may overstate it - confirm before rewording the runtime text.
+    description = "Semantic search for retrieving relevant information for GaiaAgent."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "Query for semantic search."
+        }
+    }
+    output_type = "string"
+    def __init__(self, docs, k=3, **kwargs):
+        """Index ``docs`` for retrieval.
+
+        Args:
+            docs: Documents to index; also kept for the no-match fallback.
+            k: Number of documents to return per query (and in the fallback).
+            **kwargs: Forwarded to ``Tool.__init__``.
+        """
+        super().__init__(**kwargs)
+        self.k = k
+        self.retriever = BM25Retriever.from_documents(docs, k=k)
+        self.docs = docs  # Store docs for fallback
+    def forward(self, query: str) -> str:
+        """Return the retrieved passages for ``query`` as a bulleted text block.
+
+        Args:
+            query: The text to search the knowledge base for.
+
+        Returns:
+            The retrieved passages; the first ``k`` stored documents when the
+            retriever returns nothing; or a fixed notice if retrieval raised.
+
+        Raises:
+            TypeError: If ``query`` is not a string.
+        """
+        # Explicit check instead of ``assert`` so validation survives ``python -O``.
+        if not isinstance(query, str):
+            raise TypeError("Query must be a string.")
+        try:
+            docs = self.retriever.invoke(query)
+            if not docs:
+                # Nothing matched: fall back to the first few stored documents
+                return "\nNo specific information found. Here's some general knowledge:\n" + "".join(
+                    f"\n- {doc.page_content}" for doc in self.docs[:self.k]
+                )
+            return "\nRetrieved Information:\n" + "".join(
+                f"\n- {doc.page_content}" for doc in docs
+            )
+        except Exception as e:
+            print(f"Error in retriever: {str(e)}")
+            # Return a fallback response
+            return "Unable to retrieve specific information. The agent will rely on its general knowledge."
+# --- Agent ---
+class GaiaAgent:
+    """Answer questions by combining keyword-selected tools with an LLM endpoint.
+
+    Calling the instance picks tools from keywords in the question, gathers
+    knowledge-base / web context, formats a prompt and posts it to the
+    configured Hugging Face inference URL. When no API token is set, or every
+    request attempt fails, a canned rule-based answer is returned instead.
+    """
+    # Short acronyms are matched as whole words: as bare substrings they also
+    # hit unrelated words ("ai" in "Spain", "rag" in "paragraph").
+    _ACRONYM_PATTERNS = {
+        "ai": re.compile(r"\bai\b"),
+        "llm": re.compile(r"\bllms?\b"),
+        "rag": re.compile(r"\brag\b"),
+    }
+    def __init__(self):
+        """Build the knowledge base, the tools, the model handle and the response cache."""
+        print("GaiaAgent initialized.")
+        # Create knowledge base documents
+        self.knowledge_docs = create_knowledge_documents()
+        # Create our tools
+        self.retriever_tool = GaiaRetrieverTool(self.knowledge_docs)
+        self.web_search_tool = WebSearchTool()
+        self.web_content_tool = WebContentTool()
+        # Initialize the Hugging Face model
+        self.model = InferenceClientModel()
+        # Set up LLM API access
+        self.hf_api_url = LLAMA_API_URL
+        self.headers = HEADERS
+        # Cache of cleaned responses, keyed by the exact prompt text
+        self.cache = {}
+    @classmethod
+    def _mentions_acronym(cls, text_lower, acronym):
+        """Return True if ``acronym`` occurs in the lower-cased ``text_lower`` as a whole word."""
+        return cls._ACRONYM_PATTERNS[acronym].search(text_lower) is not None
+    def query_llm(self, prompt, question=None):
+        """Send a prompt to the LLM API and return the cleaned response.
+
+        Args:
+            prompt: The full prompt text posted to the endpoint (also the cache key).
+            question: The bare user question, used for the rule-based fallback.
+                Defaults to ``prompt`` when omitted.
+
+        Returns:
+            The cleaned generated text, a fixed notice when the API response has
+            an unexpected shape, or a rule-based answer when no token is
+            configured or all attempts fail.
+        """
+        # Check cache first
+        if prompt in self.cache:
+            print("Using cached response")
+            return self.cache[prompt]
+        # The prompt template itself mentions "agents" and "GAIA", so the keyword
+        # rules must look at the bare question whenever the caller supplies it.
+        fallback_text = prompt if question is None else question
+        if not HF_API_TOKEN:
+            # Fallback to rule-based approach if no API token
+            return self.rule_based_answer(fallback_text)
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": 512,
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "do_sample": True
+            }
+        }
+        for attempt in range(MAX_RETRIES):
+            try:
+                response = requests.post(self.hf_api_url, headers=self.headers, json=payload, timeout=30)
+                response.raise_for_status()
+                result = response.json()
+                # Extract the generated text from the response
+                if isinstance(result, list) and len(result) > 0:
+                    generated_text = result[0].get("generated_text", "")
+                    # Clean up the response to get just the answer
+                    clean_response = self.clean_response(generated_text, prompt)
+                    # Cache the response
+                    self.cache[prompt] = clean_response
+                    return clean_response
+                return "I couldn't generate a proper response."
+            except Exception as e:
+                print(f"Attempt {attempt+1}/{MAX_RETRIES} failed: {str(e)}")
+                if attempt < MAX_RETRIES - 1:
+                    time.sleep(RETRY_DELAY)
+        # Every attempt failed (or MAX_RETRIES is 0): use the rule-based method
+        return self.rule_based_answer(fallback_text)
+    def clean_response(self, response, prompt):
+        """Strip the echoed prompt and answer/turn markers from an LLM response.
+
+        Markers are located case-insensitively, but the text that is kept
+        retains its original casing.
+
+        Args:
+            response: Raw generated text.
+            prompt: The prompt that was sent; removed if ``response`` starts with it.
+
+        Returns:
+            The text after the start markers and before the first end marker, stripped.
+        """
+        # Remove the prompt from the beginning if it's included
+        if response.startswith(prompt):
+            response = response[len(prompt):]
+        # Keep only what follows each start marker, checked in this order
+        start_markers = ["<answer>", "<response>", "Answer:", "Response:", "Assistant:"]
+        for marker in start_markers:
+            found = re.search(re.escape(marker), response, re.IGNORECASE)
+            if found:
+                response = response[found.end():].strip()
+        # Cut everything from a closing tag or the next conversation turn onward
+        end_markers = ["</answer>", "</response>", "Human:", "User:"]
+        for marker in end_markers:
+            found = re.search(re.escape(marker), response, re.IGNORECASE)
+            if found:
+                response = response[:found.start()].strip()
+        return response.strip()
+    def rule_based_answer(self, question):
+        """Return a canned answer chosen by keyword matching on ``question``.
+
+        Used as the fallback when the LLM endpoint cannot be used.
+        """
+        question_lower = question.lower()
+        # Simple pattern matching for common question types
+        if "what is" in question_lower or "define" in question_lower:
+            if "agent" in question_lower:
+                return "An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals."
+            if "gaia" in question_lower:
+                return "GAIA (General AI Assistant) is a framework for creating and evaluating AI assistants that can perform a wide range of tasks."
+            if self._mentions_acronym(question_lower, "llm") or "large language model" in question_lower:
+                return "A Large Language Model (LLM) is a neural network trained on vast amounts of text data to understand and generate human language."
+            if self._mentions_acronym(question_lower, "rag") or "retrieval" in question_lower:
+                return "RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with generation capabilities of language models."
+        if "how to" in question_lower:
+            return "To accomplish this task, you should first understand the requirements, then implement a solution step by step, and finally test your implementation."
+        if "example" in question_lower:
+            return "Here's an example implementation that demonstrates the concept in a practical manner."
+        if "evaluate" in question_lower or "criteria" in question_lower:
+            return "Evaluation criteria for agents typically include accuracy, relevance, factual correctness, conciseness, ability to follow instructions, and transparency in reasoning."
+        # Default response for unmatched questions
+        return "Based on my understanding, the answer involves analyzing the context carefully and applying the relevant principles to arrive at a solution."
+    def determine_tools_needed(self, question):
+        """Decide from keywords in ``question`` which tools should be used.
+
+        Returns:
+            A dict with boolean values under ``"use_web_search"``,
+            ``"use_knowledge_retrieval"`` and ``"use_webpage_visit"``.
+        """
+        question_lower = question.lower()
+        # Patterns that suggest the need for web search
+        web_search_patterns = [
+            "current", "latest", "recent", "news", "update", "today",
+            "statistics", "data", "facts", "information about",
+            "what is happening", "how many", "where is", "when was"
+        ]
+        needs_web_search = any(pattern in question_lower for pattern in web_search_patterns)
+        # Longer terms are matched as substrings; acronyms only as whole words
+        knowledge_terms = ["agent", "gaia", "artificial intelligence",
+                           "evaluation", "tool", "retrieval"]
+        needs_knowledge_retrieval = (
+            any(term in question_lower for term in knowledge_terms)
+            or any(self._mentions_acronym(question_lower, acronym)
+                   for acronym in self._ACRONYM_PATTERNS)
+        )
+        # Determine which tools to use based on the analysis
+        return {
+            "use_web_search": needs_web_search,
+            "use_knowledge_retrieval": needs_knowledge_retrieval or not needs_web_search,  # Fallback to knowledge retrieval
+            "use_webpage_visit": "example" in question_lower or "details" in question_lower or "explain" in question_lower
+        }
+    def format_prompt(self, question, knowledge_info="", web_info="", webpage_content=""):
+        """Build the LLM prompt for ``question``, embedding any gathered context.
+
+        Args:
+            question: The user question.
+            knowledge_info: Text from the local knowledge base, if any.
+            web_info: Web search results text, if any.
+            webpage_content: Text of a visited page, if any.
+
+        Returns:
+            The prompt string, ending in ``"Answer:"``.
+        """
+        context = ""
+        if knowledge_info:
+            context += f"\nLocal Knowledge Base Information:\n{knowledge_info}\n"
+        if web_info:
+            context += f"\nWeb Search Results:\n{web_info}\n"
+        if webpage_content:
+            context += f"\nDetailed Web Content:\n{webpage_content}\n"
+        if context:
+            return f"""You are an intelligent AI assistant specialized in answering questions about AI agents, GAIA (General AI Assistant), and related concepts.
+Use the following information to help answer the question accurately. If the information doesn't contain what you need, use your general knowledge.
+{context}
+Question: {question}
+Provide a clear, concise, and accurate answer. Use reasoning steps when appropriate. If you're uncertain, acknowledge limitations.
+Answer:"""
+        else:
+            return f"""You are an intelligent AI assistant specialized in answering questions about AI agents, GAIA (General AI Assistant), and related concepts.
+Question: {question}
+Provide a clear, concise, and accurate answer. Use reasoning steps when appropriate. If you're uncertain, acknowledge limitations.
+Answer:"""
+    def __call__(self, question: str) -> str:
+        """Answer ``question`` using the selected tools and the LLM.
+
+        Any exception raised along the way is logged and replaced by a
+        rule-based answer, so a string is always returned.
+        """
+        print(f"GaiaAgent received question (first 50 chars): {question[:50]}...")
+        try:
+            # Step 1: Determine which tools to use
+            tool_selection = self.determine_tools_needed(question)
+            # Step 2: Gather information from selected tools
+            knowledge_info = ""
+            web_info = ""
+            webpage_content = ""
+            # Get knowledge base information
+            if tool_selection["use_knowledge_retrieval"]:
+                try:
+                    knowledge_info = self.retriever_tool.forward(question)
+                    print("Retrieved knowledge base information")
+                except Exception as e:
+                    print(f"Error retrieving knowledge base information: {e}")
+            # Get web search results
+            if tool_selection["use_web_search"]:
+                try:
+                    web_info = self.web_search_tool.forward(question)
+                    print("Retrieved web search results")
+                except Exception as e:
+                    print(f"Error with web search: {e}")
+                # If web search found URLs and we should visit them
+                if tool_selection["use_webpage_visit"] and web_info and "http" in web_info.lower():
+                    # Only the first "Source:" URL in the search results is visited
+                    url_match = re.search(r'Source: (https?://[^\s]+)', web_info)
+                    if url_match:
+                        url = url_match.group(1)
+                        try:
+                            content_result = self.web_content_tool.forward(url)
+                            # Only use if result seems valid
+                            if content_result and len(content_result) > 100:
+                                webpage_content = content_result
+                                print(f"Retrieved webpage content from {url}")
+                            else:
+                                print("Webpage content was too short or empty")
+                        except Exception as e:
+                            print(f"Error extracting webpage content: {e}")
+            # Step 3: Format prompt with gathered information
+            prompt = self.format_prompt(question, knowledge_info, web_info, webpage_content)
+            # Step 4: Query the LLM; pass the bare question for the rule-based fallback
+            answer = self.query_llm(prompt, question=question)
+            print(f"GaiaAgent returning answer (first 50 chars): {answer[:50]}...")
+            return answer
+        except Exception as e:
+            print(f"Error in GaiaAgent: {e}")
+            # Fallback to the rule-based method if anything goes wrong
+            fallback_answer = self.rule_based_answer(question)
+            print(f"GaiaAgent returning fallback answer: {fallback_answer[:50]}...")
+            return fallback_answer
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         # Initialize the Hugging Face API client
+        # https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
         self.hf_api_url = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
         self.hf_api_token = os.getenv("HF_API_TOKEN")
         if not self.hf_api_token:
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        print("Initializing GaiaAgent...")
+        agent = GaiaAgent()
+        # Initialize the Hugging Face model
+        model = InferenceClientModel()
+        # Initialize the web search tool
+        #search_tool = DuckDuckGoSearchTool()
+        # Initialize the weather tool
+        #weather_info_tool = WeatherInfoTool()
+        # Initialize the Hub stats tool
+        #hub_stats_tool = HubStatsTool()
+        # Load the guest dataset and initialize the guest info tool
+        guest_info_tool = load_guest_dataset()
+        # Initialize the Hugging Face model
+        model = InferenceClientModel()
+        # Load the DuckDuckGo search tool dynamically
+        search_tool = load_tool(repo_id="smol-ai/duckduckgo-search", trust_remote_code=True)
+        agent = CodeAgent(
+            tools=[guest_info_tool, search_tool],
+            model=model,
+            add_base_tools=True,  # Add any additional base tools
+            planning_interval=3   # Enable planning every 3 steps
+        )
+        print("GaiaAgent initialization complete.")
     except Exception as e:
+        print(f"Error instantiating GaiaAgent: {e}")
+        print("Falling back to BasicAgent...")
+        try:
+            agent = BasicAgent()
+            print("BasicAgent initialization complete.")
+        except Exception as e:
+            print(f"Error instantiating BasicAgent: {e}")
+            return f"Error initializing agents: {e}", None
     # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)

requirements.txt CHANGED Viewed

@@ -1,3 +1,9 @@
 gradio
 requests
-pandas

 gradio
 requests
+pandas
+langchain
+langchain-community
+smolagents
+gradio[oauth]
+beautifulsoup4
+duckduckgo-search