File size: 10,677 Bytes
311c0d0
 
4bb25ec
6f446d0
07ad0d5
c1db1fc
f5bafc2
 
fc78ae4
 
 
f5bafc2
 
fc78ae4
c1db1fc
f5bafc2
 
08e2c16
07ad0d5
f5bafc2
 
326479a
 
c1db1fc
07ad0d5
 
 
fc78ae4
 
07ad0d5
 
 
 
 
 
f5bafc2
07ad0d5
f5bafc2
c1db1fc
f5bafc2
 
c1db1fc
f5bafc2
07ad0d5
f5bafc2
 
 
c1db1fc
f5bafc2
c1db1fc
f5bafc2
c1db1fc
07ad0d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08e2c16
f5bafc2
 
 
c1db1fc
f5bafc2
 
 
 
 
 
 
 
08e2c16
07ad0d5
 
f5bafc2
08e2c16
326479a
 
 
 
 
 
 
 
 
 
 
f5bafc2
07ad0d5
326479a
07ad0d5
fc78ae4
326479a
 
 
 
 
 
fc78ae4
326479a
fc78ae4
 
 
 
326479a
fc78ae4
 
 
 
07ad0d5
fc78ae4
 
 
 
 
 
 
 
07ad0d5
 
e305927
f5bafc2
07ad0d5
326479a
f5bafc2
 
326479a
f5bafc2
 
 
 
 
07ad0d5
f5bafc2
e305927
f5bafc2
 
 
 
 
 
 
08e2c16
f5bafc2
326479a
cedc6dd
f5bafc2
6f446d0
cedc6dd
6f446d0
8ea0ccb
6f446d0
cedc6dd
08e2aa5
f5bafc2
c1db1fc
f5bafc2
 
 
 
 
 
08e2aa5
f5bafc2
 
6f446d0
08e2aa5
4bb25ec
08e2aa5
8ea0ccb
08e2aa5
 
f5bafc2
 
08e2aa5
d68dd9c
08e2aa5
 
8ea0ccb
08e2aa5
 
d68dd9c
8ea0ccb
 
6f446d0
08e2aa5
 
 
 
 
 
 
f5bafc2
 
 
 
 
 
 
 
 
 
 
 
 
 
08e2aa5
f5bafc2
 
 
 
 
 
08e2aa5
 
6f446d0
08e2aa5
 
f5bafc2
 
 
 
 
326479a
 
f5bafc2
d68dd9c
8ea0ccb
d68dd9c
 
f5bafc2
d68dd9c
 
6f446d0
 
 
d68dd9c
 
f5bafc2
8ea0ccb
f5bafc2
 
 
d68dd9c
 
f5bafc2
326479a
 
 
 
 
cedc6dd
08e2aa5
9ef5250
8ea0ccb
326479a
d68dd9c
 
f5bafc2
326479a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import Tool, CodeAgent, Model

# Import internal modules
from config import (
    DEFAULT_API_URL,
    USE_LLAMACPP,
    LLAMACPP_CONFIG
)
from tools.tool_manager import ToolManager
from utils.llama_cpp_model import LlamaCppModel

class GaiaToolCallingAgent:
    """Tool-calling agent specifically designed for the GAIA system."""
    
    def __init__(self, local_model=None):
        print("GaiaToolCallingAgent initialized.")
        self.tool_manager = ToolManager()
        self.name = "tool_agent"
        self.description = "A specialized agent that uses various tools to answer questions"
        
        self.local_model = local_model
        if not self.local_model:
            try:
                from utils.llama_cpp_model import LlamaCppModel
                self.local_model = LlamaCppModel(
                    max_tokens=512
                )
            except Exception as e:
                print(f"Couldn't initialize local model in tool agent: {e}")
                self.local_model = None
        
    def run(self, query: str) -> str:
        print(f"Processing query: {query}")
        tools = self.tool_manager.get_tools()
        
        context_info = []
        for tool in tools:
            try:
                if self._should_use_tool(tool, query):
                    print(f"Using tool: {tool.name}")
                    result = tool.forward(query)
                    if result:
                        context_info.append(f"{tool.name} Results:\n{result}")
            except Exception as e:
                print(f"Error using {tool.name}: {e}")
        
        full_context = "\n\n".join(context_info) if context_info else ""
        
        if full_context and self.local_model:
            try:
                prompt = f"""
                Based on the following information, please provide a comprehensive answer to the question: "{query}"
                
                CONTEXT INFORMATION:
                {full_context}
                
                Answer:
                """
                
                response = self.local_model.generate(prompt)
                return response
            except Exception as e:
                print(f"Error generating response with local model: {e}")
                return full_context
        else:
            if not full_context:
                return "I couldn't find any relevant information to answer your question."
            return full_context
        
    def __call__(self, query: str) -> str:
        print(f"Tool agent received query: {query}")
        return self.run(query)
    
    def _should_use_tool(self, tool: Tool, query: str) -> bool:
        query_lower = query.lower()
        patterns = {
            "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
            "web_content": ["content", "webpage", "website", "page"],
            "youtube_video": ["youtube.com", "youtu.be"],
            "wikipedia_search": ["wikipedia", "wiki", "article"],
            "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"]
        }
        if tool.name not in patterns:
            return True
        return any(pattern in query_lower for pattern in patterns.get(tool.name, []))

def download_model_if_needed(model_path, model_url, chunk_size=1024 * 1024, timeout=30):
    """Download ``model_url`` to ``model_path`` unless the file already exists.

    The payload is streamed into ``<model_path>.part`` and renamed into place
    only after the transfer finished, so an interrupted download never leaves
    a truncated file that later runs would mistake for a complete model.

    Args:
        model_path: Destination file path. May be a bare file name.
        model_url: URL to fetch the model from.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Timeout in seconds passed to ``requests.get``.

    Raises:
        requests.exceptions.RequestException: On connection problems or a
            non-success HTTP status.
        OSError: If the destination cannot be created or written.
    """
    if os.path.exists(model_path):
        return

    print(f"Downloading model from {model_url}...")

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    parent_dir = os.path.dirname(model_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    partial_path = f"{model_path}.part"
    try:
        with requests.get(model_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, model_path)
    except BaseException:
        # Also covers Ctrl-C: drop the incomplete file, then re-raise.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise

    print("Download complete.")

def create_manager_agent() -> CodeAgent:
    """Build the manager ``CodeAgent`` together with its tool-calling sub-agent.

    Model selection:
      * ``USE_LLAMACPP`` truthy: make sure the GGUF file exists (downloading
        it if necessary) and load it through ``LlamaCppModel`` with the
        settings found in ``LLAMACPP_CONFIG``.
      * ``USE_LLAMACPP`` falsy: use ``StubModel``.
      * Any exception in the steps above: try ``LlamaCppModel()`` with its
        defaults, and fall back to ``StubModel`` if that fails as well.

    Returns:
        A ``CodeAgent`` that manages one ``GaiaToolCallingAgent`` sharing the
        same model.
    """

    def _stub_model():
        # NOTE(review): confirm that the installed smolagents release
        # actually exports StubModel; if not, this import raises.
        from smolagents import StubModel
        return StubModel()

    try:
        from config import USE_LLAMACPP, LLAMACPP_CONFIG

        if not USE_LLAMACPP:
            model = _stub_model()
            print("Using StubModel as fallback")
        else:
            # Use TheBloke's model with auto-download
            gguf_url = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf"
            gguf_path = LLAMACPP_CONFIG.get("model_path") or "./models/llama-2-7b.Q4_0.gguf"

            download_model_if_needed(gguf_path, gguf_url)

            llama_settings = {
                "model_path": gguf_path,
                "n_ctx": LLAMACPP_CONFIG.get("n_ctx", 2048),
                "n_gpu_layers": LLAMACPP_CONFIG.get("n_gpu_layers", 0),
                "temperature": LLAMACPP_CONFIG.get("temperature", 0.7),
            }
            model = LlamaCppModel(**llama_settings)
            print(f"Using LlamaCpp model from {gguf_path}")
    except Exception as setup_error:
        print(f"Error setting up model: {setup_error}")
        try:
            model = LlamaCppModel()
            print("Using fallback LlamaCpp model configuration")
        except Exception as fallback_error:
            model = _stub_model()
            print(f"Using StubModel due to error: {fallback_error}")

    # The sub-agent reuses the manager's model for its final answers.
    sub_agent = GaiaToolCallingAgent(local_model=model)

    agent_settings = {
        "model": model,
        "tools": [],
        "managed_agents": [sub_agent],
        "additional_authorized_imports": [
            "json", "pandas", "numpy", "re", "requests", "bs4"
        ],
        "planning_interval": 3,
        "verbosity_level": 2,
        "max_steps": 10,
    }
    manager = CodeAgent(**agent_settings)
    print("Manager agent created with local model")
    return manager

def create_agent():
    """Create the GAIA manager agent, reporting failures instead of raising.

    Returns:
        The object returned by ``create_manager_agent()``, or ``None`` when
        its construction raised an exception (the error is printed).
    """
    try:
        print("Initializing GAIA agent system...")
        manager = create_manager_agent()
    except Exception as err:
        print(f"Error creating GAIA agent: {err}")
        manager = None
    return manager

def _answer_questions(agent, questions_data):
    """Run ``agent`` on every well-formed question and collect the outcome.

    Args:
        agent: Object exposing ``run(prompt)``.
        questions_data: Iterable of question items; each usable item is a
            dict with ``"task_id"`` and ``"question"`` entries.

    Returns:
        ``(answers_payload, results_log)``. ``answers_payload`` holds one
        ``{"task_id", "submitted_answer"}`` dict per successful run;
        ``results_log`` additionally records failed runs with an
        ``"AGENT ERROR: ..."`` answer.
    """
    answers_payload = []
    results_log = []
    for item in questions_data:
        # A non-dict entry is treated like an entry with missing fields so a
        # single malformed item cannot abort the whole run.
        is_mapping = isinstance(item, dict)
        task_id = item.get("task_id") if is_mapping else None
        question_text = item.get("question") if is_mapping else None
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            agent_output = agent.run(f"Answer this question concisely: {question_text}")
            if isinstance(agent_output, dict):
                submitted_answer = agent_output.get("answer", str(agent_output))
            else:
                submitted_answer = str(agent_output)
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}"
            })
            continue

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": submitted_answer
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer
        })
    return answers_payload, results_log


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each one and submit the answers.

    Args:
        profile: Profile of the logged-in user, or ``None`` when nobody is
            logged in.

    Returns:
        ``(status_message, results)`` where ``status_message`` is a string
        describing the outcome and ``results`` is a ``pandas.DataFrame`` of
        the per-question log, or ``None`` when the run stopped before any
        question was processed.
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        print("Initializing GAIA agent system...")
        agent = create_agent()
        if not agent:
            return "Error: Could not initialize agent.", None
        print("GAIA agent initialization complete.")
    except Exception as e:
        print(f"Error initializing agent: {e}")
        return f"Error initializing agent: {e}", None

    print(f"Fetching questions from: {questions_url}")
    try:
        questions_response = requests.get(questions_url, timeout=15)
        questions_response.raise_for_status()
        questions_data = questions_response.json()
        # Anything but a list (e.g. an error object) cannot be iterated as
        # question items, so reject it here instead of failing later.
        if not isinstance(questions_data, list):
            print(f"Unexpected questions payload type: {type(questions_data).__name__}")
            return "Fetched questions list is empty or invalid format.", None
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    print(f"Running agent on {len(questions_data)} questions...")
    answers_payload, results_log = _answer_questions(agent, questions_data)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    if not space_id:
        # The link is still submitted unchanged; this only makes the
        # situation visible in the logs.
        print("SPACE_ID is not set; the submitted agent_code link will not point to a real Space.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }

    print(f"Submitting {len(answers_payload)} answers to API...")
    try:
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result_data = submit_response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return status_message, pd.DataFrame(results_log)
    except Exception as e:
        status_message = f"Submission Failed: {str(e)}"
        print(f"Error during submission: {e}")
        return status_message, pd.DataFrame(results_log)

# Build the Gradio UI. Components are created in the order they are listed
# inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown("""
    **Instructions:**
    1. Log in to your Hugging Face account using the button below.
    2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and see the score.
    """)
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only text box that receives the status string returned by the run.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Table that receives the per-question results returned by the run.
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No `inputs` are wired here; the two return values of run_and_submit_all
    # map to the two outputs in order.
    # NOTE(review): the `profile` argument is presumably injected by Gradio
    # from the OAuthProfile annotation - confirm against the Gradio version in use.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # Banner so the start of a run is easy to spot in the console output.
    print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)
    demo.launch(debug=True, share=False)