File size: 10,057 Bytes
10e9b7d
 
eccf8e4
7d65c66
3c4371f
2a43fe1
10e9b7d
e80aab9
3db6293
e80aab9
2a43fe1
 
 
 
 
 
 
 
 
 
 
31243f4
2a43fe1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4021bf3
2a43fe1
 
31243f4
2a43fe1
31243f4
 
7d65c66
b177367
3c4371f
7e4a06b
1ca9f65
3c4371f
7e4a06b
3c4371f
7d65c66
3c4371f
7e4a06b
31243f4
 
e80aab9
2a43fe1
31243f4
2a43fe1
 
 
 
 
31243f4
3c4371f
31243f4
2a43fe1
 
36ed51a
2a43fe1
3c4371f
7d65c66
31243f4
eccf8e4
31243f4
7d65c66
31243f4
 
3c4371f
 
31243f4
e80aab9
31243f4
 
3c4371f
 
7d65c66
3c4371f
7d65c66
31243f4
 
e80aab9
b177367
7d65c66
 
3c4371f
31243f4
 
 
 
 
 
 
7d65c66
 
 
31243f4
 
7d65c66
31243f4
 
3c4371f
31243f4
 
2a43fe1
7d65c66
3c4371f
31243f4
e80aab9
7d65c66
31243f4
e80aab9
7d65c66
e80aab9
 
31243f4
e80aab9
 
3c4371f
 
 
e80aab9
 
31243f4
 
e80aab9
3c4371f
e80aab9
 
3c4371f
e80aab9
7d65c66
3c4371f
31243f4
7d65c66
31243f4
3c4371f
 
 
 
 
e80aab9
31243f4
 
 
 
7d65c66
31243f4
 
 
 
e80aab9
 
 
 
2a43fe1
0ee0419
e514fd7
 
2a43fe1
 
dec180d
e514fd7
 
2a43fe1
 
e514fd7
e80aab9
 
dec180d
e80aab9
31243f4
e80aab9
9088b99
7d65c66
e80aab9
dec180d
 
 
31243f4
 
 
e80aab9
 
 
3c4371f
2a43fe1
 
7d65c66
 
 
2a43fe1
dec180d
2a43fe1
 
 
 
7d65c66
3c4371f
2a43fe1
 
3c4371f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import os
import gradio as gr
import requests
import inspect
import pandas as pd
from groq import Groq

# --- Constants ---
# Base URL of the scoring service. The questions and submission endpoints are
# derived from it by appending "/questions" and "/submit".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Groq Powered Agent Definition ---
# This new agent uses the Groq API to generate answers.
class GroqAgent:
    def __init__(self, api_key):
        """
        Initializes the GroqAgent with the Groq API client.
        """
        print("Initializing GroqAgent...")
        self.client = Groq(api_key=api_key)
        print("GroqAgent initialized successfully.")

    def __call__(self, question: str) -> str:
        """
        This method is called to answer a question using the Groq API.
        """
        print(f"GroqAgent received question (first 50 chars): {question[:50]}...")

        # A system prompt is used to guide the model to provide concise, direct answers,
        # which is ideal for the GAIA benchmark's exact-match scoring.
        system_prompt = (
            "You are an expert AI agent. Your goal is to answer the following question as accurately "
            "and concisely as possible. Provide only the final answer, without any introductory text, "
            "explanations, or additional formatting."
        )

        try:
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt,
                    },
                    {
                        "role": "user",
                        "content": question,
                    }
                ],
                model="llama3-70b-8192",  # A powerful model available via Groq
                temperature=0.0, # Set to 0 for deterministic, factual answers
            )

            answer = chat_completion.choices[0].message.content.strip()
            print(f"GroqAgent generated answer: {answer}")
            return answer

        except Exception as e:
            print(f"An error occurred while calling Groq API: {e}")
            return f"Error generating answer: {e}"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GroqAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in. Its ``username`` is sent with the
            submission.

    Returns:
        A ``(status_message, results)`` tuple. ``status_message`` is a
        human-readable summary of the outcome (success or the reason for
        failure). ``results`` is a pandas DataFrame with the columns
        "Task ID", "Question" and "Submitted Answer", or None when the run
        stops before any question is processed (not logged in, agent could
        not be created, questions could not be fetched).
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    # SPACE_ID is only used to build the link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (Now using GroqAgent)
    try:
        # Securely get the API key from Hugging Face Space secrets
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            raise ValueError("GROQ_API_KEY secret not found! Please set it in your Space's settings.")
        agent = GroqAgent(api_key=groq_api_key)
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # The link to your codebase (useful for verification, so please keep your space public)
    # NOTE: when SPACE_ID is unset this link contains the literal text "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so listed second this handler could never run.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # Keep going: a failure on one task is logged in the results
            # table but is not included in the submission payload.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    # The log is complete from here on, so the table is built once and
    # returned by every remaining exit path.
    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            # Prefer the server's structured "detail" field when the error
            # body is JSON; otherwise fall back to a slice of the raw text.
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Shared tail of every failure branch above.
    print(status_message)
    return status_message, results_df


# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    # Page heading, then the usage notes shown above the controls.
    gr.Markdown(value="# Groq-Powered Agent Evaluation Runner")
    gr.Markdown(
        value="""
        **Instructions:**
        1.  Make sure you have set your `GROQ_API_KEY` in the 'Secrets' section of your Space settings.
        2.  Log in to your Hugging Face account using the button below. This is required for submission.
        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit your answers, and see your score.
        ---
        **Disclaimers:**
        Once you click the "submit" button, the process can take some time as the agent answers all the questions.
        This space provides a basic setup. You are encouraged to modify the `GroqAgent` class to experiment with different models, prompts, or even add tools to improve your score!
        """
    )

    # Hugging Face login; the evaluation refuses to run without a profile.
    gr.LoginButton()

    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

    # Output widgets: a read-only status box and the per-question table.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No `inputs` are declared on purpose: Gradio supplies the OAuthProfile
    # to `run_and_submit_all` by itself, based on the type hint on that
    # function's parameter.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    # Startup banner: title centred between two 30-dash rules.
    banner_title = " App Starting "
    rule = "-" * 30
    print("\n" + rule + banner_title + rule)

    # Report whether the app appears to be running inside a Space.
    space_id_startup = os.getenv("SPACE_ID")
    if not space_id_startup:
        print("ℹ️  SPACE_ID environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")

    # Warn early about a missing API key; the run itself checks again.
    if os.getenv("GROQ_API_KEY"):
        print("✅ GROQ_API_KEY secret is set.")
    else:
        print("⚠️  WARNING: GROQ_API_KEY secret is not set. The app will fail if run.")

    # Closing rule, as wide as the opening banner.
    print("-" * (60 + len(banner_title)) + "\n")
    print("Launching Gradio Interface for Groq Agent Evaluation...")
    demo.launch(debug=True, share=False)