File size: 2,947 Bytes
50b462a
996d1a7
 
50b462a
6a6387f
50b462a
 
996d1a7
6a6387f
50b462a
6a6387f
50b462a
6a6387f
 
 
 
 
996d1a7
50b462a
 
6a6387f
 
 
 
 
50b462a
6a6387f
50b462a
 
 
6a6387f
 
 
50b462a
6a6387f
996d1a7
6a6387f
 
50b462a
6a6387f
 
 
 
50b462a
6a6387f
 
 
 
50b462a
6a6387f
 
50b462a
6a6387f
996d1a7
50b462a
6a6387f
 
 
 
 
 
 
 
 
50b462a
6a6387f
50b462a
6a6387f
50b462a
996d1a7
6a6387f
 
50b462a
6a6387f
 
 
 
 
996d1a7
50b462a
6a6387f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import gradio as gr
import requests
import pandas as pd
from agent import run_agent_on_question  # your real agent logic

# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# URLs used by run_and_submit_all are built by appending to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class WrappedAgent:
    """Callable adapter between the evaluation loop and ``run_agent_on_question``.

    The evaluation loop hands over one string of the form
    ``"<task_id>|||<question>"``. This class splits that string back apart
    and forwards both pieces to the agent as a dict payload.
    """

    # Delimiter the caller places between the task id and the question text.
    SEPARATOR = "|||"

    def __init__(self):
        print("Custom Agent loaded.")

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its answer.

        Args:
            question: Either ``"<task_id>|||<question text>"`` or a bare
                question string without the separator.

        Returns:
            Whatever ``run_agent_on_question`` returns for the payload.
        """
        # Split on the first separator only, so a question that itself
        # contains "|||" is preserved intact.
        prefix, sep, remainder = question.partition(self.SEPARATOR)
        if sep:
            # Forward the real task id rather than discarding it; fall back
            # to the "N/A" placeholder only when the prefix is empty.
            task_id = prefix or "N/A"
            raw_question = remainder
        else:
            task_id = "N/A"
            raw_question = question
        print(f"Calling run_agent_on_question on: {raw_question[:60]}...")
        return run_agent_on_question({"question": raw_question, "task_id": task_id})

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch every question, run the agent on each one, and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the
            user has not logged in.

    Returns:
        A ``(status_message, results_table)`` tuple. ``results_table`` is a
        ``pandas.DataFrame`` with one row per processed question, or ``None``
        when the run stops before any question is processed (not logged in,
        fetch failure, or an unusable questions payload).
    """
    if not profile:
        return "Please login to Hugging Face via the login button.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    agent = WrappedAgent()
    # URL built from the SPACE_ID environment variable and sent with the
    # submission; it will contain "None" if that variable is unset.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Guard against an empty list or a non-list JSON body (for example an
    # error object), which the per-item loop below cannot handle.
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or invalid format.", None

    results = []
    answers = []

    for q in questions:
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question = q.get("question")
        # An answer without a task id cannot be attributed, and a missing
        # question gives the agent nothing to work on; skip such items.
        if not task_id or question is None:
            continue
        # optionally fetch file: requests.get(BASE_URL + f"/files/{task_id}")

        full_input = f"{task_id}|||{question}"
        try:
            submitted = agent(full_input)
        except Exception as e:
            # One failing question must not abort the whole run: show the
            # error in the results table and leave it out of the submission.
            results.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"AGENT ERROR: {e}",
                }
            )
            continue
        answers.append({"task_id": task_id, "submitted_answer": submitted})
        results.append({"Task ID": task_id, "Question": question, "Submitted Answer": submitted})

    if not answers:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results)

    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    try:
        sub_resp = requests.post(submit_url, json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"{data.get('message')}"
        )
        return status, pd.DataFrame(results)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results)

# Declare the page: heading and instructions, login button, run button, and
# the two output widgets that the run handler fills in.
with gr.Blocks() as demo:
    gr.Markdown("# Custom GAIA Evaluation Runner")
    gr.Markdown(
        "Log in, click the button, and your agent runs through all GAIA questions."
    )
    gr.LoginButton()

    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    out_txt = gr.Textbox(
        label="Run Status / Submission Result",
        interactive=False,
        lines=5,
    )
    out_tbl = gr.DataFrame(label="Questions & Submitted Answer")

    # The handler is wired with no input components; its two return values
    # go to the status box and the results table, in that order.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[out_txt, out_tbl],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)