File size: 3,548 Bytes
10e9b7d
 
eccf8e4
3c4371f
8b83970
10e9b7d
182cf83
3db6293
0f0a208
e80aab9
0f0a208
31243f4
0f0a208
 
8b83970
 
 
 
 
 
 
0f0a208
8b83970
 
31243f4
182cf83
0f0a208
 
e85b640
0f0a208
182cf83
e85b640
0f0a208
 
 
 
 
 
 
 
4021bf3
9efd6bd
7e4a06b
0f0a208
182cf83
0f0a208
eccf8e4
0f0a208
 
7d65c66
0f0a208
e80aab9
0f0a208
 
 
 
31243f4
0f0a208
 
 
8b83970
 
0f0a208
 
 
 
8b83970
31243f4
0f0a208
 
 
 
8b83970
31243f4
0f0a208
e80aab9
0f0a208
 
 
 
 
 
 
 
 
8b83970
0f0a208
 
 
 
e80aab9
7d65c66
0f0a208
182cf83
4e281a3
e80aab9
0f0a208
 
 
 
 
 
 
 
 
8b83970
 
0f0a208
 
 
182cf83
8b83970
e23ab90
0f0a208
e23ab90
e80aab9
 
0f0a208
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import gradio as gr
import requests
import pandas as pd
from transformers import pipeline

# --- Constants ---
# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Checkpoint name handed to the transformers pipeline in BasicAgent.
# NOTE(review): the "-mnli" suffix suggests an NLI/classification checkpoint,
# yet BasicAgent loads it with the "text-generation" task — confirm this
# pairing is intended.
HF_MODEL_NAME = "facebook/bart-large-mnli"  # Free model that works in Spaces

# --- Agent Definition ---
class BasicAgent:
    """Callable agent that answers a question with a text-generation pipeline.

    The pipeline is created once in ``__init__``. If that fails, the agent
    stays usable and returns a fixed fallback answer instead of raising.
    """

    # Upper bound on *newly generated* tokens per answer (the prompt is not
    # counted against this budget).
    MAX_NEW_TOKENS = 100
    # Returned when the pipeline could not be initialised.
    FALLBACK_ANSWER = "Default answer (LLM not available)"

    def __init__(self):
        """Load the pipeline for ``HF_MODEL_NAME``; fall back to ``None`` on failure."""
        print("Initializing Agent...")
        try:
            self.llm = pipeline(
                "text-generation",
                model=HF_MODEL_NAME,
                device_map="auto",
            )
        except Exception as e:
            # Deliberate best-effort: a missing model must not crash the app.
            print(f"LLM initialization failed: {e}")
            self.llm = None

    def __call__(self, question: str) -> str:
        """Return the model's answer to ``question``.

        Args:
            question: Prompt text passed to the pipeline unchanged.

        Returns:
            The generated continuation with surrounding whitespace removed,
            the fallback answer when no pipeline is available, or an
            ``"Error: ..."`` string if generation raised.
        """
        if self.llm is None:
            return self.FALLBACK_ANSWER

        try:
            # max_new_tokens (instead of max_length) keeps the answer budget
            # independent of the prompt length; return_full_text=False stops
            # the pipeline from echoing the question back inside the answer.
            response = self.llm(
                question,
                max_new_tokens=self.MAX_NEW_TOKENS,
                return_full_text=False,
            )
            return response[0]["generated_text"].strip()
        except Exception as e:
            return f"Error: {e}"

def run_and_submit_all():
    """Fetch the questions, answer each with ``BasicAgent``, and submit them.

    Returns:
        A ``(status_message, results_table)`` tuple. ``results_table`` is a
        ``pandas.DataFrame`` with "Task ID", "Question" and "Answer" columns,
        or ``None`` when the run stops before any question is processed
        (no username, or the questions could not be fetched).
    """
    # NOTE(review): the username is read from an environment variable; nothing
    # in this function ties it to the Gradio login button — confirm how
    # GRADIO_AUTH_USERNAME is expected to be populated.
    username = os.getenv("GRADIO_AUTH_USERNAME")
    if not username:
        return "Please login first", None

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    agent = BasicAgent()

    # Fetch questions. raise_for_status() turns HTTP 4xx/5xx into an error
    # here instead of letting an error payload be treated as the question list.
    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
    except (requests.RequestException, ValueError) as e:
        return f"Failed to get questions: {e}", None

    if not isinstance(questions, list) or not questions:
        return "Failed to get questions: empty or malformed response", None

    # Process questions
    results = []
    answers = []
    for q in questions:
        # Skip entries that cannot be scored: not a mapping, or no task id.
        if not isinstance(q, dict) or q.get("task_id") is None:
            continue
        task_id = q["task_id"]
        question_text = q.get("question")
        try:
            answer = agent(q.get("question", ""))
        except Exception as e:
            # Shown in the table for visibility but not submitted.
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": f"Error: {e}",
            })
            continue
        answers.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results.append({
            "Task ID": task_id,
            "Question": question_text,
            "Answer": answer,
        })

    if not answers:
        return "No answers to submit", pd.DataFrame(results)

    # Submit answers. An HTTP error status is reported as a failure rather
    # than being formatted as a "Success!" message with missing fields.
    try:
        response = requests.post(
            f"{api_url}/submit",
            json={
                "username": username,
                "agent_code": f"https://huggingface.co/spaces/{space_id}",
                "answers": answers,
            },
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        return f"Submission failed: {e}", pd.DataFrame(results)

    return (
        f"Success! Score: {result.get('score', 'N/A')}%\n"
        f"Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}",
        pd.DataFrame(results),
    )

# --- Gradio Interface ---
# Build the page: title, collapsible instructions, login button, run button,
# and the two output widgets that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# LLM Agent Evaluation")
    
    # The instructions panel starts collapsed (open=False).
    with gr.Accordion("Instructions", open=False):
        gr.Markdown("""
        1. Click the login button
        2. Authorize with your Hugging Face account
        3. Click 'Run Evaluation'
        """)
    
    # NOTE(review): run_and_submit_all reads the username from the
    # GRADIO_AUTH_USERNAME environment variable; nothing here connects this
    # button to that variable — confirm the login flow actually sets it.
    gr.LoginButton()
    
    run_btn = gr.Button("Run Evaluation", variant="primary")
    status = gr.Textbox(label="Status")
    results = gr.DataFrame(label="Results", wrap=True)
    
    # The handler takes no inputs; its (message, table) return value is routed
    # to the status box and the results table, in that order.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, results]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): auth_message is passed without an auth= argument — confirm
    # the message is ever displayed.
    demo.launch(auth_message="Please login with your Hugging Face account")