""" app.py This script provides the Gradio web interface to run the evaluation for the Hugging Face Agents course. It fetches questions from a server, runs the agent defined in agent.py on them, and submits the answers for scoring. """ import os import re # <-- 1. ADDED IMPORT for Regular Expressions import gradio as gr import requests import pandas as pd # --- Import your agent's factory function --- from agent import create_agent_executor # <-- 2. ADDED IMPORT for your agent # --- Constants --- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # --- DELETED BasicAgent class as it's no longer needed --- # --- 3. ADDED HELPER FUNCTION TO PARSE THE AGENT'S OUTPUT --- def parse_final_answer(agent_response: str) -> str: """ Extracts the final answer from the agent's full response string. The agent is prompted to return 'FINAL ANSWER: [answer]'. This function isolates and returns '[answer]'. """ # Use a regular expression to find the text after "FINAL ANSWER:" match = re.search(r"FINAL ANSWER:\s*(.*)", agent_response, re.IGNORECASE | re.DOTALL) if match: # If a match is found, return the captured group, stripped of whitespace return match.group(1).strip() # As a fallback, if the specific format is not found, return the last non-empty line lines = [line for line in agent_response.split('\n') if line.strip()] if lines: return lines[-1].strip() # If all else fails, return a default message return "Could not parse a final answer." def run_and_submit_all(profile: gr.OAuthProfile | None): """ Fetches all questions, runs the agent on them, submits all answers, and displays the results. """ if not profile: print("User not logged in.") return "Please log in to Hugging Face with the button above to submit.", None username = profile.username print(f"User logged in: {username}") space_id = os.getenv("SPACE_ID") agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" questions_url = f"{DEFAULT_API_URL}/questions" submit_url = f"{DEFAULT_API_URL}/submit" # --- 4. MODIFIED AGENT INSTANTIATION AND EXECUTION --- # 1. Instantiate Agent (using your factory function from agent.py) print("Initializing your custom agent...") try: agent_executor = create_agent_executor(provider="groq") # or "groq" except Exception as e: print(f"Error instantiating agent: {e}") return f"Fatal Error: Could not initialize agent. Check logs. Details: {e}", None # 2. Fetch Questions (this part is correct) print(f"Fetching questions from: {questions_url}") try: response = requests.get(questions_url, timeout=20) response.raise_for_status() questions_data = response.json() print(f"Fetched {len(questions_data)} questions.") except Exception as e: return f"Error fetching questions: {e}", None # 3. Run your Agent (THIS IS THE MOST IMPORTANTLY CORRECTED SECTION) results_log = [] answers_payload = [] print(f"Running agent on {len(questions_data)} questions...") for i, item in enumerate(questions_data): task_id = item.get("task_id") question_text = item.get("question") if not task_id or question_text is None: print(f"Skipping item with missing data: {item}") continue print(f"\n--- Running Task {i+1}/{len(questions_data)} (ID: {task_id}) ---") print(f"Question: {question_text}") try: # CORRECT INVOCATION: Use the agent_executor with the .invoke() method # The input must be a dictionary with a "messages" key result = agent_executor.invoke({"messages": [("user", question_text)]}) # The agent's final response is in the 'messages' list of the output raw_answer = result['messages'][-1].content # Use our helper function to extract the clean answer submitted_answer = parse_final_answer(raw_answer) print(f"Raw LLM Response: '{raw_answer}'") print(f"PARSED FINAL ANSWER: '{submitted_answer}'") answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer}) results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}) except Exception as e: print(f"!! AGENT ERROR on task {task_id}: {e}") # It's important to log errors so you can see them in the UI results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT RUNTIME ERROR: {e}"}) if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log) # 4. Prepare and 5. Submit (these parts are correct) submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload} print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...") try: response = requests.post(submit_url, json=submission_data, timeout=60) response.raise_for_status() result_data = response.json() final_status = ( f"Submission Successful!\n" f"User: {result_data.get('username')}\n" f"Overall Score: {result_data.get('score', 'N/A')}% " f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)" ) print("Submission successful.") return final_status, pd.DataFrame(results_log) except Exception as e: status_message = f"Submission Failed: {e}" print(status_message) return status_message, pd.DataFrame(results_log) # --- Build Gradio Interface using Blocks (This part is correct) --- with gr.Blocks() as demo: gr.Markdown("# Agent Evaluation Runner") gr.Markdown("Run your custom agent against the evaluation questions and submit for a score.") gr.LoginButton() run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary") status_output = gr.Textbox(label="Run Status / Submission Result", lines=4, interactive=False) results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, row_count=10) run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table]) if __name__ == "__main__": print("\n" + "-"*30 + " App Starting " + "-"*30) # The startup info logs are helpful, so we keep them. space_id_startup = os.getenv("SPACE_ID") if space_id_startup: print(f"✅ SPACE_ID found: {space_id_startup}") print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}") else: print("ℹ️ SPACE_ID environment variable not found (likely running locally).") print("-"*(60 + len(" App Starting ")) + "\n") print("Launching Gradio Interface...") demo.launch()