|
import os |
|
import gradio as gr |
|
import requests |
|
import ast |
|
import json |
|
import time |
|
import pandas as pd |
|
from datetime import datetime |
|
from typing import List, Dict, Any, Annotated, Optional |
|
from langgraph.graph import Graph, StateGraph, END |
|
from typing_extensions import TypedDict |
|
from openai import OpenAI |
|
from tools import simple_search |
|
import re |
|
from huggingface_hub import InferenceClient |
|
import io |
|
import mimetypes |
|
import base64 |
|
import cv2 |
|
import numpy as np |
|
from io import BytesIO |
|
import tempfile |
|
import subprocess |
|
import sys |
|
import textwrap |
|
|
|
|
|
|
|
|
|
|
|
# Base URL of the scoring service; the /questions, /files/<task_id> and
# /submit endpoints used in this file are all built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Credentials are read from the environment; each is None when unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hugging Face Inference client shared by the image and video helpers.
client = InferenceClient(token=HF_TOKEN)

# Module-wide HTTP session reused for attachment probing and downloads.
SESSION = requests.Session()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def override(_, new):
    """Discard the first argument and hand back *new* unchanged."""
    replacement = new
    return replacement
|
|
|
def merge_dicts(old: Dict, new: Dict) -> Dict:
    """Return a fresh dict holding *old* updated with *new* (*new* wins on clashes)."""
    combined = dict(old)
    combined.update(new)
    return combined
|
|
|
def tighten(q: str) -> str:
    """
    Condense a long question into its quoted phrases followed by every word
    of three or more characters that starts with an uppercase letter or digit.

    When nothing survives the filtering, the original text is returned as-is.
    """
    phrases = re.findall(r'"([^"]+)"', q)
    keywords = re.findall(r'\b([A-Z0-9][\w-]{2,})', q)
    condensed = " ".join([*phrases, *keywords])
    if condensed:
        return condensed
    return q
|
|
|
|
|
|
|
|
|
|
|
def retry_hf_inference(func):
    """Decorator to retry HF Inference API calls with backoff.

    The wrapped callable is attempted up to three times (one initial call
    plus two retries). After a failed attempt the wrapper sleeps 7s, then
    14s, before trying again. The exception raised by the final attempt
    propagates to the caller unchanged.

    Args:
        func: The callable to protect with retries.

    Returns:
        A wrapper with the same call signature, name and docstring as *func*.
    """
    import functools  # local import keeps this block self-contained

    max_retries = 2
    base_delay = 7  # seconds; multiplied by the attempt number (linear backoff)

    @functools.wraps(func)  # keep __name__/__doc__ so logs and debuggers stay readable
    def wrapper(*args, **kwargs):
        for attempt in range(max_retries + 1):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if attempt == max_retries:
                    # Out of retries: surface the original error.
                    raise
                delay = base_delay * (attempt + 1)
                print(f"HF API error: {str(e)}. Retrying in {delay}s...")
                time.sleep(delay)

    return wrapper
|
|
|
@retry_hf_inference
def image_qa_bytes(data: bytes, prompt: str) -> str:
    """Post raw image bytes to the hosted LLaVA model and return the client's response.

    NOTE(review): ``prompt`` is accepted but never forwarded; only the image
    bytes are sent. Confirm whether the question should be part of the payload.
    """
    model_id = "llava-hf/llava-v1.6-mistral-7b-hf"
    octet_headers = {"Content-Type": "application/octet-stream"}
    reply = client.post(model_id, data=data, headers=octet_headers)
    return reply
|
|
|
def _sample_video_frames(data: bytes, target_frames: int, target_duration: int) -> List[Any]:
    """Decode *data* and return exactly *target_frames* frames resized to 224x224."""
    # cv2.VideoCapture opens file paths / devices, not in-memory buffers, so
    # the bytes are spooled to a temporary file first.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp.write(data)
        src_path = tmp.name

    cap = cv2.VideoCapture(src_path)
    frames: List[Any] = []
    try:
        if not cap.isOpened():
            raise ValueError("Could not open video data")

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Some containers report 0 fps; fall back to taking every frame
        # instead of dividing by zero.
        if fps > 0:
            frame_interval = max(1, int(frame_count / (fps * target_duration)))
        else:
            frame_interval = 1

        frame_idx = 0
        while len(frames) < target_frames and frame_idx < frame_count:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % frame_interval == 0:
                frames.append(cv2.resize(frame, (224, 224)))
            frame_idx += 1
    finally:
        # Always free the decoder and remove the spooled copy.
        cap.release()
        os.unlink(src_path)

    if not frames:
        raise ValueError("No frames could be decoded from video data")

    # Short clips are padded by repeating the last decoded frame.
    frames.extend([frames[-1]] * (target_frames - len(frames)))
    return frames


def _encode_frames_mp4(frames: List[Any], fps: float) -> bytes:
    """Encode *frames* into an MP4 clip at *fps* and return the file's bytes."""
    # Reserve a path, then close the handle so VideoWriter can open it itself.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        out_path = tmp.name
    try:
        height, width = frames[0].shape[:2]
        writer = cv2.VideoWriter(
            out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
        )
        try:
            if not writer.isOpened():
                raise ValueError("Could not open video encoder")
            for frame in frames:
                writer.write(frame)
        finally:
            writer.release()
        with open(out_path, "rb") as fh:
            return fh.read()
    finally:
        os.unlink(out_path)


@retry_hf_inference
def video_label_bytes(data: bytes) -> str:
    """Get video classification using VideoMAE-Base from bytes.

    The clip is reduced to 16 frames of 224x224, re-encoded as a short MP4
    and posted to the hosted model; the label with the highest score wins.

    Args:
        data: Raw bytes of a video file.

    Returns:
        The highest-scoring label reported by the model.

    Raises:
        ValueError: If the video cannot be opened, decoded or re-encoded.
    """
    target_frames = 16
    target_duration = 8  # seconds the sampled clip is meant to span

    frames = _sample_video_frames(data, target_frames, target_duration)
    # cv2.imencode only handles still images, so the sampled frames are
    # written out through a real video encoder instead.
    processed_bytes = _encode_frames_mp4(frames, target_frames / target_duration)

    headers = {"Content-Type": "application/octet-stream"}
    preds = client.post(
        "MCG-NJU/videomae-base-finetuned-ucf101",
        data=processed_bytes,
        headers=headers,
    )
    # NOTE(review): the client appears to return the raw response body;
    # decode it when it has not already been parsed into Python objects.
    if isinstance(preds, (bytes, bytearray, str)):
        preds = json.loads(preds)

    return max(preds, key=lambda pred: pred["score"])["label"]
|
|
|
def sheet_answer_bytes(data: bytes) -> str:
    """Parse spreadsheet bytes and report the summed "Sales" of the "Food" category.

    The bytes are read as an Excel workbook first; if pandas raises
    ``ValueError`` they are re-read as CSV. When the table lacks either a
    "Category" or a "Sales" column, a fixed placeholder string is returned.
    """
    try:
        table = pd.read_excel(io.BytesIO(data))
    except ValueError:
        table = pd.read_csv(io.BytesIO(data))

    required_columns = {"Category", "Sales"}
    if not required_columns.issubset(table.columns):
        return "sheet_answer_placeholder"

    is_food = table["Category"] == "Food"
    food_sales = table[is_food]["Sales"].sum()
    return f"{food_sales:.2f}"
|
|
|
def run_python(code: str) -> str:
    """Quick & dirty evaluator for Python code.

    The (dedented) source is written to a temporary ``.py`` file, executed
    with the current interpreter, and the temporary file is removed again.

    SECURITY: this runs arbitrary code with the privileges of this process
    and without any sandboxing; only feed it trusted input.

    Args:
        code: Python source text; common leading indentation is stripped.

    Returns:
        The script's standard output, decoded and stripped.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
        subprocess.TimeoutExpired: If the script runs longer than 10 seconds.
    """
    # UTF-8 is what the interpreter expects for source files, regardless of
    # the platform's locale encoding.
    with tempfile.NamedTemporaryFile(
        "w+", suffix=".py", delete=False, encoding="utf-8"
    ) as f:
        f.write(textwrap.dedent(code))
        script_path = f.name

    # The handle is closed before execution so the child process can open
    # the file on every platform.
    try:
        out = subprocess.check_output([sys.executable, script_path], timeout=10)
    finally:
        # delete=False means nothing else cleans the script up.
        try:
            os.unlink(script_path)
        except OSError:
            # Best-effort cleanup; never mask the script's own outcome.
            pass
    return out.decode().strip()
|
|
|
def discover_attachment(task_id: str, api_url: str) -> Optional[str]:
    """Probe if a task has an attachment, return URL if it exists.

    Args:
        task_id: Identifier of the task to probe.
        api_url: Base URL of the scoring service.

    Returns:
        ``{api_url}/files/{task_id}`` when the request ends with a 2xx/3xx
        status, otherwise ``None`` (including on any network error).
    """
    probe = f"{api_url}/files/{task_id}"
    try:
        # stream=True avoids downloading the body just to read the status;
        # the context manager closes the response so the pooled connection
        # is released instead of leaking.
        with SESSION.get(probe, stream=True, timeout=10, allow_redirects=True) as r:
            if 200 <= r.status_code < 400:
                return probe
    except requests.RequestException:
        # Best-effort probe: a failed request is treated as "no attachment".
        pass
    return None
|
|
|
|
|
|
|
|
|
|
|
class AgentState(TypedDict):
    """State dictionary handed from node to node in the agent workflow."""

    # Text of the question being answered.
    question: str
    # Final answer; starts empty and is filled in by a processing node.
    answer: str
    # Step marker; the caller initialises it to "route".
    current_step: str
    # Node picked by the router; processing nodes set it to END.
    next_step: str
    # Attachment URL, or an empty string when the task has no file.
    file_url: str
    # Initialised to an empty list by the caller.
    history: List[Dict[str, str]]
|
|
|
|
|
|
|
|
|
|
|
class BasicAgent:
    """A very small agent that can handle text questions and a few file types.

    A LangGraph workflow first routes each task by the type of its
    attachment and then runs exactly one processing node: image, video,
    spreadsheet, Python script, or plain text answered by the OpenAI API.
    """

    JSON_INSTRUCTION = "Return ONLY this exact JSON object: {\"ANSWER\": \"<answer text>\"}"

    # Every node the router may dispatch to; each one leads straight to END.
    _PROCESS_NODES = (
        "process_image",
        "process_video",
        "process_spreadsheet",
        "process_python",
        "process_text",
    )

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Create the OpenAI client and compile the workflow.

        Raises:
            EnvironmentError: If ``OPENAI_API_KEY`` is not set.
        """
        if not OPENAI_API_KEY:
            raise EnvironmentError("OPENAI_API_KEY not set")
        self.llm = OpenAI(api_key=OPENAI_API_KEY)
        self.api_url = api_url
        self.workflow = self._build_workflow()

    def _call_llm(self, prompt: str, max_tokens: int = 256) -> str:
        """Send *prompt* as a single user message and return the stripped reply.

        Errors are logged and re-raised so callers decide how to recover.
        """
        try:
            resp = self.llm.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {"role": "user", "content": prompt},
                ],
                temperature=0,
                top_p=0.1,
                max_tokens=max_tokens,
            )
            # The message content may be None; treat that as an empty
            # reply instead of failing on ``None.strip()``.
            return (resp.choices[0].message.content or "").strip()
        except Exception as e:
            print(f"\nLLM Error: {str(e)}")
            raise

    def _safe_parse(self, raw: str) -> str:
        """Pull ANSWER from the JSON string, tolerant to model chatter.

        Tries, in order: the whole string as JSON, the first ``{...}``
        fragment as JSON, and finally everything after the first colon.
        """
        try:
            return json.loads(raw)["ANSWER"]
        except Exception:
            # Fall through to the more lenient strategies below.
            match = re.search(r'\{.*?\}', raw, re.S)
            if match:
                try:
                    return json.loads(match.group())["ANSWER"]
                except Exception:
                    pass

        return raw.split(':', 1)[-1].strip()

    def __call__(self, question: str, task_id: str = "unknown", file_url: str = "") -> str:
        """Answer *question*, using the task's attachment when there is one.

        Args:
            question: The question text.
            task_id: Task identifier, used for logging and attachment discovery.
            file_url: Attachment URL; when empty the scoring service is probed.

        Returns:
            The answer, or an ``"Error processing question: ..."`` message
            when the workflow fails.
        """
        if not file_url:
            file_url = discover_attachment(task_id, self.api_url) or ""

        state: AgentState = {
            "question": question,
            "answer": "",
            "current_step": "route",
            "next_step": "",
            "file_url": file_url,
            "history": [],
        }

        print(f"\nProcessing task {task_id}")
        print(f"Question: {state['question']}")
        print(f"File URL: {state['file_url']}")

        try:
            # Hand over the complete initial state: the router reads
            # "file_url", so passing only the question made every run fail.
            final_state = self.workflow.invoke(state)

            if "answer" not in final_state:
                raise ValueError(f"☠ No 'answer' key in state; keys = {list(final_state.keys())}")

            return final_state["answer"]

        except Exception as e:
            print(f"Error in workflow execution: {str(e)}")
            return f"Error processing question: {str(e)}"

    def _route_to_tool(self, state: AgentState) -> Dict[str, Any]:
        """Route the state to the appropriate tool based on file type.

        The attachment is downloaded once to inspect its Content-Type; for
        generic or missing types the leading bytes and then the URL's
        extension are consulted. Any failure routes to text processing.
        """
        url = state["file_url"]
        if not url:
            print("No file URL, routing to text processing")
            return {"next_step": "process_text"}

        try:
            response = SESSION.get(url, timeout=30)
            response.raise_for_status()
            data = response.content

            # Drop parameters such as "; charset=utf-8" so the generic-type
            # check below is not defeated by them.
            kind = response.headers.get("Content-Type", "").split(";", 1)[0].strip().lower()
            if kind in ("application/octet-stream", ""):
                # Generic/unknown type: sniff the leading bytes.
                sig = data[:4]
                if sig.startswith(b"\x89PNG"):
                    kind = "image/png"
                elif sig.startswith(b"\xFF\xD8"):
                    kind = "image/jpeg"
                elif sig[:2] == b"PK":
                    kind = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                else:
                    # Last resort: guess from the URL's extension.
                    kind = mimetypes.guess_type(url)[0] or kind

            print(f"Detected file type: {kind}")

            if "image" in kind:
                return {"next_step": "process_image"}
            elif "video" in kind:
                return {"next_step": "process_video"}
            elif "spreadsheet" in kind or "excel" in kind:
                return {"next_step": "process_spreadsheet"}
            elif "python" in kind or url.endswith(".py"):
                return {"next_step": "process_python"}
            else:
                print(f"Unsupported file type: {kind}")
                return {"next_step": "process_text"}

        except Exception as e:
            print(f"Error determining file type: {str(e)}")
            return {"next_step": "process_text"}

    def _process_file(self, state: AgentState, label: str, tool) -> Dict[str, Any]:
        """Download the attachment, run *tool* on its bytes and wrap the outcome.

        Args:
            state: Current workflow state; ``file_url`` must be set.
            label: Human-readable file kind used in log and error messages.
            tool: Callable taking the downloaded bytes and returning the answer.

        Returns:
            A state update with ``answer`` (or an error message) and
            ``next_step`` set to ``END``.
        """
        url = state["file_url"]
        try:
            print(f"Downloading {url} …")
            response = SESSION.get(url, timeout=30)
            response.raise_for_status()
            data = response.content
            print(f"Successfully downloaded file, size: {len(data)} bytes")

            print(f"Processing as {label}...")
            answer = tool(data)
            # A tool may hand back raw bytes; the answer is later
            # serialised as JSON, so make sure it is text.
            if isinstance(answer, (bytes, bytearray)):
                answer = bytes(answer).decode("utf-8", errors="replace")

            print(f"Generated answer: {answer}")
            return {"answer": answer, "next_step": END}
        except Exception as e:
            print(f"\nError processing {label} {url}: {str(e)}")
            return {"answer": f"Error processing {label}: {str(e)}", "next_step": END}

    def _process_image(self, state: AgentState) -> Dict[str, Any]:
        """Process image files using LLaVA."""
        return self._process_file(
            state, "image", lambda data: image_qa_bytes(data, state["question"])
        )

    def _process_video(self, state: AgentState) -> Dict[str, Any]:
        """Process video files using VideoMAE."""
        return self._process_file(state, "video", video_label_bytes)

    def _process_spreadsheet(self, state: AgentState) -> Dict[str, Any]:
        """Process spreadsheet files."""
        return self._process_file(state, "spreadsheet", sheet_answer_bytes)

    def _process_python(self, state: AgentState) -> Dict[str, Any]:
        """Process Python files."""
        return self._process_file(
            state, "Python file", lambda data: run_python(data.decode())
        )

    def _process_text(self, state: AgentState) -> Dict[str, Any]:
        """Process text-only questions using LLM."""
        print("\nProcessing as text-only question...")
        prompt = (
            "\n"
            "Answer this question using the materials provided.\n"
            "\n"
            "QUESTION:\n"
            f"{state['question']}\n"
            "\n"
            f"{self.JSON_INSTRUCTION}\n"
        )
        try:
            raw = self._call_llm(prompt, 300)
            answer = self._safe_parse(raw)
            print(f"Generated answer: {answer}")
            return {"answer": answer, "next_step": END}
        except Exception as e:
            print(f"\nLLM Error in answer generation: {str(e)}")
            return {
                "answer": "I encountered an error while generating the answer.",
                "next_step": END,
            }

    def _build_workflow(self) -> Graph:
        """Build the workflow graph with conditional edges.

        Layout: ``route`` -> one of the processing nodes -> ``END``.
        """
        sg = StateGraph(state_schema=AgentState)

        sg.add_node("route", self._route_to_tool)
        sg.add_node("process_image", self._process_image)
        sg.add_node("process_video", self._process_video)
        sg.add_node("process_spreadsheet", self._process_spreadsheet)
        sg.add_node("process_python", self._process_python)
        sg.add_node("process_text", self._process_text)

        sg.set_entry_point("route")

        # add_conditional_edges takes a callable that returns the routing
        # key, plus an optional key -> node mapping; the router already
        # stores the target node's name in "next_step".
        sg.add_conditional_edges(
            "route",
            lambda state: state["next_step"],
            {name: name for name in self._PROCESS_NODES},
        )

        for node in self._PROCESS_NODES:
            sg.add_edge(node, END)

        return sg.compile()
|
|
|
|
|
|
|
|
|
|
|
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when
            nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per attempted task, or ``None`` when the run
        stopped before any task was attempted.
    """
    space_id = os.getenv("SPACE_ID")
    print("Space ID: ", space_id)
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        print("Initializing agent...")
        agent = BasicAgent(api_url=api_url)
        print("Agent initialized successfully.")
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # The context manager closes the session (and its pooled connections)
    # on every exit path.
    with requests.Session() as sess:
        print(f"Fetching questions from: {questions_url}")
        try:
            response = sess.get(questions_url, timeout=30)
            response.raise_for_status()
            questions_data = response.json()
            # Anything other than a non-empty list cannot be iterated as tasks.
            if not questions_data or not isinstance(questions_data, list):
                print("Fetched questions list is empty.")
                return "Fetched questions list is empty or invalid format.", None
            print(f"Fetched {len(questions_data)} questions.")
        except Exception as e:
            print(f"Error fetching questions: {e}")
            return f"Error fetching questions: {e}", None

        results_log = []
        answers_payload = []

        for item in questions_data:
            task_id = item.get("task_id") if isinstance(item, dict) else None
            if not task_id:
                print("Skipping item without task_id")
                continue

            try:
                print(f"\nProcessing question {task_id}...")

                # Validate the question first so no network probe is spent
                # on a task that will be skipped anyway.
                question = item.get("question", "")
                if not question:
                    print(f"Skipping task {task_id} - no question text")
                    continue

                file_url = item.get("file_url") or ""
                if not file_url:
                    file_url = discover_attachment(task_id, api_url) or ""

                print(f"Question: {question}")
                print(f"File URL: {file_url}")

                answer = agent(
                    question=question,
                    task_id=task_id,
                    file_url=file_url,
                )

                if not answer:
                    print(f"Warning: Empty answer for task {task_id}")
                    answer = "No answer generated"

                answers_payload.append({
                    "task_id": task_id,
                    "submitted_answer": answer,
                })
                results_log.append({
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": answer,
                })

                print(f"Successfully processed task {task_id}")

            except Exception as e:
                # Record the failure but keep going with the remaining tasks.
                print(f"Error processing task {task_id}: {e}")
                results_log.append({
                    "Task ID": task_id,
                    "Question": item.get("question", ""),
                    "Submitted Answer": f"ERROR: {e}",
                })

        if not answers_payload:
            print("No answers were generated.")
            return "No answers were generated. Please check the logs for details.", pd.DataFrame(results_log)

        print(f"\nSubmitting {len(answers_payload)} answers...")
        submission_data = {
            "username": username.strip(),
            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
            "answers": answers_payload,
        }

        try:
            print(f"Submitting to: {submit_url}")
            print(f"Submission data: {json.dumps(submission_data, indent=2)}")

            response = sess.post(submit_url, json=submission_data, timeout=60)
            response.raise_for_status()
            result_data = response.json()

            final_status = (
                f"Submission Successful!\n"
                f"User: {result_data.get('username')}\n"
                f"Overall Score: {result_data.get('score', 'N/A')}% "
                f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
                f"Message: {result_data.get('message', 'No message received.')}"
            )
            print(final_status)
            return final_status, pd.DataFrame(results_log)

        except Exception as e:
            error_msg = f"Submission Failed: {str(e)}"
            print(error_msg)
            return error_msg, pd.DataFrame(results_log)
|
|
|
def attachment_url(task_id: str, api_url: str, sess: requests.Session) -> str | None:
    """Send a HEAD request for the task's file endpoint.

    Returns the endpoint URL when the response status is exactly 200, and
    ``None`` for any other status or when the request itself fails.
    """
    candidate = f"{api_url}/files/{task_id}"
    try:
        head_response = sess.head(candidate, timeout=10)
    except requests.RequestException:
        return None
    return candidate if head_response.status_code == 200 else None
|
|
|
|
|
# Gradio UI: a login button, a run button, and two outputs (status text and
# a table of per-task results) wired to run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
        # One width per column of the results log:
        # Task ID, Question, Submitted Answer.
        column_widths=["10%", "45%", "45%"],
    )

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
|
|
|
# Script entry point: report the Space-related environment, then start the UI.
if __name__ == "__main__":
    print(f"\n{'-' * 30} App Starting {'-' * 30}")

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    # Hosting information, when the environment provides it.
    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    # Repository links, when the Space id is known.
    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Closing rule: 60 dashes plus the 14 characters of " App Starting ".
    print("-" * 74 + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
|
|