Abbasid committed on
Commit
2da7120
·
verified ·
1 Parent(s): 1376719

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -52
app.py CHANGED
@@ -1,9 +1,7 @@
1
- # app.py
2
  """
 
3
  This script provides the Gradio web interface to run the evaluation.
4
- ## MODIFICATION: This version is simplified to work with the new agent architecture.
5
- It no longer performs file-type detection or prompt enhancement, as that responsibility
6
- has been moved into the agent's 'multimodal_router'.
7
  """
8
 
9
  import os
@@ -11,16 +9,14 @@ import re
11
  import gradio as gr
12
  import requests
13
  import pandas as pd
14
- # --- Import HumanMessage ---
15
- from langchain_core.messages import HumanMessage
16
 
17
  from agent import create_agent_executor
18
 
19
  # --- Constants ---
20
- # Ensure the URL is correctly formatted (remove trailing spaces)
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- # --- Helper function to parse the agent's output (remains the same) ---
24
  def parse_final_answer(agent_response: str) -> str:
25
  match = re.search(r"FINAL ANSWER:\s*(.*)", agent_response, re.IGNORECASE | re.DOTALL)
26
  if match: return match.group(1).strip()
@@ -28,12 +24,74 @@ def parse_final_answer(agent_response: str) -> str:
28
  if lines: return lines[-1].strip()
29
  return "Could not parse a final answer."
30
 
31
- ## MODIFICATION: The `detect_file_type` function has been removed.
32
- ## It is now redundant as this logic is handled inside the agent.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- ## MODIFICATION: The `create_enhanced_prompt` function has been removed.
35
- ## It was causing errors by trying to instruct the agent to use tools that no longer exist.
36
- ## The agent is now responsible for handling the raw input itself.
 
 
 
 
 
 
 
 
37
 
38
  def run_and_submit_all(profile: gr.OAuthProfile | None):
39
  """
@@ -46,17 +104,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
46
  username = profile.username
47
  print(f"User logged in: {username}")
48
 
49
- # --- Fix SPACE_ID retrieval and URL construction ---
50
- # Ensure SPACE_ID environment variable is set correctly in your Hugging Face Space.
51
  space_id = os.getenv("SPACE_ID")
52
- if not space_id:
53
- # Fallback or error handling if SPACE_ID is not set
54
- # You might need to adjust this based on how your space is configured
55
- # For example, if running locally, you might not have SPACE_ID.
56
- # This is a placeholder; adjust as needed.
57
- # Consider using a default or making it configurable.
58
- space_id = "your-username/your-space-name" # Example placeholder
59
- print(f"Warning: SPACE_ID environment variable not found. Using placeholder: {space_id}")
60
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
61
  questions_url = f"{DEFAULT_API_URL}/questions"
62
  submit_url = f"{DEFAULT_API_URL}/submit"
@@ -64,7 +112,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
64
  # 1. Instantiate Agent
65
  print("Initializing your custom agent...")
66
  try:
67
- agent_executor = create_agent_executor(provider="groq")
68
  except Exception as e:
69
  return f"Fatal Error: Could not initialize agent. Check logs. Details: {e}", None
70
 
@@ -90,24 +138,21 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
90
 
91
  print(f"\n--- Running Task {i+1}/{len(questions_data)} (ID: {task_id}) ---")
92
 
 
93
  file_url = item.get("file_url")
94
 
95
- ## MODIFICATION: Prompt creation is now much simpler.
96
- # We just combine the question and the URL into one string.
97
- # The agent's multimodal_router will handle the rest.
98
  if file_url:
99
- full_question_text = f"{question_text}\n\nHere is the relevant file: {file_url}"
100
- print(f"File provided: {file_url}")
101
- else:
102
- full_question_text = question_text
103
 
104
- print(f"Raw Prompt for Agent:\n{full_question_text}")
105
 
106
  try:
107
- # --- FIX: Pass a list of HumanMessage objects ---
108
- # The agent expects MessagesState["messages"] to be a list of BaseMessage objects.
109
- input_state = {"messages": [HumanMessage(content=full_question_text)]}
110
- result = agent_executor.invoke(input_state)
111
 
112
  raw_answer = result['messages'][-1].content
113
  submitted_answer = parse_final_answer(raw_answer)
@@ -120,6 +165,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
120
  "Task ID": task_id,
121
  "Question": question_text,
122
  "File URL": file_url or "None",
 
123
  "Submitted Answer": submitted_answer
124
  })
125
 
@@ -131,13 +177,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
131
  "Task ID": task_id,
132
  "Question": question_text,
133
  "File URL": file_url or "None",
 
134
  "Submitted Answer": error_msg
135
  })
136
 
137
  if not answers_payload:
138
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
139
 
140
- # 4. Prepare and 5. Submit (remains the same)
141
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
142
  print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...")
143
  try:
@@ -153,7 +200,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
153
  print(status_message)
154
  return status_message, pd.DataFrame(results_log)
155
 
156
- # --- Gradio UI (remains largely the same) ---
157
  with gr.Blocks(title="Multimodal Agent Evaluation") as demo:
158
  gr.Markdown("# Multimodal Agent Evaluation Runner")
159
  gr.Markdown("This agent can process images, YouTube videos, audio files, and perform web searches.")
@@ -165,22 +212,11 @@ with gr.Blocks(title="Multimodal Agent Evaluation") as demo:
165
  label="Questions and Agent Answers",
166
  wrap=True,
167
  row_count=10,
168
- # MODIFICATION: Removed the 'File Type' column as it's no longer detected here.
169
- # Adjust column widths if necessary based on actual content/columns
170
- # column_widths=[80, 250, 200, 250]
171
  )
172
 
173
- # We also remove "File Type" from the results_log being displayed
174
- # (Though it's not in the log anymore, this is a safe check)
175
- def display_wrapper(profile):
176
- status, df = run_and_submit_all(profile)
177
- # Ensure df is a DataFrame before attempting operations
178
- if isinstance(df, pd.DataFrame) and "File Type" in df.columns:
179
- df = df.drop(columns=["File Type"])
180
- return status, df
181
-
182
- run_button.click(fn=display_wrapper, outputs=[status_output, results_table])
183
 
184
  if __name__ == "__main__":
185
  print("\n" + "-"*30 + " Multimodal App Starting " + "-"*30)
186
- demo.launch()
 
 
1
  """
2
+ app.py
3
  This script provides the Gradio web interface to run the evaluation.
4
+ This version properly handles multimodal inputs including images, videos, and audio.
 
 
5
  """
6
 
7
  import os
 
9
  import gradio as gr
10
  import requests
11
  import pandas as pd
12
+ from urllib.parse import urlparse
 
13
 
14
  from agent import create_agent_executor
15
 
16
  # --- Constants ---
 
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+ # --- Helper function to parse the agent's output ---
20
  def parse_final_answer(agent_response: str) -> str:
21
  match = re.search(r"FINAL ANSWER:\s*(.*)", agent_response, re.IGNORECASE | re.DOTALL)
22
  if match: return match.group(1).strip()
 
24
  if lines: return lines[-1].strip()
25
  return "Could not parse a final answer."
26
 
27
# Extension tuples are passed straight to str.endswith(), which accepts a tuple.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg')
_VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm')
_AUDIO_EXTENSIONS = ('.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a')
# NOTE: every host in this tuple is reported as "youtube", including vimeo.com;
# that label is kept so existing callers see the same category as before.
_VIDEO_SITE_DOMAINS = ('youtube.com', 'youtu.be', 'vimeo.com')


def detect_file_type(url: str) -> str:
    """Classify the resource behind *url* as a coarse media category.

    The URL is inspected first (path extension and host name). Only when that
    is inconclusive is a HEAD request issued and the Content-Type header
    consulted.

    Args:
        url: The URL to classify. A falsy value (``None`` or ``""``) is
            accepted and yields ``"unknown"``.

    Returns:
        One of ``"image"``, ``"youtube"``, ``"video"``, ``"audio"`` or
        ``"unknown"``. Network failures during the header probe are treated
        as ``"unknown"``; they are never raised to the caller.
    """
    if not url:
        return "unknown"

    try:
        parsed = urlparse(url.strip().lower())
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): nothing to classify.
        return "unknown"

    # Match against the parsed path/host rather than the raw string so that
    # an extension-like fragment elsewhere in the URL (for instance ".mov"
    # inside "cdn.movies.example.com") cannot cause a false positive.
    path = parsed.path
    # For scheme-less input ("youtube.com/watch?v=...") urlparse leaves the
    # host in the path, so fall back to the first path segment.
    host = parsed.hostname or path.split("/", 1)[0]

    if path.endswith(_IMAGE_EXTENSIONS):
        return "image"
    if any(host == domain or host.endswith("." + domain)
           for domain in _VIDEO_SITE_DOMAINS):
        return "youtube"
    if path.endswith(_VIDEO_EXTENSIONS):
        return "video"
    if path.endswith(_AUDIO_EXTENSIONS):
        return "audio"

    # Inconclusive URL: ask the server. HEAD does not follow redirects unless
    # told to, and a redirecting file link would otherwise report the
    # redirect response's Content-Type instead of the file's.
    try:
        response = requests.head(url, timeout=5, allow_redirects=True)
    except requests.RequestException:
        # Best-effort probe: an unreachable or invalid URL is simply unknown.
        return "unknown"

    content_type = response.headers.get('content-type', '').lower()
    # Same precedence as before: image, then audio, then video.
    for kind in ("image", "audio", "video"):
        if kind in content_type:
            return kind

    return "unknown"
63
+
64
+ def create_enhanced_prompt(question_text: str, file_url: str = None) -> str:
65
+ """Create an enhanced prompt that guides the agent to use appropriate tools."""
66
+
67
+ if not file_url:
68
+ return question_text
69
+
70
+ file_type = detect_file_type(file_url)
71
+
72
+ if file_type == "image":
73
+ return f"""{question_text}
74
+
75
+ [IMAGE ATTACHMENT]: {file_url}
76
+ INSTRUCTION: There is an image attached to this question. You MUST use the 'describe_image' tool to analyze this image before answering the question."""
77
+
78
+ elif file_type == "youtube":
79
+ return f"""{question_text}
80
+
81
+ [YOUTUBE VIDEO]: {file_url}
82
+ INSTRUCTION: There is a YouTube video attached to this question. You MUST use the 'process_youtube_video' tool to analyze this video before answering the question."""
83
 
84
+ elif file_type == "audio":
85
+ return f"""{question_text}
86
+
87
+ [AUDIO FILE]: {file_url}
88
+ INSTRUCTION: There is an audio file attached to this question. You MUST use the 'process_audio_file' tool to analyze this audio before answering the question."""
89
+
90
+ else:
91
+ return f"""{question_text}
92
+
93
+ [ATTACHMENT]: {file_url}
94
+ INSTRUCTION: There is a file attachment. Analyze the URL and use the appropriate tool to process this content before answering the question."""
95
 
96
  def run_and_submit_all(profile: gr.OAuthProfile | None):
97
  """
 
104
  username = profile.username
105
  print(f"User logged in: {username}")
106
 
 
 
107
  space_id = os.getenv("SPACE_ID")
 
 
 
 
 
 
 
 
108
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
109
  questions_url = f"{DEFAULT_API_URL}/questions"
110
  submit_url = f"{DEFAULT_API_URL}/submit"
 
112
  # 1. Instantiate Agent
113
  print("Initializing your custom agent...")
114
  try:
115
+ agent_executor = create_agent_executor(provider="google") # Using Google for better multimodal support
116
  except Exception as e:
117
  return f"Fatal Error: Could not initialize agent. Check logs. Details: {e}", None
118
 
 
138
 
139
  print(f"\n--- Running Task {i+1}/{len(questions_data)} (ID: {task_id}) ---")
140
 
141
+ # Get file URL if it exists
142
  file_url = item.get("file_url")
143
 
144
+ # Create enhanced prompt that instructs the agent to use appropriate tools
145
+ full_question_text = create_enhanced_prompt(question_text, file_url)
146
+
147
  if file_url:
148
+ file_type = detect_file_type(file_url)
149
+ print(f"File detected: {file_url} (Type: {file_type})")
 
 
150
 
151
+ print(f"Enhanced Prompt for Agent:\n{full_question_text}")
152
 
153
  try:
154
+ # Pass the enhanced question to the agent
155
+ result = agent_executor.invoke({"messages": [("user", full_question_text)]})
 
 
156
 
157
  raw_answer = result['messages'][-1].content
158
  submitted_answer = parse_final_answer(raw_answer)
 
165
  "Task ID": task_id,
166
  "Question": question_text,
167
  "File URL": file_url or "None",
168
+ "File Type": detect_file_type(file_url) if file_url else "None",
169
  "Submitted Answer": submitted_answer
170
  })
171
 
 
177
  "Task ID": task_id,
178
  "Question": question_text,
179
  "File URL": file_url or "None",
180
+ "File Type": detect_file_type(file_url) if file_url else "None",
181
  "Submitted Answer": error_msg
182
  })
183
 
184
  if not answers_payload:
185
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
186
 
187
+ # 4. Prepare and 5. Submit
188
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
189
  print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...")
190
  try:
 
200
  print(status_message)
201
  return status_message, pd.DataFrame(results_log)
202
 
203
+ # --- Gradio UI ---
204
  with gr.Blocks(title="Multimodal Agent Evaluation") as demo:
205
  gr.Markdown("# Multimodal Agent Evaluation Runner")
206
  gr.Markdown("This agent can process images, YouTube videos, audio files, and perform web searches.")
 
212
  label="Questions and Agent Answers",
213
  wrap=True,
214
  row_count=10,
215
+ column_widths=[80, 200, 150, 80, 200]
 
 
216
  )
217
 
218
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
 
 
 
 
 
219
 
220
  if __name__ == "__main__":
221
  print("\n" + "-"*30 + " Multimodal App Starting " + "-"*30)
222
+ demo.launch()