Update app.py
app.py CHANGED
@@ -1,335 +1,195 @@
 import os
 import gradio as gr
 import requests
-import inspect
-import pandas as pd
 import ast
-import operator
-import time
 import json
+import time
+import pandas as pd
 from datetime import datetime
-from typing import List, Dict, Any,
+from typing import List, Dict, Any, Annotated
 from langgraph.graph import Graph, StateGraph
-from langgraph.prebuilt import ToolNode
-from tools import simple_search, jina_search_tool
-from openai import OpenAI
 from typing_extensions import TypedDict
+from openai import OpenAI
+
+# -------------------------
+# Utility helpers
+# -------------------------
 
-def override(_, new):
+def override(_, new):
+    return new
 
-def merge_dicts(
-    """Merge two dictionaries, with values
-    return {**
+def merge_dicts(old: Dict, new: Dict) -> Dict:
+    """Merge two dictionaries, with *new* values taking precedence."""
+    return {**old, **new}
+
+# -------------------------
+# Environment & constants
+# -------------------------
 
-print("trial")
-# (Keep Constants as is)
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-JINA_API_KEY = os.getenv("JINA_API_KEY")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+JINA_API_KEY = os.getenv("JINA_API_KEY")
 
-# Create logs directory if it doesn't exist
 LOGS_DIR = "question_logs"
 os.makedirs(LOGS_DIR, exist_ok=True)
 
+# -------------------------
+# Jina AI search tool (replaces DDG + Reader)
+# -------------------------
+
+def jina_search_tool(query: str, api_key: str, max_results: int = 5) -> List[str]:
+    """Return *max_results* clean markdown snippets for *query* using s.jina.ai."""
+    endpoint = f"https://s.jina.ai/{query.replace(' ', '+')}"
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Accept": "application/json",
+        "User-Agent": "Mozilla/5.0",
+    }
+    resp = requests.get(endpoint, headers=headers, timeout=15)
+    if resp.status_code != 200:
+        raise RuntimeError(f"Jina search failed with status {resp.status_code}: {resp.text[:200]}")
+    data = resp.json()
+    return [item.get("content", "") for item in data.get("results", [])][:max_results]
+
+# -------------------------
+# Logging helper
+# -------------------------
+
 def log_to_file(task_id: str, question: str, log_data: Dict[str, Any]):
-
-
-
-
-
-
-
-
-        "timestamp": timestamp,
-        "logs": log_data
-    }
-
-        print(f"\n=== Saving Logs ===")
-        print(f"Task ID: {task_id}")
-        print(f"Question: {question}")
-        print(f"Log Data: {json.dumps(log_data, indent=2)}")
-
-        with open(filename, 'w', encoding='utf-8') as f:
-            json.dump(log_entry, f, indent=2, ensure_ascii=False)
-
-        print(f"Logs saved to {filename}")
-    except Exception as e:
-        print(f"Error saving logs: {e}")
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = os.path.join(LOGS_DIR, f"question_{task_id}_{ts}.json")
+    with open(filename, "w", encoding="utf-8") as f:
+        json.dump({"task_id": task_id, "question": question, "timestamp": ts, "logs": log_data}, f, indent=2)
+
+# -------------------------
+# State definition
+# -------------------------
 
 class AgentState(TypedDict):
     question: Annotated[str, override]
     current_step: Annotated[str, override]
-    tool_output: Annotated[str, override]
     final_answer: Annotated[str, override]
-    history: Annotated[List[Dict[str, str]],
-
+    history: Annotated[List[Dict[str, str]], list.__add__]
+    needs_search: Annotated[bool, override]
    search_query: Annotated[str, override]
    task_id: Annotated[str, override]
-    logs: Annotated[Dict[str, Any], merge_dicts]
+    logs: Annotated[Dict[str, Any], merge_dicts]
+
+# -------------------------
+# BasicAgent implementation
+# -------------------------
 
 class BasicAgent:
     def __init__(self):
-        print("Initializing BasicAgent with OpenAI...")
         if not OPENAI_API_KEY:
-            raise
-
-
+            raise EnvironmentError("OPENAI_API_KEY not set")
+        if not JINA_API_KEY:
+            raise EnvironmentError("JINA_API_KEY not set")
         self.llm = OpenAI(api_key=OPENAI_API_KEY)
-
-
-
-
-
-
-
-
-
-
-
-
-            model="gpt-4.1-nano",
-            messages=[
-                {"role": "system", "content": "You are a helpful AI assistant that provides clear and concise answers."},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=200,
-            temperature=0.7,
-            top_p=0.95,
-            frequency_penalty=0.1
-        )
-        print("=== Received response ===")
-        response_text = response.choices[0].message.content
-        print(response_text)
-        return response_text
-    except Exception as e:
-        print(f"Error calling LLM API: {e}")
-        return f"Error getting response from LLM: {str(e)}"
-
-    def _create_workflow(self) -> Graph:
-        """Create the agent workflow using LangGraph."""
-        # Create the workflow with state schema
-        print("Creating Stategraph : error happens here?")
-        workflow = StateGraph(state_schema=AgentState)
-        print("Stategraph created")
-        # Add nodes
-        workflow.add_node("analyze", self._analyze_question)
-        workflow.add_node("search", self._use_search)
-        workflow.add_node("generate_answer", self._generate_final_answer)
-
-        # Define edges
-        workflow.add_edge("analyze", "search")
-        workflow.add_edge("analyze", "generate_answer")
-        workflow.add_edge("search", "generate_answer")
-
-        # Define conditional edges
-        def router(state: AgentState) -> str:
-            if state["current_step"] == 'search':
-                return 'search'
-            elif state["current_step"] == 'final_answer':
-                return 'generate_answer'
-            return 'analyze'
-
-        workflow.add_conditional_edges(
-            "analyze",
-            router,
-            {
-                "search": "search",
-                "final_answer": "generate_answer"
-            }
+        self.workflow = self._build_workflow()
+
+    # ---- Low‑level LLM call
+    def _call_llm(self, prompt: str, max_tokens: int = 256) -> str:
+        resp = self.llm.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "system", "content": "You are a careful reasoning assistant."},
+                {"role": "user", "content": prompt},
+            ],
+            temperature=0.3,
+            max_tokens=max_tokens,
         )
-
-        # Set entry and exit points
-        workflow.set_entry_point("analyze")
-        workflow.set_finish_point("generate_answer")
-
-        return workflow.compile()
+        return resp.choices[0].message.content.strip()
 
+    # ---- Workflow nodes
     def _analyze_question(self, state: AgentState) -> AgentState:
-
-
-
-
-        {
-
-
-        }}"""
-
+        prompt = (
+            "You will receive a user question. Think step‑by‑step to decide whether external web search is required. "
+            "Respond ONLY with a valid Python dict literal in the following format and NOTHING else:\n"
+            "{\n 'needs_search': bool,\n 'search_query': str\n} \n\n"
+            f"Question: {state['question']}"
+        )
+        raw = self._call_llm(prompt)
         try:
-
-
-
-
-
-
-            state["logs"] = {
-                "analyze": {
-                    "prompt": prompt,
-                    "response": llm_response,
-                    "timestamp": datetime.now().isoformat()
-                }
-            }
-
-            analysis = ast.literal_eval(llm_response)
-            state["needs_more_info"] = analysis.get('needs_search', False)
-            state["search_query"] = analysis.get('search_query', '')
-
-            if analysis.get('needs_search', False):
-                state["current_step"] = 'search'
-            else:
-                state["current_step"] = 'final_answer'
-        except (ValueError, SyntaxError) as e:
-            print(f"Error parsing LLM response: {e}")
-            # Default to search if we can't parse the response
-            state["needs_more_info"] = True
+            decision = ast.literal_eval(raw)
+            state["needs_search"] = bool(decision.get("needs_search", False))
+            state["search_query"] = decision.get("search_query", state["question"])
+        except Exception:
+            # fallback: assume search needed
+            state["needs_search"] = True
             state["search_query"] = state["question"]
-
-
-
-
-
-
-                "timestamp": datetime.now().isoformat()
-            }
-        }
-
+            decision = {"parse_error": raw}
+        state["logs"] = {
+            "analyze": {"prompt": prompt, "llm_response": raw, "decision": decision}
+        }
+        state["current_step"] = "search" if state["needs_search"] else "answer"
+        state["history"].append({"step": "analyze", "output": decision})
         return state
 
-    def
-        """Use the search tool."""
-        time.sleep(2) # Sleep before search
-
+    def _perform_search(self, state: AgentState) -> AgentState:
         try:
-
-            print(f"Search Query: {state['search_query']}")
-
-            # Try Jina search first, fall back to simple search if it fails
-            search_results = []
-            if JINA_API_KEY:
-                try:
-                    search_results = jina_search_tool(
-                        query=state["search_query"],
-                        api_key=JINA_API_KEY
-                    )
-                    print("Using Jina search results")
-                except Exception as e:
-                    print(f"Jina search failed: {e}, falling back to simple search")
-                    search_results = simple_search(
-                        query=state["search_query"],
-                        max_results=3
-                    )
-            else:
-                print("No Jina API key found, using simple search")
-                search_results = simple_search(
-                    query=state["search_query"],
-                    max_results=3
-                )
-
-            print("Search Results:")
-            for i, result in enumerate(search_results, 1):
-                print(f"{i}. {result}")
-
-            # Log the search step
-            state["logs"]["search"] = {
-                "query": state["search_query"],
-                "results": search_results,
-                "timestamp": datetime.now().isoformat(),
-                "search_type": "jina" if JINA_API_KEY and search_results else "simple"
-            }
-
-            state["history"].append({
-                'step': 'search',
-                'query': state["search_query"],
-                'results': search_results
-            })
-            state["needs_more_info"] = False
-            state["current_step"] = 'final_answer'
+            results = jina_search_tool(state["search_query"], JINA_API_KEY)
         except Exception as e:
-
-
-
-
-            })
-            state["current_step"] = 'final_answer'
-
-            # Log the error
-            state["logs"]["search_error"] = {
-                "error": str(e),
-                "timestamp": datetime.now().isoformat()
-            }
+            results = [f"SEARCH_ERROR: {e}"]
+        state["history"].append({"step": "search", "results": results})
+        state["logs"]["search"] = {"query": state["search_query"], "results": results}
+        state["current_step"] = "answer"
         return state
 
-    def
-        ""
-
-
-
-
-
-
-
-
-
-
-        print("\n=== Generate Final Answer ===")
-        print(f"Question: {state['question']}")
-        print("History:")
-        print(history_str)
-
-        llm_response = self._call_llm_api(prompt)
-        print("\nFinal Answer:")
-        print(llm_response)
-
-        # Log the final answer generation
-        state["logs"]["final_answer"] = {
-            "prompt": prompt,
-            "response": llm_response,
-            "history": history_str,
-            "timestamp": datetime.now().isoformat()
-        }
-
-        state["final_answer"] = llm_response
+    def _generate_answer(self, state: AgentState) -> AgentState:
+        history_text = "\n".join(str(item) for item in state["history"])
+        prompt = (
+            f"Answer the user question as directly as possible. If sources were retrieved, incorporate them.\n"
+            f"Question: {state['question']}\n\nContext:\n{history_text}\n\n"
+            "Give ONLY the final answer without extra formatting or explanation."
+        )
+        answer = self._call_llm(prompt, max_tokens=150)
+        state["final_answer"] = answer
+        state["history"].append({"step": "answer", "output": answer})
+        state["logs"]["final_answer"] = {"prompt": prompt, "response": answer}
+        state["current_step"] = "done"
         return state
 
+    # ---- Build LangGraph workflow
+    def _build_workflow(self) -> Graph:
+        sg = StateGraph(state_schema=AgentState)
+        sg.add_node("analyze", self._analyze_question)
+        sg.add_node("search", self._perform_search)
+        sg.add_node("answer", self._generate_answer)
+
+        # transitions
+        sg.add_edge("analyze", "search")
+        sg.add_edge("analyze", "answer")
+        sg.add_edge("search", "answer")
+
+        def router(state: AgentState):
+            return state["current_step"]
+
+        sg.add_conditional_edges("analyze", router, {"search": "search", "answer": "answer"})
+        sg.set_entry_point("analyze")
+        sg.set_finish_point("answer")
+        return sg.compile()
+
+    # ---- Public call
     def __call__(self, question: str, task_id: str = "unknown") -> str:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            final_state = self.workflow.invoke(initial_state)
-
-            # Ensure logs directory exists
-            os.makedirs(LOGS_DIR, exist_ok=True)
-
-            # Save logs to file
-            if final_state["logs"]: # Only save if we have logs
-                log_to_file(
-                    task_id=final_state["task_id"],
-                    question=final_state["question"],
-                    log_data=final_state["logs"]
-                )
-            else:
-                print("No logs to save in final state")
-
-            return final_state["final_answer"]
-
-        except Exception as e:
-            print(f"Error in agent processing: {e}")
-            return f"I encountered an error while processing your question: {str(e)}"
+        state: AgentState = {
+            "question": question,
+            "current_step": "analyze",
+            "final_answer": "",
+            "history": [],
+            "needs_search": False,
+            "search_query": "",
+            "task_id": task_id,
+            "logs": {},
+        }
+        final_state = self.workflow.invoke(state)
+        if final_state["logs"]:
+            log_to_file(task_id, question, final_state["logs"])
+        return final_state["final_answer"]
+
+# ----------------------------------------------------------------------------------
+# Gradio Interface & Submission Routines
+# ----------------------------------------------------------------------------------
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
@@ -337,7 +197,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")
+    space_id = os.getenv("SPACE_ID")
     print("Space ID: ", space_id)
     if profile:
         username = f"{profile.username}"
@@ -352,9 +212,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent
     try:
-        print("Initializing agent
+        print("Initializing agent...")
         agent = BasicAgent()
-        print("Agent initialized successfully
+        print("Agent initialized successfully.")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -397,50 +257,25 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
 
         try:
-            # Initialize the state for this question
-            initial_state = {
-                "question": question_text,
-                "current_step": "analyze",
-                "tool_output": "",
-                "final_answer": "",
-                "history": [],
-                "needs_more_info": False,
-                "search_query": "",
-                "task_id": task_id,
-                "logs": {}
-            }
-
-            # Run the workflow for this question
             print(f"\nProcessing question {task_id}: {question_text[:50]}...")
-
-
-            # Log the workflow history
-            workflow_history = "\n".join([
-                f"Step: {h['step']}\n" +
-                f"Input: {h.get('input', h.get('query', ''))}\n" +
-                f"Output: {h.get('output', h.get('results', h.get('error', '')))}"
-                for h in final_state["history"]
-            ])
+            answer = agent(question_text, task_id)
 
             # Add to results
-
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text,
-                "Submitted Answer":
-                "Workflow History": workflow_history
+                "Submitted Answer": answer
             })
 
-            print(f"Completed question {task_id}
+            print(f"Completed question {task_id}")
 
         except Exception as e:
-            print(f"Error running agent
+            print(f"Error running agent on task {task_id}: {e}")
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text,
-                "Submitted Answer": f"
-                "Workflow History": "Error occurred before workflow completion"
+                "Submitted Answer": f"ERROR: {e}"
             })
 
     if not answers_payload:
@@ -523,7 +358,6 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
     run_button.click(
@@ -535,7 +369,7 @@ if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
+    space_id_startup = os.getenv("SPACE_ID")
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -543,7 +377,7 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup:
+    if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -553,4 +387,4 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
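
For reference, a minimal local smoke test of the rewritten agent (a sketch, not part of this commit): it assumes the new app.py is importable as app, that the Space's dependencies (gradio, langgraph, openai, requests) are installed, and that OPENAI_API_KEY and JINA_API_KEY are exported in the environment.

# smoke_test.py - hypothetical local check, not in the repo
import os

from app import BasicAgent, jina_search_tool

# Probe the search tool on its own; jina_search_tool raises RuntimeError
# on any non-200 response from s.jina.ai.
snippets = jina_search_tool("capital of France", os.environ["JINA_API_KEY"], max_results=2)
print(f"{len(snippets)} snippet(s) retrieved")

# Then run the full analyze -> (search) -> answer graph end to end;
# BasicAgent.__init__ raises EnvironmentError if either key is missing.
agent = BasicAgent()
print(agent("What is the capital of France?", task_id="smoke-test"))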