Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import json
|
|
6 |
import time
|
7 |
import pandas as pd
|
8 |
from datetime import datetime
|
9 |
-
from typing import List, Dict, Any, Annotated
|
10 |
from langgraph.graph import Graph, StateGraph
|
11 |
from typing_extensions import TypedDict
|
12 |
from openai import OpenAI
|
@@ -35,6 +35,9 @@ HF_TOKEN = os.getenv("HF_TOKEN")
|
|
35 |
# Initialize HF client
|
36 |
client = InferenceClient(token=HF_TOKEN)
|
37 |
|
|
|
|
|
|
|
38 |
# -------------------------
|
39 |
# Constants
|
40 |
# -------------------------
|
@@ -143,20 +146,17 @@ def video_label_bytes(data: bytes) -> str:
|
|
143 |
)
|
144 |
return sorted(preds, key=lambda x: x["score"], reverse=True)[0]["label"]
|
145 |
|
146 |
-
def sheet_answer_bytes(data: bytes
|
147 |
"""Process spreadsheet data from bytes and return numeric answer."""
|
148 |
-
|
149 |
-
df = pd.read_csv(io.BytesIO(data))
|
150 |
-
else:
|
151 |
df = pd.read_excel(io.BytesIO(data))
|
|
|
|
|
152 |
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
# -------------------------
|
158 |
-
# Code Analysis helpers
|
159 |
-
# -------------------------
|
160 |
|
161 |
def run_python(code: str) -> str:
|
162 |
"""Quick & dirty evaluator for Python code."""
|
@@ -166,6 +166,17 @@ def run_python(code: str) -> str:
|
|
166 |
out = subprocess.check_output([sys.executable, f.name], timeout=10)
|
167 |
return out.decode().strip()
|
168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
# -------------------------
|
170 |
# State definition
|
171 |
# -------------------------
|
@@ -187,12 +198,16 @@ class AgentState(TypedDict):
|
|
187 |
# -------------------------
|
188 |
|
189 |
class BasicAgent:
|
190 |
-
|
|
|
|
|
|
|
|
|
191 |
if not OPENAI_API_KEY:
|
192 |
raise EnvironmentError("OPENAI_API_KEY not set")
|
193 |
self.llm = OpenAI(api_key=OPENAI_API_KEY)
|
|
|
194 |
self.workflow = self._build_workflow()
|
195 |
-
self.session = session
|
196 |
|
197 |
def _call_llm(self, prompt: str, max_tokens: int = 256) -> str:
|
198 |
try:
|
@@ -211,20 +226,25 @@ class BasicAgent:
|
|
211 |
raise
|
212 |
|
213 |
def _safe_parse(self, raw: str) -> str:
|
|
|
214 |
try:
|
215 |
return json.loads(raw)["ANSWER"]
|
216 |
except Exception:
|
217 |
-
#
|
218 |
match = re.search(r'\{.*?\}', raw, re.S)
|
219 |
if match:
|
220 |
try:
|
221 |
return json.loads(match.group())["ANSWER"]
|
222 |
except Exception:
|
223 |
pass
|
224 |
-
#
|
225 |
return raw.split(':', 1)[-1].strip()
|
226 |
|
227 |
def __call__(self, question: str, task_id: str = "unknown", file_url: str = "") -> str:
|
|
|
|
|
|
|
|
|
228 |
state: AgentState = {
|
229 |
"question": question,
|
230 |
"current_step": "answer",
|
@@ -249,7 +269,7 @@ class BasicAgent:
|
|
249 |
if state["file_url"]:
|
250 |
try:
|
251 |
print(f"Downloading {state['file_url']} …")
|
252 |
-
response =
|
253 |
response.raise_for_status()
|
254 |
data = response.content
|
255 |
print(f"Successfully downloaded file, size: {len(data)} bytes")
|
@@ -277,7 +297,7 @@ class BasicAgent:
|
|
277 |
answer = video_label_bytes(data)
|
278 |
elif "spreadsheet" in kind or "excel" in kind:
|
279 |
print("Processing as spreadsheet...")
|
280 |
-
answer = sheet_answer_bytes(data
|
281 |
elif state["file_url"].endswith(".py"):
|
282 |
print("Processing as Python file...")
|
283 |
answer = run_python(data.decode())
|
@@ -308,8 +328,7 @@ Answer this question using the materials provided.
|
|
308 |
QUESTION:
|
309 |
{state['question']}
|
310 |
|
311 |
-
|
312 |
-
{{"ANSWER": "<answer text>"}}
|
313 |
"""
|
314 |
try:
|
315 |
raw = self._call_llm(prompt, 300)
|
|
|
6 |
import time
|
7 |
import pandas as pd
|
8 |
from datetime import datetime
|
9 |
+
from typing import List, Dict, Any, Annotated, Optional
|
10 |
from langgraph.graph import Graph, StateGraph
|
11 |
from typing_extensions import TypedDict
|
12 |
from openai import OpenAI
|
|
|
35 |
# Initialize HF client
|
36 |
client = InferenceClient(token=HF_TOKEN)
|
37 |
|
38 |
+
# Create a single Session for all requests
|
39 |
+
SESSION = requests.Session()
|
40 |
+
|
41 |
# -------------------------
|
42 |
# Constants
|
43 |
# -------------------------
|
|
|
146 |
)
|
147 |
return sorted(preds, key=lambda x: x["score"], reverse=True)[0]["label"]
|
148 |
|
149 |
+
def sheet_answer_bytes(data: bytes, category: str = "Food") -> str:
    """Sum the ``Sales`` column for one ``Category`` in a spreadsheet.

    The bytes are first parsed as an Excel workbook; if pandas rejects them
    with ``ValueError`` they are parsed as CSV instead.

    Args:
        data: Raw file contents (Excel workbook or CSV text).
        category: Value of the ``Category`` column whose sales are summed.
            Defaults to ``"Food"``, the value that used to be hard-coded.

    Returns:
        The total formatted with two decimals (e.g. ``"15.00"``), or the
        literal string ``"sheet_answer_placeholder"`` when the sheet does
        not contain both a ``Category`` and a ``Sales`` column.
    """
    try:
        df = pd.read_excel(io.BytesIO(data))
    except ValueError:
        # Not a recognisable Excel payload; treat it as CSV instead.
        df = pd.read_csv(io.BytesIO(data))

    if {"Category", "Sales"}.issubset(df.columns):
        # Single .loc selection instead of chained indexing.
        total = df.loc[df["Category"] == category, "Sales"].sum()
        return f"{total:.2f}"
    return "sheet_answer_placeholder"
|
|
|
|
|
|
|
160 |
|
161 |
def run_python(code: str) -> str:
|
162 |
"""Quick & dirty evaluator for Python code."""
|
|
|
166 |
out = subprocess.check_output([sys.executable, f.name], timeout=10)
|
167 |
return out.decode().strip()
|
168 |
|
169 |
+
def discover_attachment(task_id: str, api_url: str) -> Optional[str]:
    """Probe whether a task has an attachment and return its URL if so.

    Issues a GET against ``{api_url}/files/{task_id}`` on the shared
    ``SESSION`` and inspects only the status code.

    Args:
        task_id: Identifier of the task to probe.
        api_url: Base URL of the API (no trailing slash expected, since
            ``/files/...`` is appended verbatim).

    Returns:
        The probe URL when the final response status is in ``[200, 400)``,
        otherwise ``None``. Network-level failures also yield ``None``.
    """
    probe = f"{api_url}/files/{task_id}"
    try:
        # stream=True avoids downloading the body just to read the status;
        # the context manager closes the response so the connection is
        # returned to the session pool instead of being left open.
        with SESSION.get(probe, stream=True, timeout=10, allow_redirects=True) as r:
            if 200 <= r.status_code < 400:
                return probe
    except requests.RequestException:
        # Best-effort probe: any transport error means "no attachment found".
        pass
    return None
|
179 |
+
|
180 |
# -------------------------
|
181 |
# State definition
|
182 |
# -------------------------
|
|
|
198 |
# -------------------------
|
199 |
|
200 |
class BasicAgent:
|
201 |
+
"""A very small agent that can handle text questions and a few file types."""
|
202 |
+
|
203 |
+
JSON_INSTRUCTION = "Return ONLY this exact JSON object: {\"ANSWER\": \"<answer text>\"}"
|
204 |
+
|
205 |
+
def __init__(self, api_url: str = DEFAULT_API_URL):
    """Set up the OpenAI client, remember the API base URL, build the workflow.

    Raises:
        EnvironmentError: If the module-level ``OPENAI_API_KEY`` is empty.
    """
    api_key = OPENAI_API_KEY
    if not api_key:
        raise EnvironmentError("OPENAI_API_KEY not set")

    self.llm = OpenAI(api_key=api_key)
    self.api_url = api_url
    # Built last, after the client and URL attributes are in place.
    self.workflow = self._build_workflow()
|
|
|
211 |
|
212 |
def _call_llm(self, prompt: str, max_tokens: int = 256) -> str:
|
213 |
try:
|
|
|
226 |
raise
|
227 |
|
228 |
def _safe_parse(self, raw: str) -> str:
    """Pull ANSWER from the JSON string, tolerant to model chatter.

    Strategy, in order:
      1. Parse ``raw`` as a whole JSON document and read ``"ANSWER"``.
      2. Scan for the first embedded JSON object that has an ``"ANSWER"``
         key. A real JSON decoder is used rather than a brace regex, so
         answers containing ``{``/``}`` or nested objects are not cut short.
      3. As a last resort, return everything after the first colon
         (or the whole text when there is no colon), stripped.

    Args:
        raw: Text returned by the model.

    Returns:
        The extracted answer value.
    """
    try:
        return json.loads(raw)["ANSWER"]
    except (json.JSONDecodeError, KeyError, TypeError):
        # Not a bare JSON object with an ANSWER key; keep looking.
        pass

    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "ANSWER" in candidate:
            return candidate["ANSWER"]
        start = raw.find("{", start + 1)

    # As a last resort, take everything after the first colon.
    return raw.split(":", 1)[-1].strip()
|
242 |
|
243 |
def __call__(self, question: str, task_id: str = "unknown", file_url: str = "") -> str:
|
244 |
+
# 1) if file_url blank, attempt discovery once
|
245 |
+
if not file_url:
|
246 |
+
file_url = discover_attachment(task_id, self.api_url) or ""
|
247 |
+
|
248 |
state: AgentState = {
|
249 |
"question": question,
|
250 |
"current_step": "answer",
|
|
|
269 |
if state["file_url"]:
|
270 |
try:
|
271 |
print(f"Downloading {state['file_url']} …")
|
272 |
+
response = SESSION.get(state["file_url"], timeout=30)
|
273 |
response.raise_for_status()
|
274 |
data = response.content
|
275 |
print(f"Successfully downloaded file, size: {len(data)} bytes")
|
|
|
297 |
answer = video_label_bytes(data)
|
298 |
elif "spreadsheet" in kind or "excel" in kind:
|
299 |
print("Processing as spreadsheet...")
|
300 |
+
answer = sheet_answer_bytes(data)
|
301 |
elif state["file_url"].endswith(".py"):
|
302 |
print("Processing as Python file...")
|
303 |
answer = run_python(data.decode())
|
|
|
328 |
QUESTION:
|
329 |
{state['question']}
|
330 |
|
331 |
+
{self.JSON_INSTRUCTION}
|
|
|
332 |
"""
|
333 |
try:
|
334 |
raw = self._call_llm(prompt, 300)
|