naman1102 committed on
Commit
ee90aca
·
1 Parent(s): 793736b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -20
app.py CHANGED
@@ -6,7 +6,7 @@ import json
6
  import time
7
  import pandas as pd
8
  from datetime import datetime
9
- from typing import List, Dict, Any, Annotated
10
  from langgraph.graph import Graph, StateGraph
11
  from typing_extensions import TypedDict
12
  from openai import OpenAI
@@ -35,6 +35,9 @@ HF_TOKEN = os.getenv("HF_TOKEN")
35
  # Initialize HF client
36
  client = InferenceClient(token=HF_TOKEN)
37
 
 
 
 
38
  # -------------------------
39
  # Constants
40
  # -------------------------
@@ -143,20 +146,17 @@ def video_label_bytes(data: bytes) -> str:
143
  )
144
  return sorted(preds, key=lambda x: x["score"], reverse=True)[0]["label"]
145
 
146
def sheet_answer_bytes(data: bytes, question: str) -> str:
    """Process spreadsheet data from bytes and return numeric answer.

    The payload is parsed as an Excel workbook first; if pandas cannot
    recognise it as one (it raises ``ValueError``), the same bytes are parsed
    as CSV instead. The file format is therefore decided from the data
    itself rather than from the wording of the question.

    Args:
        data: Raw bytes of the downloaded spreadsheet (Excel or CSV).
        question: The task question. Kept for interface compatibility; it is
            not used to choose the parser.

    Returns:
        The sum of the ``Sales`` column over rows whose ``Category`` equals
        ``"Food"``, formatted with two decimal places.

    Raises:
        KeyError: If the sheet has no ``Category`` or ``Sales`` column.
    """
    try:
        df = pd.read_excel(io.BytesIO(data))
    except ValueError:
        # Not an Excel container: fall back to CSV on a fresh buffer.
        df = pd.read_csv(io.BytesIO(data))

    # Calculate total sales for Food category
    total = df[df["Category"] == "Food"]["Sales"].sum()
    return f"{total:.2f}"
156
-
157
- # -------------------------
158
- # Code Analysis helpers
159
- # -------------------------
160
 
161
  def run_python(code: str) -> str:
162
  """Quick & dirty evaluator for Python code."""
@@ -166,6 +166,17 @@ def run_python(code: str) -> str:
166
  out = subprocess.check_output([sys.executable, f.name], timeout=10)
167
  return out.decode().strip()
168
 
 
 
 
 
 
 
 
 
 
 
 
169
  # -------------------------
170
  # State definition
171
  # -------------------------
@@ -187,12 +198,16 @@ class AgentState(TypedDict):
187
  # -------------------------
188
 
189
  class BasicAgent:
190
def __init__(self, session: requests.Session):
    """Create the agent: OpenAI client, workflow, and HTTP session.

    Args:
        session: Session object stored on the instance as ``self.session``.

    Raises:
        EnvironmentError: If ``OPENAI_API_KEY`` is falsy.
    """
    if OPENAI_API_KEY:
        self.llm = OpenAI(api_key=OPENAI_API_KEY)
    else:
        raise EnvironmentError("OPENAI_API_KEY not set")

    # The workflow is built before the session is stored on the instance.
    self.workflow = self._build_workflow()
    self.session = session
196
 
197
  def _call_llm(self, prompt: str, max_tokens: int = 256) -> str:
198
  try:
@@ -211,20 +226,25 @@ class BasicAgent:
211
  raise
212
 
213
  def _safe_parse(self, raw: str) -> str:
 
214
  try:
215
  return json.loads(raw)["ANSWER"]
216
  except Exception:
217
- # grab the first {...} in the text
218
  match = re.search(r'\{.*?\}', raw, re.S)
219
  if match:
220
  try:
221
  return json.loads(match.group())["ANSWER"]
222
  except Exception:
223
  pass
224
- # as a last resort, strip everything before the first colon
225
  return raw.split(':', 1)[-1].strip()
226
 
227
  def __call__(self, question: str, task_id: str = "unknown", file_url: str = "") -> str:
 
 
 
 
228
  state: AgentState = {
229
  "question": question,
230
  "current_step": "answer",
@@ -249,7 +269,7 @@ class BasicAgent:
249
  if state["file_url"]:
250
  try:
251
  print(f"Downloading {state['file_url']} …")
252
- response = self.session.get(state["file_url"], timeout=30)
253
  response.raise_for_status()
254
  data = response.content
255
  print(f"Successfully downloaded file, size: {len(data)} bytes")
@@ -277,7 +297,7 @@ class BasicAgent:
277
  answer = video_label_bytes(data)
278
  elif "spreadsheet" in kind or "excel" in kind:
279
  print("Processing as spreadsheet...")
280
- answer = sheet_answer_bytes(data, state["question"])
281
  elif state["file_url"].endswith(".py"):
282
  print("Processing as Python file...")
283
  answer = run_python(data.decode())
@@ -308,8 +328,7 @@ Answer this question using the materials provided.
308
  QUESTION:
309
  {state['question']}
310
 
311
- Return ONLY this exact JSON object:
312
- {{"ANSWER": "<answer text>"}}
313
  """
314
  try:
315
  raw = self._call_llm(prompt, 300)
 
6
  import time
7
  import pandas as pd
8
  from datetime import datetime
9
+ from typing import List, Dict, Any, Annotated, Optional
10
  from langgraph.graph import Graph, StateGraph
11
  from typing_extensions import TypedDict
12
  from openai import OpenAI
 
35
  # Initialize HF client
36
  client = InferenceClient(token=HF_TOKEN)
37
 
38
+ # Create a single Session for all requests
39
+ SESSION = requests.Session()
40
+
41
  # -------------------------
42
  # Constants
43
  # -------------------------
 
146
  )
147
  return sorted(preds, key=lambda x: x["score"], reverse=True)[0]["label"]
148
 
149
def sheet_answer_bytes(data: bytes) -> str:
    """Read a spreadsheet from raw bytes and answer the Food-sales question.

    The bytes are parsed with ``pd.read_excel``; when that raises
    ``ValueError`` they are parsed with ``pd.read_csv`` instead.

    Returns:
        The two-decimal sum of ``Sales`` over rows where ``Category`` is
        ``"Food"`` when both columns are present; otherwise the literal
        string ``"sheet_answer_placeholder"``.
    """
    try:
        frame = pd.read_excel(io.BytesIO(data))
    except ValueError:
        frame = pd.read_csv(io.BytesIO(data))

    required_columns = {"Category", "Sales"}
    if not required_columns.issubset(frame.columns):
        return "sheet_answer_placeholder"

    food_rows = frame[frame["Category"] == "Food"]
    food_total = food_rows["Sales"].sum()
    return f"{food_total:.2f}"
 
 
 
160
 
161
  def run_python(code: str) -> str:
162
  """Quick & dirty evaluator for Python code."""
 
166
  out = subprocess.check_output([sys.executable, f.name], timeout=10)
167
  return out.decode().strip()
168
 
169
def discover_attachment(task_id: str, api_url: str) -> Optional[str]:
    """Probe whether a task has an attachment and return its URL if so.

    Args:
        task_id: Identifier interpolated into the probe URL.
        api_url: Base URL; the probe is ``{api_url}/files/{task_id}``.

    Returns:
        The probe URL when the response status is in ``[200, 400)``,
        otherwise ``None`` (including when the request itself fails).
    """
    probe = f"{api_url}/files/{task_id}"
    try:
        # stream=True reads only the status line and headers, not the body.
        # A streamed response holds its connection until it is closed, so the
        # context manager releases it back to the session's pool.
        with SESSION.get(probe, stream=True, timeout=10, allow_redirects=True) as r:
            if 200 <= r.status_code < 400:
                return probe
    except requests.RequestException:
        # Best-effort probe: a failed request is treated as "no attachment".
        return None
    return None
179
+
180
  # -------------------------
181
  # State definition
182
  # -------------------------
 
198
  # -------------------------
199
 
200
  class BasicAgent:
201
+ """A very small agent that can handle text questions and a few file types."""
202
+
203
+ JSON_INSTRUCTION = "Return ONLY this exact JSON object: {\"ANSWER\": \"<answer text>\"}"
204
+
205
def __init__(self, api_url: str = DEFAULT_API_URL):
    """Create the agent: OpenAI client, API base URL, and workflow.

    Args:
        api_url: Base URL stored on the instance as ``self.api_url``.

    Raises:
        EnvironmentError: If ``OPENAI_API_KEY`` is falsy.
    """
    if OPENAI_API_KEY:
        self.llm = OpenAI(api_key=OPENAI_API_KEY)
    else:
        raise EnvironmentError("OPENAI_API_KEY not set")

    # api_url is stored before the workflow is built.
    self.api_url = api_url
    self.workflow = self._build_workflow()
 
211
 
212
  def _call_llm(self, prompt: str, max_tokens: int = 256) -> str:
213
  try:
 
226
  raise
227
 
228
  def _safe_parse(self, raw: str) -> str:
229
+ """Pull ANSWER from the JSON string, tolerant to model chatter."""
230
  try:
231
  return json.loads(raw)["ANSWER"]
232
  except Exception:
233
+ # Try to find any JSON object in the text
234
  match = re.search(r'\{.*?\}', raw, re.S)
235
  if match:
236
  try:
237
  return json.loads(match.group())["ANSWER"]
238
  except Exception:
239
  pass
240
+ # As a last resort, take everything after the first colon
241
  return raw.split(':', 1)[-1].strip()
242
 
243
  def __call__(self, question: str, task_id: str = "unknown", file_url: str = "") -> str:
244
+ # 1) if file_url blank, attempt discovery once
245
+ if not file_url:
246
+ file_url = discover_attachment(task_id, self.api_url) or ""
247
+
248
  state: AgentState = {
249
  "question": question,
250
  "current_step": "answer",
 
269
  if state["file_url"]:
270
  try:
271
  print(f"Downloading {state['file_url']} …")
272
+ response = SESSION.get(state["file_url"], timeout=30)
273
  response.raise_for_status()
274
  data = response.content
275
  print(f"Successfully downloaded file, size: {len(data)} bytes")
 
297
  answer = video_label_bytes(data)
298
  elif "spreadsheet" in kind or "excel" in kind:
299
  print("Processing as spreadsheet...")
300
+ answer = sheet_answer_bytes(data)
301
  elif state["file_url"].endswith(".py"):
302
  print("Processing as Python file...")
303
  answer = run_python(data.decode())
 
328
  QUESTION:
329
  {state['question']}
330
 
331
+ {self.JSON_INSTRUCTION}
 
332
  """
333
  try:
334
  raw = self._call_llm(prompt, 300)