naman1102 committed
Commit cc1c674 · 1 Parent(s): 0f38f21

Update app.py

Files changed (1)
  1. app.py +139 -38
app.py CHANGED
@@ -12,6 +12,21 @@ from typing_extensions import TypedDict
  from openai import OpenAI
  from tools import simple_search
  import re

  # -------------------------
  # Utility helpers
@@ -35,13 +50,36 @@ def tighten(q: str) -> str:
      return short or q

  # -------------------------
- # Environment & constants
  # -------------------------

- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

- # Remove logs directory creation since we're not storing logs anymore

  # -------------------------
  # State definition
@@ -83,27 +121,70 @@ class BasicAgent:

      # ---- Workflow nodes
      def _analyze_question(self, state: AgentState) -> AgentState:
-         prompt = (
-             "You will receive a user question. Think step‑by‑step to decide whether external web search is required. "
-             "Respond ONLY with a valid Python dict literal in the following format and NOTHING else:\n"
-             "{\n 'needs_search': bool,\n 'search_query': str\n} \n\n"
-             f"Question: {state['question']}"
-         )
-         raw = self._call_llm(prompt)
          try:
-             decision = ast.literal_eval(raw)
-             state["needs_search"] = bool(decision.get("needs_search", False))
-             state["search_query"] = decision.get("search_query", state["question"])
-         except Exception:
-             # fallback: assume search needed
-             state["needs_search"] = True
-             state["search_query"] = state["question"]
-             decision = {"parse_error": raw}
-         state["logs"] = {
-             "analyze": {"prompt": prompt, "llm_response": raw, "decision": decision}
-         }
-         state["current_step"] = "search" if state["needs_search"] else "answer"
-         state["history"].append({"step": "analyze", "output": decision})
          return state

      def _perform_search(self, state: AgentState) -> AgentState:
@@ -147,26 +228,28 @@ class BasicAgent:
          search_block = "Error retrieving search results."

          prompt = f"""
- You are an expert fact-extractor. Using ONLY the text below, answer the question.

- Question:
  {state['question']}

- Search snippets (bold terms are highlighted):
  {search_block}

- Think step-by-step. Quote exact numbers/names if needed.
- END EACH STEP with ➤. After reasoning, output:
-
- ANSWER: <the short answer here>
-
- No other text.
  """
          raw = self._call_llm(prompt, 300)
-         answer = raw.splitlines()[-1].replace("ANSWER:", "").strip()

          state["final_answer"] = answer
-         state["history"].append({"step": "answer", "output": raw})  # Store full response for debugging
          state["logs"]["final_answer"] = {"prompt": prompt, "response": raw}
          state["current_step"] = "done"
          return state
@@ -174,21 +257,39 @@ No other text.
      # ---- Build LangGraph workflow
      def _build_workflow(self) -> Graph:
          sg = StateGraph(state_schema=AgentState)
          sg.add_node("analyze", self._analyze_question)
          sg.add_node("search", self._perform_search)
          sg.add_node("recheck", self._re_evaluate)
          sg.add_node("answer", self._generate_answer)

-         # transitions
          sg.add_edge("analyze", "search")
          sg.add_edge("analyze", "answer")
          sg.add_edge("search", "recheck")

          def router(state: AgentState):
              return state["current_step"]

-         sg.add_conditional_edges("analyze", router, {"search": "search", "answer": "answer"})
-         sg.add_conditional_edges("recheck", router, {"search": "search", "answer": "answer"})
          sg.set_entry_point("analyze")
          sg.set_finish_point("answer")
          return sg.compile()
 
@@ -12,6 +12,21 @@ from typing_extensions import TypedDict
  from openai import OpenAI
  from tools import simple_search
  import re
+ from huggingface_hub import InferenceClient
+ import io
+ import mimetypes
+ import base64
+
+ # -------------------------
+ # Environment & constants
+ # -------------------------
+
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ # Initialize HF client
+ client = InferenceClient(token=HF_TOKEN)

  # -------------------------
  # Utility helpers
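Both keys are read from the environment with no fallback, so a small startup guard would make a missing Space secret fail fast; this is a hypothetical addition, not part of this commit:

# Hypothetical startup check (not in the commit): fail early if either secret is missing.
if not OPENAI_API_KEY or not HF_TOKEN:
    raise RuntimeError("OPENAI_API_KEY and HF_TOKEN must both be set before app.py can run.")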
 
@@ -35,13 +50,36 @@ def tighten(q: str) -> str:
      return short or q

  # -------------------------
+ # Multimodal helpers
  # -------------------------

+ def image_qa(image_path: str, prompt: str) -> str:
+     """Query LLaVA model for image-based QA."""
+     with open(image_path, "rb") as f:
+         data = {"prompt": prompt, "image": f.read()}
+     return client.post("llava-hf/llava-v1.6-mistral-7b-hf", data=data)
+
+ def video_label(video_path: str, topk: int = 1) -> str:
+     """Get video classification using VideoMAE."""
+     with open(video_path, "rb") as f:
+         preds = client.post(
+             "MCG-NJU/videomae-base-finetuned-ucf101", data=f.read()
+         )
+     preds = sorted(preds, key=lambda x: x["score"], reverse=True)[:topk]
+     return preds[0]["label"]

+ def sheet_answer(data: bytes, question: str) -> str:
+     """Process spreadsheet data and answer questions."""
+     # Guess the file type from the question/path itself rather than a hard-coded name.
+     if mimetypes.guess_type(question)[0] == "text/csv" or question.endswith(".csv"):
+         df = pd.read_csv(io.BytesIO(data))
+     else:
+         df = pd.read_excel(io.BytesIO(data))
+     # Report the largest value found in any numeric column.
+     numeric_cols = df.select_dtypes("number")
+     col = numeric_cols.max().idxmax()
+     row = numeric_cols[col].idxmax()
+     value = df.loc[row, col]
+     label = col  # idxmax() already returns the column label
+     return f"{label}: {value}"

  # -------------------------
  # State definition
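These three helpers are what the new graph nodes below call into. A minimal sketch of the intended usage, assuming pandas is already imported as pd elsewhere in app.py, HF_TOKEN is valid, the file names are placeholders, and client.post accepts the model id as written above (newer huggingface_hub releases may require it as a keyword argument such as model=...):

# Hypothetical smoke test for the multimodal helpers; not part of the commit.
if __name__ == "__main__":
    print(image_qa("sample.jpg", "What is shown in this image?"))  # LLaVA image QA
    print(video_label("clip.mp4"))                                 # top-1 VideoMAE label
    with open("table.xlsx", "rb") as f:
        print(sheet_answer(f.read(), "table.xlsx"))                # largest numeric cell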
 
@@ -83,27 +121,70 @@ class BasicAgent:

      # ---- Workflow nodes
      def _analyze_question(self, state: AgentState) -> AgentState:
+         # Check for multimodal content
+         q = state["question"].lower()
+         if "video" in q or q.endswith(".mp4"):
+             state["current_step"] = "video"
+         elif q.endswith((".jpg", ".png", ".jpeg")):
+             state["current_step"] = "image"
+         elif q.endswith((".xlsx", ".csv")):
+             state["current_step"] = "sheet"
+         else:
+             # Regular text question analysis
+             prompt = (
+                 "You will receive a user question. Think step‑by‑step to decide whether external web search is required. "
+                 "Respond ONLY with a valid Python dict literal in the following format and NOTHING else:\n"
+                 "{\n 'needs_search': bool,\n 'search_query': str\n} \n\n"
+                 f"Question: {state['question']}"
+             )
+             raw = self._call_llm(prompt)
+             try:
+                 decision = ast.literal_eval(raw)
+                 state["needs_search"] = bool(decision.get("needs_search", False))
+                 state["search_query"] = decision.get("search_query", state["question"])
+             except Exception:
+                 state["needs_search"] = True
+                 state["search_query"] = state["question"]
+                 decision = {"parse_error": raw}
+             state["logs"] = {
+                 "analyze": {"prompt": prompt, "llm_response": raw, "decision": decision}
+             }
+             state["current_step"] = "search" if state["needs_search"] else "answer"
+             state["history"].append({"step": "analyze", "output": decision})
+         return state
+
+     def _image_node(self, state: AgentState) -> AgentState:
+         """Handle image-based questions."""
          try:
+             answer = image_qa(state["question"], "What is shown in this image?")
+             state["history"].append({"step": "image", "output": answer})
+             state["current_step"] = "answer"
+         except Exception as e:
+             state["logs"]["image_error"] = str(e)
+             state["current_step"] = "answer"
+         return state
+
+     def _video_node(self, state: AgentState) -> AgentState:
+         """Handle video-based questions."""
+         try:
+             label = video_label(state["question"])
+             state["history"].append({"step": "video", "output": label})
+             state["current_step"] = "answer"
+         except Exception as e:
+             state["logs"]["video_error"] = str(e)
+             state["current_step"] = "answer"
+         return state
+
+     def _sheet_node(self, state: AgentState) -> AgentState:
+         """Handle spreadsheet-based questions."""
+         try:
+             with open(state["question"], "rb") as f:
+                 answer = sheet_answer(f.read(), state["question"])
+             state["history"].append({"step": "sheet", "output": answer})
+             state["current_step"] = "answer"
+         except Exception as e:
+             state["logs"]["sheet_error"] = str(e)
+             state["current_step"] = "answer"
          return state

      def _perform_search(self, state: AgentState) -> AgentState:
228
  search_block = "Error retrieving search results."
229
 
230
  prompt = f"""
231
+ You are an expert assistant. Use ONLY the materials below to answer.
232
 
233
+ QUESTION:
234
  {state['question']}
235
 
236
+ MATERIALS:
237
  {search_block}
238
 
239
+ Think step-by-step. Write ANSWER: <answer> on its own line.
 
 
 
 
 
240
  """
241
  raw = self._call_llm(prompt, 300)
242
+ answer = raw.split("ANSWER:")[-1].strip()
243
+
244
+ # Validate answer
245
+ if not answer:
246
+ answer = "I cannot provide a definitive answer at this time."
247
+ elif any(k in answer.lower() for k in ["i cannot find", "sorry"]):
248
+ # Fall back to a more general response
249
+ answer = "Based on the available information, I cannot provide a complete answer."
250
 
251
  state["final_answer"] = answer
252
+ state["history"].append({"step": "answer", "output": raw})
253
  state["logs"]["final_answer"] = {"prompt": prompt, "response": raw}
254
  state["current_step"] = "done"
255
  return state
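Splitting on the marker instead of reading only the last line makes the extraction tolerant of replies where the answer does not sit alone on the final line; a quick illustration with an invented completion:

# Illustration of the new answer extraction; the completion text is made up.
raw = "The discography lists three studio albums in that period. ANSWER: 3"
answer = raw.split("ANSWER:")[-1].strip()   # -> "3"
# If the marker is missing, split() returns the whole reply unchanged, and the
# validation above only rewrites it when it is empty or contains an apology phrase.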
 
@@ -174,21 +257,39 @@ No other text.
      # ---- Build LangGraph workflow
      def _build_workflow(self) -> Graph:
          sg = StateGraph(state_schema=AgentState)
+
+         # Add all nodes
          sg.add_node("analyze", self._analyze_question)
          sg.add_node("search", self._perform_search)
          sg.add_node("recheck", self._re_evaluate)
          sg.add_node("answer", self._generate_answer)
+         sg.add_node("image", self._image_node)
+         sg.add_node("video", self._video_node)
+         sg.add_node("sheet", self._sheet_node)

+         # Add edges
          sg.add_edge("analyze", "search")
          sg.add_edge("analyze", "answer")
          sg.add_edge("search", "recheck")
+         sg.add_edge("image", "answer")
+         sg.add_edge("video", "answer")
+         sg.add_edge("sheet", "answer")

          def router(state: AgentState):
              return state["current_step"]

+         sg.add_conditional_edges("analyze", router, {
+             "search": "search",
+             "answer": "answer",
+             "image": "image",
+             "video": "video",
+             "sheet": "sheet"
+         })
+         sg.add_conditional_edges("recheck", router, {
+             "search": "search",
+             "answer": "answer"
+         })
+
          sg.set_entry_point("analyze")
          sg.set_finish_point("answer")
          return sg.compile()
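For orientation, roughly how the compiled graph would be driven for one question. This is a sketch only: the initial-state keys are inferred from the nodes above, and the attribute holding the compiled graph (called workflow here), the BasicAgent constructor, and _call_llm all live in parts of app.py not shown in this diff:

# Hypothetical end-to-end invocation of the compiled workflow; not part of the commit.
agent = BasicAgent()
initial_state = {
    "question": "chart.png",   # an image path is meant to route analyze -> image -> answer
    "needs_search": False,
    "search_query": "",
    "history": [],
    "logs": {},
    "current_step": "analyze",
    "final_answer": "",
}
result = agent.workflow.invoke(initial_state)   # LangGraph's compiled graph exposes invoke()
print(result["final_answer"])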