naman1102 committed
Commit 8286288 · Parent(s): 8476091

Update app.py

Files changed (1):
  app.py  +140 -48
app.py CHANGED
@@ -59,13 +59,17 @@ def image_qa(image_path: str, prompt: str) -> str:
     """Query LLaVA model for image-based QA."""
     with open(image_path, "rb") as f:
         data = {"prompt": prompt, "image": f.read()}
-        return client.post("llava-hf/llava-v1.6-mistral-7b-hf", data=data)
+        headers = {"Content-Type": "application/octet-stream"}
+        return client.post("llava-hf/llava-v1.6-mistral-7b-hf", data=data, headers=headers)
 
 def video_label(video_path: str, topk: int = 1) -> str:
     """Get video classification using VideoMAE."""
     with open(video_path, "rb") as f:
+        headers = {"Content-Type": "application/octet-stream"}
         preds = client.post(
-            "MCG-NJU/videomae-base-finetuned-ucf101", data=f.read()
+            "MCG-NJU/videomae-base-finetuned-ucf101",
+            data=f.read(),
+            headers=headers
         )
         preds = sorted(preds, key=lambda x: x["score"], reverse=True)[:topk]
     return preds[0]["label"]
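Note: both helpers now upload raw bytes with an explicit "application/octet-stream" Content-Type. As a rough illustration only (not part of this commit; the token and file name are placeholders), an equivalent raw-bytes request against the hosted Inference API looks like this at the HTTP level:

    import requests

    # Hedged sketch: raw-bytes POST to the hosted Inference API for a
    # video-classification model. HF_TOKEN and clip.mp4 are placeholders.
    API_URL = "https://api-inference.huggingface.co/models/MCG-NJU/videomae-base-finetuned-ucf101"
    headers = {
        "Authorization": "Bearer HF_TOKEN",
        "Content-Type": "application/octet-stream",
    }

    with open("clip.mp4", "rb") as f:
        resp = requests.post(API_URL, headers=headers, data=f.read(), timeout=60)
    resp.raise_for_status()
    print(resp.json())  # e.g. a list of {"label": ..., "score": ...} predictions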
@@ -97,6 +101,7 @@ class AgentState(TypedDict):
     task_id: Annotated[str, override]
     logs: Annotated[Dict[str, Any], merge_dicts]
     code_blocks: Annotated[List[Dict[str, str]], list.__add__]
+    attachment_data: Annotated[Dict[str, bytes], merge_dicts]  # Store downloaded file data
 
 # -------------------------
 # BasicAgent implementation
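Note: attachment_data reuses the merge_dicts reducer already attached to logs. merge_dicts itself is not shown in this diff; a minimal sketch of the kind of reducer such an Annotated state field typically expects (an assumption, not the repository's definition) is:

    from typing import Any, Dict

    def merge_dicts(left: Dict[str, Any], right: Dict[str, Any]) -> Dict[str, Any]:
        # Later updates win on key collisions, mirroring dict.update semantics.
        return {**left, **right}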
@@ -139,36 +144,54 @@ class BasicAgent:
             state["code_blocks"] = code_blocks
             return state
 
-        # Check for multimodal content
-        q = state["question"].lower()
-        if "video" in q or q.endswith(".mp4"):
-            state["current_step"] = "video"
-        elif q.endswith((".jpg", ".png", ".jpeg")):
-            state["current_step"] = "image"
-        elif q.endswith((".xlsx", ".csv")):
-            state["current_step"] = "sheet"
-        else:
-            # Regular text question analysis
-            prompt = (
-                "You will receive a user question. Think step‑by‑step to decide whether external web search is required. "
-                "Respond ONLY with a valid Python dict literal in the following format and NOTHING else:\n"
-                "{\n 'needs_search': bool,\n 'search_query': str\n} \n\n"
-                f"Question: {state['question']}"
-            )
-            raw = self._call_llm(prompt)
+        # Check for file attachments in the question
+        if "file_url" in state["question"]:
             try:
-                decision = ast.literal_eval(raw)
-                state["needs_search"] = bool(decision.get("needs_search", False))
-                state["search_query"] = decision.get("search_query", state["question"])
-            except Exception:
-                state["needs_search"] = True
-                state["search_query"] = state["question"]
-                decision = {"parse_error": raw}
-            state["logs"] = {
-                "analyze": {"prompt": prompt, "llm_response": raw, "decision": decision}
-            }
-            state["current_step"] = "search" if state["needs_search"] else "answer"
-            state["history"].append({"step": "analyze", "output": decision})
+                # Parse the question to get file URL
+                question_data = json.loads(state["question"])
+                file_url = question_data.get("file_url")
+                if file_url:
+                    # Download the file
+                    file_data = self._download_file(file_url)
+                    # Store in state
+                    state["attachment_data"] = {
+                        "content": file_data,
+                        "type": self._detect_file_type(file_data, file_url)
+                    }
+                    # Set appropriate step based on file type
+                    if state["attachment_data"]["type"] == "video":
+                        state["current_step"] = "video"
+                    elif state["attachment_data"]["type"] == "image":
+                        state["current_step"] = "image"
+                    elif state["attachment_data"]["type"] in ["excel", "csv"]:
+                        state["current_step"] = "sheet"
+                    return state
+            except Exception as e:
+                state["logs"]["file_download_error"] = str(e)
+                state["current_step"] = "answer"
+                return state
+
+        # Regular text question analysis
+        prompt = (
+            "You will receive a user question. Think step‑by‑step to decide whether external web search is required. "
+            "Respond ONLY with a valid Python dict literal in the following format and NOTHING else:\n"
+            "{\n 'needs_search': bool,\n 'search_query': str\n} \n\n"
+            f"Question: {state['question']}"
+        )
+        raw = self._call_llm(prompt)
+        try:
+            decision = ast.literal_eval(raw)
+            state["needs_search"] = bool(decision.get("needs_search", False))
+            state["search_query"] = decision.get("search_query", state["question"])
+        except Exception:
+            state["needs_search"] = True
+            state["search_query"] = state["question"]
+            decision = {"parse_error": raw}
+        state["logs"] = {
+            "analyze": {"prompt": prompt, "llm_response": raw, "decision": decision}
+        }
+        state["current_step"] = "search" if state["needs_search"] else "answer"
+        state["history"].append({"step": "analyze", "output": decision})
         return state
 
     def _extract_code_blocks(self, text: str) -> List[Dict[str, str]]:
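Note: the new attachment branch only fires when state["question"] contains the substring "file_url", and it then assumes the question is a JSON document rather than a bare string, so that json.loads can succeed. A hypothetical payload that would take this path (field names other than "file_url" are illustrative):

    import json

    # Hypothetical task payload; only "file_url" is relied on by the branch above.
    question = json.dumps({
        "question": "What sport is shown in the attached clip?",
        "file_url": "https://example.com/files/clip.mp4",
    })

    data = json.loads(question)
    print(data.get("file_url"))  # -> https://example.com/files/clip.mp4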
@@ -218,11 +241,54 @@ class BasicAgent:
 
         return state
 
+    def _detect_file_type(self, data: bytes, url: str) -> str:
+        """Detect file type from content and URL."""
+        # Check URL extension first
+        url_lower = url.lower()
+        if url_lower.endswith((".mp4", ".avi", ".mov")):
+            return "video"
+        elif url_lower.endswith((".jpg", ".jpeg", ".png", ".gif")):
+            return "image"
+        elif url_lower.endswith(".xlsx"):
+            return "excel"
+        elif url_lower.endswith(".csv"):
+            return "csv"
+
+        # If URL check fails, try content-based detection
+        try:
+            # Try to detect image
+            from PIL import Image
+            Image.open(io.BytesIO(data))
+            return "image"
+        except:
+            pass
+
+        try:
+            # Try to detect Excel
+            pd.read_excel(io.BytesIO(data))
+            return "excel"
+        except:
+            pass
+
+        try:
+            # Try to detect CSV
+            pd.read_csv(io.BytesIO(data))
+            return "csv"
+        except:
+            pass
+
+        return "unknown"
+
     def _image_node(self, state: AgentState) -> AgentState:
         """Handle image-based questions."""
         try:
-            answer = image_qa(state["question"], "What is shown in this image?")
-            state["history"].append({"step": "image", "output": answer})
+            if "attachment_data" in state and "content" in state["attachment_data"]:
+                # Use the downloaded image data
+                image_data = state["attachment_data"]["content"]
+                answer = image_qa(image_data, "What is shown in this image?")
+                state["history"].append({"step": "image", "output": answer})
+            else:
+                raise ValueError("No image data found in state")
             state["current_step"] = "answer"
         except Exception as e:
             state["logs"]["image_error"] = str(e)
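Note: _detect_file_type trusts the URL extension first and only falls back to trying PIL and pandas on the raw bytes. A quick sketch of the extension branch (illustrative only; empty bytes are enough because that branch never inspects the payload, and passing None for self works because the method body does not use it):

    print(BasicAgent._detect_file_type(None, b"", "https://example.com/clip.MP4"))    # -> "video" (lower() makes the check case-insensitive)
    print(BasicAgent._detect_file_type(None, b"", "https://example.com/table.xlsx"))  # -> "excel"
    print(BasicAgent._detect_file_type(None, b"", "https://example.com/data.csv"))    # -> "csv"
    print(BasicAgent._detect_file_type(None, b"", "https://example.com/blob"))        # -> "unknown" (content sniffing fails on empty bytes)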
@@ -232,8 +298,13 @@ class BasicAgent:
     def _video_node(self, state: AgentState) -> AgentState:
         """Handle video-based questions."""
         try:
-            label = video_label(state["question"])
-            state["history"].append({"step": "video", "output": label})
+            if "attachment_data" in state and "content" in state["attachment_data"]:
+                # Use the downloaded video data
+                video_data = state["attachment_data"]["content"]
+                label = video_label(video_data)
+                state["history"].append({"step": "video", "output": label})
+            else:
+                raise ValueError("No video data found in state")
             state["current_step"] = "answer"
         except Exception as e:
             state["logs"]["video_error"] = str(e)
@@ -243,9 +314,13 @@ class BasicAgent:
     def _sheet_node(self, state: AgentState) -> AgentState:
         """Handle spreadsheet-based questions."""
         try:
-            with open(state["question"], "rb") as f:
-                answer = sheet_answer(f.read(), state["question"])
-            state["history"].append({"step": "sheet", "output": answer})
+            if "attachment_data" in state and "content" in state["attachment_data"]:
+                # Use the downloaded spreadsheet data
+                sheet_data = state["attachment_data"]["content"]
+                answer = sheet_answer(sheet_data, state["question"])
+                state["history"].append({"step": "sheet", "output": answer})
+            else:
+                raise ValueError("No spreadsheet data found in state")
             state["current_step"] = "answer"
         except Exception as e:
             state["logs"]["sheet_error"] = str(e)
@@ -299,16 +374,21 @@ class BasicAgent:
         return text.strip()
 
     def _generate_answer(self, state: AgentState) -> AgentState:
-        # Get the last search results with error handling
-        search_block = "No search results available."
-        try:
-            # Find the last search step in history
-            search_steps = [item for item in state["history"] if item.get("step") == "search"]
-            if search_steps and "results" in search_steps[-1]:
-                search_block = "\n".join(search_steps[-1]["results"])
-        except Exception as e:
-            print(f"Error accessing search results: {e}")
-            search_block = "Error retrieving search results."
+        # Collect all relevant tool outputs
+        materials = []
+        for item in state["history"]:
+            if item["step"] in ("search", "image", "video", "sheet", "code_analysis"):
+                # Handle different output formats
+                if item["step"] == "search":
+                    output = item.get("results", [])
+                    if isinstance(output, list):
+                        output = "\n".join(output)
+                else:
+                    output = item.get("output", "")
+                materials.append(str(output))
+
+        # Join all materials with proper formatting
+        search_block = "\n".join(materials) if materials else "No artefacts available."
 
         prompt = f"""
 You are an expert assistant. Use ONLY the materials below to answer.
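Note: _generate_answer now folds every tool output in history into the materials block instead of keeping only the last search step. A small illustration of the history shape it expects (step names and outputs here are made up):

    history = [
        {"step": "analyze", "output": {"needs_search": True}},
        {"step": "search", "results": ["Result A", "Result B"]},
        {"step": "image", "output": "a cat sitting on a mat"},
    ]

    materials = []
    for item in history:
        if item["step"] in ("search", "image", "video", "sheet", "code_analysis"):
            if item["step"] == "search":
                output = item.get("results", [])
                if isinstance(output, list):
                    output = "\n".join(output)
            else:
                output = item.get("output", "")
            materials.append(str(output))

    print("\n".join(materials) if materials else "No artefacts available.")
    # Result A
    # Result B
    # a cat sitting on a mat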
@@ -392,10 +472,21 @@ Think step-by-step. Write ANSWER: <answer> on its own line.
             "task_id": task_id,
             "logs": {},
             "code_blocks": [],
+            "attachment_data": {}
         }
         final_state = self.workflow.invoke(state)
         return final_state["final_answer"]
 
+    def _download_file(self, url: str) -> bytes:
+        """Download a file from a URL."""
+        try:
+            response = requests.get(url, timeout=20)
+            response.raise_for_status()
+            return response.content
+        except Exception as e:
+            print(f"Error downloading file from {url}: {e}")
+            raise
+
 # ----------------------------------------------------------------------------------
 # Gradio Interface & Submission Routines
 # ----------------------------------------------------------------------------------
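Note: together with _detect_file_type above, the download helper gives the analyze step a short path from URL to routed state. A condensed sketch of that flow (illustrative; assumes an already-constructed agent instance and a reachable URL):

    url = "https://example.com/files/table.xlsx"    # placeholder URL
    file_data = agent._download_file(url)           # raw bytes; raises on HTTP errors
    kind = agent._detect_file_type(file_data, url)  # -> "excel" for this extension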
@@ -479,6 +570,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             "task_id": task_id,
             "logs": {},
             "code_blocks": [],
+            "attachment_data": {}
         }
 
         # Run the workflow