Spaces:

GuglielmoTor
/

LinkedinMonitor

Sleeping

App Files Files Community

GuglielmoTor committed on May 26

Commit

67e5b47

verified ·

1 Parent(s): 2db7713

Update eb_agent_module.py

Browse files

Files changed (1) hide show

eb_agent_module.py +110 -80

eb_agent_module.py CHANGED Viewed

@@ -11,6 +11,11 @@ import traceback
 import pandasai as pai
 from pandasai_litellm import LiteLLM
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -575,54 +580,78 @@ class EmployerBrandingAgent:
         else:
             return "general"
-    async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
-        """Generate response using PandasAI for data queries"""
-        if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
-            return "Data analysis not available - PandasAI not initialized.", False
         try:
-            logging.info(f"Processing data query with PandasAI: {query[:100]}...")
-            # Use the first available dataframe for single-df queries
-            # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
-            if len(self.pandas_dfs) == 1:
-                df = list(self.pandas_dfs.values())[0]
-                logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
-                pandas_response = df.chat(query)
-            else:
-                # For multiple dataframes, use pai.chat with all dfs
-                dfs = list(self.pandas_dfs.values())
-                pandas_response = pai.chat(query, *dfs)
-            # Handle different response types (text, charts, etc.)
-            response_text = ""
-            chart_info = ""
-            # Check if a chart was generated
-            import os
-            charts_dir = "./charts"
-            if os.path.exists(charts_dir):
-                chart_files = [f for f in os.listdir(charts_dir) if f.endswith(('.png', '.jpg', '.jpeg', '.svg'))]
-                if chart_files:
-                    # Get the most recent chart file
-                    chart_files.sort(key=lambda x: os.path.getmtime(os.path.join(charts_dir, x)), reverse=True)
-                    latest_chart = chart_files[0]
-                    chart_path = os.path.join(charts_dir, latest_chart)
-                    chart_info = f"\n\n📊 **Chart Generated**: {latest_chart}\nChart saved at: {chart_path}"
-                    logging.info(f"Chart generated: {chart_path}")
-            # Combine text response with chart info
-            if pandas_response and str(pandas_response).strip():
-                response_text = str(pandas_response).strip()
-            else:
-                response_text = "Analysis completed"
-            final_response = response_text + chart_info
-            return final_response, True
         except Exception as e:
-            logging.error(f"Error in PandasAI processing: {e}", exc_info=True)
-            return f"I encountered an error while analyzing the data: {str(e)}", False
     async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
         """Generate enhanced response combining PandasAI results with RAG context"""
@@ -753,64 +782,65 @@ class EmployerBrandingAgent:
             return False
         return True
-    async def process_query(self, user_query: str) -> str:
         """
-        Main method to process user queries with hybrid approach:
-        1. Classify query type (data/advice/hybrid)
-        2. Use PandasAI for data queries
-        3. Use enhanced LLM for interpretation and advice
-        4. Combine results for comprehensive responses
         """
         if not self._validate_query(user_query):
-            return "Please provide a valid query (3 to 3000 characters)."
         if not self.is_ready:
             logging.warning("process_query called but agent is not ready. Attempting re-initialization.")
             init_success = await self.initialize()
             if not init_success:
-                return "The agent is not properly initialized and could not be started. Please check configuration and logs."
         try:
-            # Classify the query type
             query_type = self._classify_query_type(user_query)
             logging.info(f"Query classified as: {query_type}")
-            pandas_result = ""
-            pandas_success = False
             # For data-related queries, try PandasAI first
             if query_type in ["data", "hybrid"] and self.pandas_agent:
                 logging.info("Attempting PandasAI analysis...")
-                pandas_result, pandas_success = await self._generate_pandas_response(user_query)
                 if pandas_success:
-                    logging.info("PandasAI analysis successful")
-                    # For pure data queries with successful analysis, we might return enhanced result
-                    if query_type == "data":
-                        enhanced_response = await self._generate_enhanced_response(
-                            user_query, pandas_result, query_type
-                        )
-                        return enhanced_response
                 else:
-                    logging.warning("PandasAI analysis failed, falling back to general response")
-            # For hybrid queries, advice queries, or when PandasAI fails
-            if query_type == "hybrid" and pandas_success:
-                # Combine PandasAI results with enhanced advice
-                enhanced_response = await self._generate_enhanced_response(
-                    user_query, pandas_result, query_type
-                )
-                return enhanced_response
-            else:
-                # General advice or fallback response
-                enhanced_response = await self._generate_enhanced_response(
-                    user_query, "", query_type
-                )
-                return enhanced_response
         except Exception as e:
-            logging.error(f"Error in process_query: {e}", exc_info=True)
-            return f"I encountered an error while processing your request: {str(e)}"
     def update_dataframes(self, new_dataframes: Dict[str, pd.DataFrame]):
         """Updates the agent's DataFrames and reinitializes PandasAI agent"""

 import pandasai as pai
 from pandasai_litellm import LiteLLM
+# Add this early, before matplotlib.pyplot is imported directly or by pandasai
+import matplotlib
+matplotlib.use('Agg') # Use a non-interactive backend for Matplotlib
+import matplotlib.pyplot as plt
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
         else:
             return "general"
+    async def _generate_pandas_response(self, query: str) -> Tuple[Optional[str], Optional[str], bool]:
+        """
+        Generate response using PandasAI for data queries.
+        The blocking `pai.chat` call is run in a worker thread via `asyncio.to_thread`
+        and receives every DataFrame stored in `self.pandas_dfs`.
+        Args:
+            query: Natural-language question forwarded unchanged to PandasAI.
+        Returns: (textual_output_from_pandas_ai, chart_file_path, success_flag)
+            On failure the first element is a user-facing error message, the chart
+            path is None and the flag is False.
+        A chart is reported only when its file appeared, or was rewritten, while this
+        query was running, so a text-only answer never inherits a chart left in the
+        charts directory by an earlier query.
+        """
+        import os  # Function-scope import keeps the chart bookkeeping below self-contained
+        if not self.pandas_agent or not self.pandas_dfs: # Check if pandas_dfs is populated
+            return "Data analysis tool (PandasAI) is not initialized or no data is loaded.", None, False
+        chart_extensions = ('.png', '.jpg', '.jpeg', '.svg')
+        def _chart_mtimes(directory):
+            """Return {chart_path: mtime} for the chart files currently in `directory`."""
+            if not directory or not os.path.isdir(directory):
+                return {}
+            mtimes = {}
+            for name in os.listdir(directory):
+                if not name.lower().endswith(chart_extensions):
+                    continue
+                path = os.path.join(directory, name)
+                try:
+                    mtimes[path] = os.path.getmtime(path)
+                except OSError:
+                    # The file disappeared between listdir() and the stat call; ignore it.
+                    continue
+            return mtimes
+        latest_chart_path: Optional[str] = None
+        textual_pandas_response: Optional[str] = None
+        pandas_response_raw: Any = None
         try:
+            logging.info(f"Processing data query with PandasAI: '{query[:100]}...' using {len(self.pandas_dfs)} DataFrame(s).")
+            # self.pandas_dfs already holds the pai.DataFrame wrappers; pai.chat accepts several at once.
+            dfs_to_query = list(self.pandas_dfs.values()) # List of pai.DataFrame objects
+            if not dfs_to_query:
+                 return "No dataframes available for PandasAI to query.", None, False
+            # Snapshot the charts directory BEFORE the call so that only files created or
+            # rewritten by this query are attributed to it.
+            # NOTE(review): assumes pai.config exposes `save_charts_path` - confirm against the installed PandasAI version.
+            charts_dir = pai.config.save_charts_path # Get configured path
+            charts_before = _chart_mtimes(charts_dir)
+            # pai.chat is synchronous; run it in a thread so the event loop is not blocked.
+            # The *dfs_to_query unpacks the list of pai.DataFrame objects
+            pandas_response_raw = await asyncio.to_thread(pai.chat, query, *dfs_to_query)
+            charts_after = _chart_mtimes(charts_dir)
+            # A chart belongs to this query if it is new or its mtime changed during the call.
+            fresh_charts = [path for path, mtime in charts_after.items() if charts_before.get(path) != mtime]
+            if fresh_charts:
+                latest_chart_path = max(fresh_charts, key=charts_after.get) # Full path of the newest fresh chart
+                logging.info(f"Chart detected/generated by PandasAI: {latest_chart_path}")
+            # Determine the textual part of the PandasAI response
+            if isinstance(pandas_response_raw, str):
+                stripped_response = pandas_response_raw.strip()
+                # If the raw response IS the chart path, then text is minimal.
+                # Compare absolute paths so a relative configured directory still matches
+                # an absolute path in the answer (and vice versa).
+                if latest_chart_path and stripped_response and os.path.abspath(stripped_response) == os.path.abspath(latest_chart_path):
+                    textual_pandas_response = "A chart was generated to answer your query."
+                else:
+                    textual_pandas_response = stripped_response
+            elif isinstance(pandas_response_raw, (int, float, bool)):
+                textual_pandas_response = str(pandas_response_raw)
+            elif isinstance(pandas_response_raw, pd.DataFrame):
+                # If PandasAI returns a DataFrame, summarize it.
+                # Avoid sending overly long strings to the LLM.
+                textual_pandas_response = f"PandasAI returned a data table with {len(pandas_response_raw)} rows. Here are the first few entries:\n{pandas_response_raw.head(3).to_string()}"
+            elif pandas_response_raw is not None: # Other types
+                textual_pandas_response = str(pandas_response_raw).strip()
+            # If no textual response formed yet, but a chart exists, make a note.
+            if not textual_pandas_response and latest_chart_path:
+                textual_pandas_response = "A chart was generated as a result of the analysis."
+            elif not textual_pandas_response: # No usable output at all (None or empty string, no chart)
+                 textual_pandas_response = "PandasAI processed the query but did not return a specific textual result or chart."
+            return textual_pandas_response, latest_chart_path, True
         except Exception as e:
+            logging.error(f"Error during PandasAI processing for query '{query[:100]}...': {e}", exc_info=True)
+            # Expose only the exception type to the user; the full details are in the log.
+            error_msg = f"I encountered an issue while analyzing the data with the data tool: {type(e).__name__}."
+            error_text = str(e).lower()
+            if "duckdb" in error_text and "binder error" in error_text:
+                error_msg += " This might be due to a mismatch in data types or an unsupported operation on the data."
+            return error_msg, None, False
     async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
         """Generate enhanced response combining PandasAI results with RAG context"""
             return False
         return True
+    async def process_query(self, user_query: str) -> Dict[str, Optional[str]]:
         """
+        Main method to process user queries.
+        Steps, in order:
+        1. Reject the query if `self._validate_query` fails.
+        2. If `self.is_ready` is falsy, await `self.initialize()` and give up when it reports failure.
+        3. Classify the query with `self._classify_query_type`.
+        4. For "data" and "hybrid" queries, when `self.pandas_agent` is set, await
+           `self._generate_pandas_response` to obtain text and an optional chart path.
+        5. Always await `self._generate_enhanced_response`, passing the PandasAI output
+           (or its error message) as context, to produce the final text.
+        Returns a dictionary: {"text": llm_response_string, "image_path": path_to_chart_or_none}
+        "image_path" is set only when the PandasAI step reported success; every early
+        exit and the exception handler return it as None.
+        Exceptions raised inside the main flow are logged and converted into an error
+        dictionary instead of propagating to the caller.
         """
         if not self._validate_query(user_query):
+            return {"text": "Please provide a valid query (3 to 3000 characters).", "image_path": None}
         if not self.is_ready:
             logging.warning("process_query called but agent is not ready. Attempting re-initialization.")
             init_success = await self.initialize()
             if not init_success:
+                return {"text": "The agent is not properly initialized and could not be started. Please check configuration and logs.", "image_path": None}
         try:
             query_type = self._classify_query_type(user_query)
             logging.info(f"Query classified as: {query_type}")
+            # Defaults used when the PandasAI step is skipped (non-data query or no agent).
+            pandas_text_output: Optional[str] = None
+            pandas_chart_path: Optional[str] = None
+            pandas_success = False # Flag to track if PandasAI ran successfully
             # For data-related queries, try PandasAI first
             if query_type in ["data", "hybrid"] and self.pandas_agent:
                 logging.info("Attempting PandasAI analysis...")
+                pandas_text_output, pandas_chart_path, pandas_success = await self._generate_pandas_response(user_query)
                 if pandas_success:
+                    logging.info(f"PandasAI analysis successful. Text: '{str(pandas_text_output)[:100]}...', Chart: '{pandas_chart_path}'")
                 else:
+                    # pandas_text_output might contain the error message from PandasAI
+                    logging.warning(f"PandasAI analysis failed or returned no specific result. Message from PandasAI: {pandas_text_output}")
+            # Prepare the context from PandasAI for the LLM
+            llm_context_from_pandas = ""
+            if pandas_text_output: # This could be a success message or an error message from PandasAI
+                llm_context_from_pandas += f"Data Analysis Tool Output: {pandas_text_output}\n"
+                if pandas_chart_path and pandas_success: # Only mention chart path if PandasAI was successful
+                    llm_context_from_pandas += f"[A chart has been generated by the data tool and saved at '{pandas_chart_path}'. You should refer to this chart in your explanation if it's relevant to the user's query.]\n"
+            elif query_type in ["data", "hybrid"] and not self.pandas_agent:
+                # A data question was asked but there is no PandasAI agent: tell the LLM so.
+                llm_context_from_pandas += "Note: The data analysis tool is currently unavailable.\n"
+            # Always call the LLM to formulate the final response
+            final_llm_response = await self._generate_enhanced_response(
+                query=user_query,
+                pandas_result=llm_context_from_pandas, # Pass the textual summary from PandasAI
+                query_type=query_type
+            )
+            # Return the LLM's response and the chart path if PandasAI was successful and generated one.
+            # The `if pandas_success` guard drops any chart path when the PandasAI step did not succeed.
+            # The final_llm_response should ideally explain any failures if pandas_text_output contained an error.
+            return {"text": final_llm_response, "image_path": pandas_chart_path if pandas_success else None}
         except Exception as e:
+            # Last-resort boundary: log the full traceback, show the user only the exception type.
+            logging.error(f"Critical error in process_query: {e}", exc_info=True)
+            return {"text": f"I encountered a critical error while processing your request: {type(e).__name__}. Please check the logs.", "image_path": None}
     def update_dataframes(self, new_dataframes: Dict[str, pd.DataFrame]):
         """Updates the agent's DataFrames and reinitializes PandasAI agent"""