GuglielmoTor commited on
Commit
762599c
·
verified ·
1 Parent(s): c473dc0

Delete features/insight_and_tasks

Browse files
features/insight_and_tasks/__init__.py DELETED
File without changes
features/insight_and_tasks/agents/follower_agent.py DELETED
@@ -1,509 +0,0 @@
1
- # agents/follower_agent.py
2
- import pandas as pd
3
- from typing import Dict, List, Any, Optional
4
- import logging
5
- import pandasai as pai # Assuming pandasai is imported as pai globally or configured
6
-
7
- from google.adk.agents import LlmAgent # Assuming this is the correct import path
8
-
9
- # Project-specific imports
10
- from features.insight_and_tasks.utils.retry_mechanism import RetryMechanism
11
- from features.insight_and_tasks.data_models.metrics import AgentMetrics, TimeSeriesMetric
12
-
13
- # Configure logger for this module
14
- logger = logging.getLogger(__name__)
15
-
16
- # Define the model globally or pass it as a parameter. For now, using a constant.
17
- # Consider moving this to a shared config or environment variable.
18
- DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20" # Or your specific model like "gemini-1.5-flash-preview-05-20"
19
-
20
-
21
- class EnhancedFollowerAnalysisAgent:
22
- """
23
- Enhanced follower analysis agent with proper handling of different follower count types
24
- and structured metric extraction.
25
- """
26
-
27
- AGENT_NAME = "follower_analyst"
28
- AGENT_DESCRIPTION = "Expert analyst specializing in follower growth patterns and demographic analysis."
29
- AGENT_INSTRUCTION = """
30
- You are a specialized LinkedIn follower analytics expert focused on temporal patterns and demographic trends.
31
-
32
- Your role includes:
33
-
34
- 1. FOLLOWER TREND ANALYSIS:
35
- - Analyze follower growth trends over time (monthly data from 'follower_gains_monthly' type).
36
- - Identify growth acceleration/deceleration periods.
37
- - Calculate growth rates and velocity changes.
38
- - Detect seasonal patterns and anomalies.
39
- - Analyze organic vs paid follower counts over time.
40
-
41
- 2. DEMOGRAPHIC ANALYSIS (based on 'follower_industry', 'follower_seniority', etc.):
42
- - Analyze follower distribution by industry, seniority, function, and geography.
43
- - Compare organic vs paid followers across these demographic segments.
44
- - Identify high-value audience segments based on counts and potential engagement.
45
-
46
- 3. TIME-BASED INSIGHTS:
47
- - Provide month-over-month comparisons for growth data.
48
- - Identify critical inflection points in follower growth.
49
- - Calculate trend momentum and acceleration.
50
-
51
- 4. METRIC EXTRACTION (for the AgentMetrics structure):
52
- - Extract time-series data for total, organic, and paid follower counts, and growth rates.
53
- - Provide aggregate metrics like average monthly growth, total organic/paid followers.
54
- - Provide demographic breakdowns as categorical metrics (e.g., top N industries by follower count).
55
-
56
- Focus on separating temporal analysis (monthly) from demographic analysis.
57
- When analyzing demographics, consider the top N segments (e.g., top 10 industries) for conciseness.
58
- Ensure your analysis summary is comprehensive and insightful.
59
- """
60
-
61
- def __init__(self, api_key: str, model_name: Optional[str] = None):
62
- """
63
- Initializes the Follower Analysis Agent.
64
-
65
- Args:
66
- api_key: API key for LLM and potentially PandasAI.
67
- model_name: Name of the language model to use. Defaults to DEFAULT_AGENT_MODEL.
68
- """
69
- self.api_key = api_key # May be used if PandasAI is configured per agent or for other API calls
70
- self.model_name = model_name or DEFAULT_AGENT_MODEL
71
-
72
- self.agent = LlmAgent(
73
- name=self.AGENT_NAME,
74
- model=self.model_name,
75
- description=self.AGENT_DESCRIPTION,
76
- instruction=self.AGENT_INSTRUCTION
77
- )
78
- self.retry_mechanism = RetryMechanism()
79
- logger.info(f"{self.AGENT_NAME} initialized with model {self.model_name}.")
80
-
81
- def _separate_follower_data_by_type(self, df: pd.DataFrame) -> Dict[str, pd.DataFrame]:
82
- """Separate follower data by follower_count_type and process appropriately."""
83
- separated_data = {}
84
-
85
- if df is None or df.empty or 'follower_count_type' not in df.columns:
86
- logger.warning("Input DataFrame is empty or 'follower_count_type' column is missing.")
87
- return separated_data
88
-
89
- # Define the expected follower count types
90
- # These should match the 'follower_count_type' values in your Bubble data
91
- follower_types = [
92
- 'follower_gains_monthly', # For time-series analysis
93
- 'follower_industry', # For demographic analysis
94
- 'follower_seniority',
95
- 'follower_function',
96
- 'follower_geo'
97
- ]
98
-
99
- for ftype in follower_types:
100
- type_data = df[df['follower_count_type'] == ftype].copy()
101
- if not type_data.empty:
102
- if ftype == 'follower_gains_monthly':
103
- type_data = self._process_monthly_data(type_data)
104
- else: # Demographic data
105
- type_data = self._get_top_demographic_segments(type_data, top_n=10)
106
- separated_data[ftype] = type_data
107
- else:
108
- logger.info(f"No data found for follower_count_type: {ftype}")
109
-
110
- return separated_data
111
-
112
- def _get_top_demographic_segments(self, demo_df: pd.DataFrame, top_n: int = 10) -> pd.DataFrame:
113
- """Get top N demographic segments by total follower count (organic + paid)."""
114
- if demo_df.empty:
115
- return demo_df
116
-
117
- # Ensure required columns exist and are numeric, fill NaNs with 0 for sum
118
- demo_df = demo_df.copy() # Work on a copy
119
- demo_df['follower_count_organic'] = pd.to_numeric(demo_df.get('follower_count_organic'), errors='coerce').fillna(0)
120
- demo_df['follower_count_paid'] = pd.to_numeric(demo_df.get('follower_count_paid'), errors='coerce').fillna(0)
121
-
122
- demo_df['total_followers'] = demo_df['follower_count_organic'] + demo_df['follower_count_paid']
123
-
124
- # Sort by total followers and take top N
125
- # 'category_name' usually holds the demographic label (e.g., industry name)
126
- if 'category_name' not in demo_df.columns:
127
- logger.warning("'_get_top_demographic_segments' expects 'category_name' column for grouping.")
128
- return demo_df.drop(columns=['total_followers'], errors='ignore')
129
-
130
- # Group by category_name if there are multiple entries for the same category, sum followers
131
- # This step might be redundant if data is already aggregated per category_name
132
- # demo_df_grouped = demo_df.groupby('category_name').agg(
133
- # follower_count_organic=('follower_count_organic', 'sum'),
134
- # follower_count_paid=('follower_count_paid', 'sum'),
135
- # total_followers=('total_followers', 'sum')
136
- # ).reset_index()
137
-
138
- top_segments = demo_df.nlargest(top_n, 'total_followers')
139
-
140
- return top_segments.drop(columns=['total_followers'], errors='ignore')
141
-
142
-
143
- def _process_monthly_data(self, monthly_df: pd.DataFrame) -> pd.DataFrame:
144
- """Process monthly follower data: parse dates, sort."""
145
- if monthly_df.empty or 'category_name' not in monthly_df.columns:
146
- logger.warning("Monthly data DataFrame is empty or 'category_name' column is missing.")
147
- return monthly_df
148
-
149
- df_processed = monthly_df.copy()
150
-
151
- # 'category_name' for monthly data is expected to be a date string like 'YYYY-MM-DD'
152
- # Attempt to convert 'category_name' to datetime
153
- df_processed['date_for_analysis'] = pd.to_datetime(df_processed['category_name'], errors='coerce')
154
-
155
- # Drop rows where date conversion failed
156
- df_processed.dropna(subset=['date_for_analysis'], inplace=True)
157
-
158
- if df_processed.empty:
159
- logger.warning("No valid dates found in 'category_name' for monthly data after processing.")
160
- return df_processed
161
-
162
- df_processed['year_month'] = df_processed['date_for_analysis'].dt.strftime('%Y-%m')
163
- df_processed['month_name'] = df_processed['date_for_analysis'].dt.strftime('%B %Y')
164
-
165
- # Ensure numeric types for follower counts
166
- for col in ['follower_count_organic', 'follower_count_paid']:
167
- if col in df_processed.columns:
168
- df_processed[col] = pd.to_numeric(df_processed[col], errors='coerce').fillna(0)
169
- else: # Add column with zeros if missing, to prevent errors in later calculations
170
- df_processed[col] = 0
171
-
172
-
173
- return df_processed.sort_values('date_for_analysis')
174
-
175
- def _extract_time_series_metrics(self, monthly_df: pd.DataFrame) -> List[TimeSeriesMetric]:
176
- """Extract time-series metrics from processed monthly follower data."""
177
- ts_metrics = []
178
- if monthly_df.empty or 'date_for_analysis' not in monthly_df.columns:
179
- logger.info("Cannot extract time-series metrics: monthly DataFrame is empty or lacks 'date_for_analysis'.")
180
- return ts_metrics
181
-
182
- # Ensure data is sorted by date for correct growth rate calculation
183
- monthly_df_sorted = monthly_df.sort_values('date_for_analysis').copy()
184
-
185
- timestamps = monthly_df_sorted['year_month'].tolist()
186
-
187
- # Calculate total followers
188
- monthly_df_sorted['total_followers'] = monthly_df_sorted.get('follower_count_organic', 0) + \
189
- monthly_df_sorted.get('follower_count_paid', 0)
190
-
191
- metric_definitions = {
192
- "total_follower_count": monthly_df_sorted['total_followers'],
193
- "organic_follower_count": monthly_df_sorted.get('follower_count_organic', pd.Series(0, index=monthly_df_sorted.index)),
194
- "paid_follower_count": monthly_df_sorted.get('follower_count_paid', pd.Series(0, index=monthly_df_sorted.index))
195
- }
196
-
197
- for name, values_series in metric_definitions.items():
198
- ts_metrics.append(TimeSeriesMetric(
199
- metric_name=name,
200
- values=values_series.tolist(),
201
- timestamps=timestamps,
202
- metric_type="time_series",
203
- time_granularity="monthly"
204
- ))
205
-
206
- # Calculate growth rate for total followers
207
- if len(monthly_df_sorted) > 1:
208
- # pct_change gives NaN for the first element, fill with 0
209
- growth_rates = monthly_df_sorted['total_followers'].pct_change().fillna(0).tolist()
210
- ts_metrics.append(TimeSeriesMetric(
211
- metric_name="total_follower_growth_rate",
212
- values=growth_rates,
213
- timestamps=timestamps, # Timestamps align, first growth rate is vs non-existent previous point (so 0)
214
- metric_type="time_series",
215
- time_granularity="monthly",
216
- unit="%"
217
- ))
218
- else:
219
- logger.info("Not enough data points (<=1) to calculate growth rate.")
220
-
221
- return ts_metrics
222
-
223
- def _calculate_aggregate_metrics(self, separated_data: Dict[str, pd.DataFrame]) -> Dict[str, float]:
224
- """Calculate aggregate metrics from all follower data."""
225
- agg_metrics = {}
226
-
227
- monthly_df = separated_data.get('follower_gains_monthly')
228
- if monthly_df is not None and not monthly_df.empty:
229
- total_organic = monthly_df['follower_count_organic'].sum()
230
- total_paid = monthly_df['follower_count_paid'].sum()
231
- total_all_followers = total_organic + total_paid
232
-
233
- agg_metrics['total_organic_followers_gained_period'] = float(total_organic)
234
- agg_metrics['total_paid_followers_gained_period'] = float(total_paid)
235
- agg_metrics['overall_total_followers_gained_period'] = float(total_all_followers)
236
-
237
- if total_all_followers > 0:
238
- agg_metrics['overall_organic_follower_ratio_gained'] = float(total_organic / total_all_followers)
239
- agg_metrics['overall_paid_follower_ratio_gained'] = float(total_paid / total_all_followers)
240
-
241
- # Average monthly gain (if 'total_followers' represents gain, not cumulative)
242
- # Assuming 'follower_count_organic/paid' in 'follower_gains_monthly' are indeed GAINS for that month
243
- monthly_df['monthly_total_gain'] = monthly_df['follower_count_organic'] + monthly_df['follower_count_paid']
244
- if not monthly_df['monthly_total_gain'].empty:
245
- agg_metrics['avg_monthly_follower_gain'] = float(monthly_df['monthly_total_gain'].mean())
246
- agg_metrics['max_monthly_follower_gain'] = float(monthly_df['monthly_total_gain'].max())
247
- agg_metrics['min_monthly_follower_gain'] = float(monthly_df['monthly_total_gain'].min())
248
-
249
-
250
- # Count of distinct demographic segments identified (top N for each)
251
- for demo_type in ['follower_industry', 'follower_seniority', 'follower_function', 'follower_geo']:
252
- if demo_type in separated_data and not separated_data[demo_type].empty:
253
- agg_metrics[f'distinct_{demo_type}_segments_analyzed'] = float(len(separated_data[demo_type]))
254
-
255
- return agg_metrics
256
-
257
- def _extract_demographic_metrics(self, separated_data: Dict[str, pd.DataFrame]) -> Dict[str, Any]:
258
- """Extract demographic distributions (categorical metrics)."""
259
- cat_metrics = {}
260
- demographic_types_map = {
261
- 'follower_industry': 'industry_distribution',
262
- 'follower_seniority': 'seniority_distribution',
263
- 'follower_function': 'function_distribution',
264
- 'follower_geo': 'geographic_distribution'
265
- }
266
-
267
- for demo_type_key, metric_name_prefix in demographic_types_map.items():
268
- demo_df = separated_data.get(demo_type_key)
269
- if demo_df is not None and not demo_df.empty and 'category_name' in demo_df.columns:
270
- distribution = {}
271
- for _, row in demo_df.iterrows():
272
- category = row['category_name']
273
- organic = float(row.get('follower_count_organic', 0))
274
- paid = float(row.get('follower_count_paid', 0))
275
- total = organic + paid
276
- distribution[category] = {
277
- 'total_followers': total,
278
- 'organic_followers': organic,
279
- 'paid_followers': paid,
280
- 'organic_ratio': organic / total if total > 0 else 0.0
281
- }
282
-
283
- # Sort by total followers descending for the distribution
284
- sorted_distribution = dict(sorted(distribution.items(), key=lambda item: item[1]['total_followers'], reverse=True))
285
- cat_metrics[metric_name_prefix] = sorted_distribution
286
-
287
- # Summary for this demographic type
288
- total_followers_in_type = sum(item['total_followers'] for item in distribution.values())
289
- cat_metrics[f'{metric_name_prefix}_summary'] = {
290
- 'total_followers_in_top_segments': total_followers_in_type,
291
- 'number_of_segments_reported': len(distribution),
292
- 'top_segment': list(sorted_distribution.keys())[0] if sorted_distribution else "N/A"
293
- }
294
- return cat_metrics
295
-
296
- def _extract_time_periods(self, monthly_df: Optional[pd.DataFrame]) -> List[str]:
297
- """Extract unique year-month time periods covered by the monthly data."""
298
- if monthly_df is None or monthly_df.empty or 'year_month' not in monthly_df.columns:
299
- return ["Data period not available or N/A"]
300
-
301
- periods = sorted(monthly_df['year_month'].dropna().unique().tolist(), reverse=True)
302
- return periods[:12] # Return up to the last 12 months if available
303
-
304
-
305
- def analyze_follower_data(self, follower_stats_df: pd.DataFrame) -> AgentMetrics:
306
- """
307
- Generate comprehensive follower analysis using PandasAI and structured metric extraction.
308
- """
309
- if follower_stats_df is None or follower_stats_df.empty:
310
- logger.warning("Follower statistics DataFrame is empty. Returning empty metrics.")
311
- return AgentMetrics(
312
- agent_name=self.AGENT_NAME,
313
- analysis_summary="No follower data provided for analysis.",
314
- time_periods_covered=["N/A"]
315
- )
316
-
317
- # 1. Pre-process and separate data
318
- separated_data = self._separate_follower_data_by_type(follower_stats_df)
319
-
320
- # Prepare a combined DataFrame for PandasAI if needed, or use the original one.
321
- # For PandasAI, it's often better to provide a clean, understandable DataFrame.
322
- # Let's use the original df for the textual analysis by PandasAI,
323
- # as it contains all types and the LLM can be instructed to differentiate.
324
-
325
- # Ensure PandasAI is configured (this should ideally be done once at orchestrator level)
326
- # from utils.pandasai_setup import configure_pandasai
327
- # configure_pandasai(self.api_key, self.model_name) # Or pass LLM object if configured outside
328
-
329
- df_description = "LinkedIn follower statistics. Contains 'follower_count_type' indicating data category (e.g., 'follower_gains_monthly', 'follower_industry'), 'category_name' (e.g., date for monthly, industry name for industry type), 'follower_count_organic', 'follower_count_paid'."
330
-
331
- # Create PandasAI DataFrame
332
- # Check if pai.DataFrame is the correct way to initialize based on your pandasai version
333
- try:
334
- pandas_ai_df = pai.DataFrame(follower_stats_df, description=df_description)
335
- except Exception as e:
336
- logger.error(f"Failed to create PandasAI DataFrame: {e}", exc_info=True)
337
- return AgentMetrics(
338
- agent_name=self.AGENT_NAME,
339
- analysis_summary=f"Error initializing PandasAI: {e}",
340
- time_periods_covered=self._extract_time_periods(separated_data.get('follower_gains_monthly'))
341
- )
342
-
343
- # 2. Generate textual analysis using PandasAI via LlmAgent
344
- # The LlmAgent itself doesn't directly use PandasAI's .chat() method.
345
- # The instruction for LlmAgent should guide it to perform analysis.
346
- # If direct PandasAI chat is needed, it's a separate call.
347
- # The original code uses pandas_df.chat(analysis_query). This implies PandasAI is used directly.
348
- # Let's stick to the direct PandasAI chat call as in the original structure.
349
-
350
- analysis_query = f"""
351
- Analyze the provided LinkedIn follower statistics. The DataFrame contains various 'follower_count_type' values.
352
- Focus on:
353
- 1. For 'follower_gains_monthly': Analyze monthly follower growth trends (total, organic, paid). Identify key periods of growth or decline.
354
- 2. For demographic types (industry, seniority, function, geo): Describe the distribution of followers. Which are the top segments? How do organic vs paid compare?
355
- 3. Synthesize these findings into an overall summary of follower dynamics.
356
-
357
- Consider the data structure: 'category_name' holds the date for monthly data or the demographic label.
358
- 'follower_count_organic' and 'follower_count_paid' are the key metrics.
359
- """
360
-
361
- analysis_result_text = "PandasAI analysis could not be performed." # Default
362
- try:
363
- def chat_operation():
364
- # Ensure the LLM for PandasAI is correctly configured before this call
365
- # This might involve re-calling configure_pandasai if it's not persistent
366
- # or if the LLM object needs to be explicitly passed to PandasAI DataFrame.
367
- # Check if LLM is configured using the proper config.get() method
368
- config = pai.config.get()
369
- logger.info(f"pai_config: {config}, Type of config: {type(config)}")
370
- if not config.llm:
371
- logger.warning("PandasAI LLM not configured. Attempting to configure now.")
372
- # This assumes configure_pandasai is available and sets the LLM config
373
- from insight_and_tasks.utils.pandasai_setup import configure_pandasai
374
- configure_pandasai(self.api_key, self.model_name)
375
-
376
- # Re-check configuration after setup attempt
377
- config = pai.config.get()
378
- if not config.llm:
379
- raise RuntimeError("PandasAI LLM could not be configured for chat operation.")
380
-
381
- logger.info(f"Executing PandasAI chat for follower analysis with LLM: {config.llm}")
382
- return pandas_ai_df.chat(analysis_query)
383
-
384
- analysis_result_raw = self.retry_mechanism.retry_with_backoff(
385
- func=chat_operation,
386
- max_retries=2, # Adjusted retries
387
- base_delay=2.0,
388
- exceptions=(Exception,) # Catch broader exceptions for PandasAI calls
389
- )
390
- analysis_result_text = str(analysis_result_raw) if analysis_result_raw else "No textual analysis generated by PandasAI."
391
- logger.info("Follower analysis via PandasAI completed.")
392
-
393
- except Exception as e:
394
- logger.error(f"Follower analysis with PandasAI failed after retries: {e}", exc_info=True)
395
- analysis_result_text = f"Follower analysis using PandasAI failed. Error: {str(e)[:200]}"
396
-
397
- # 3. Extract structured metrics using the separated and processed data
398
- monthly_data_for_metrics = separated_data.get('follower_gains_monthly', pd.DataFrame())
399
-
400
- time_series_metrics = self._extract_time_series_metrics(monthly_data_for_metrics)
401
- aggregate_metrics = self._calculate_aggregate_metrics(separated_data) # Uses all separated types
402
- categorical_metrics = self._extract_demographic_metrics(separated_data) # Uses demographic types
403
- time_periods = self._extract_time_periods(monthly_data_for_metrics)
404
-
405
- return AgentMetrics(
406
- agent_name=self.AGENT_NAME,
407
- analysis_summary=analysis_result_text[:2000], # Truncate if too long
408
- time_series_metrics=time_series_metrics,
409
- aggregate_metrics=aggregate_metrics,
410
- categorical_metrics=categorical_metrics,
411
- time_periods_covered=time_periods,
412
- data_sources_used=[f"follower_stats_df (shape: {follower_stats_df.shape})"]
413
- )
414
-
415
- if __name__ == '__main__':
416
- # This is for example and testing purposes.
417
- # Ensure logging and other necessary setups are done.
418
- try:
419
- from utils.logging_config import setup_logging
420
- setup_logging()
421
- logger.info("Logging setup for EnhancedFollowerAnalysisAgent test.")
422
- except ImportError:
423
- logging.basicConfig(level=logging.INFO)
424
- logger.warning("Could not import setup_logging. Using basicConfig.")
425
-
426
- # Mock API Key and Model for testing
427
- # IMPORTANT: For PandasAI to run, a valid API key and model setup are needed.
428
- # This example might not fully execute PandasAI chat without proper environment setup.
429
- MOCK_API_KEY = os.environ.get("GOOGLE_API_KEY", "test_api_key_followers")
430
- MODEL_NAME = DEFAULT_AGENT_MODEL
431
-
432
- # Configure PandasAI (essential for the .chat() part)
433
- try:
434
- from utils.pandasai_setup import configure_pandasai
435
- if MOCK_API_KEY != "test_api_key_followers": # Only configure if a real key might be present
436
- configure_pandasai(MOCK_API_KEY, MODEL_NAME)
437
- logger.info("PandasAI configured for testing EnhancedFollowerAnalysisAgent.")
438
- else:
439
- logger.warning("Using mock API key. PandasAI chat will likely fail or use a default/mock LLM if available.")
440
- # Mock pai.DataFrame if pandasai is not fully set up to avoid errors
441
- class MockPandasAIDataFrame:
442
- def __init__(self, df, description): self.df = df; self.description = description
443
- def chat(self, query): return f"Mock PandasAI response to: {query}"
444
- pai.DataFrame = MockPandasAIDataFrame
445
-
446
- except ImportError:
447
- logger.error("utils.pandasai_setup not found. PandasAI will not be configured.")
448
- class MockPandasAIDataFrame:
449
- def __init__(self, df, description): self.df = df; self.description = description
450
- def chat(self, query): return f"Mock PandasAI response to: {query}"
451
- pai.DataFrame = MockPandasAIDataFrame
452
-
453
- # Sample Data
454
- sample_follower_data = {
455
- 'follower_count_type': [
456
- 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
457
- 'follower_industry', 'follower_industry', 'follower_industry', 'follower_industry',
458
- 'follower_seniority', 'follower_seniority'
459
- ],
460
- 'category_name': [ # Dates for monthly, names for demographics
461
- '2023-01-01', '2023-02-01', '2023-03-01',
462
- 'Technology', 'Finance', 'Healthcare', 'Retail',
463
- 'Senior', 'Entry-Level'
464
- ],
465
- 'follower_count_organic': [
466
- 100, 120, 110, # Monthly gains
467
- 500, 300, 200, 150, # Industry organic
468
- 600, 400 # Seniority organic
469
- ],
470
- 'follower_count_paid': [
471
- 10, 15, 12, # Monthly gains
472
- 50, 30, 20, 10, # Industry paid
473
- 60, 40 # Seniority paid
474
- ]
475
- }
476
- sample_df = pd.DataFrame(sample_follower_data)
477
-
478
- # Initialize agent
479
- follower_agent = EnhancedFollowerAnalysisAgent(api_key=MOCK_API_KEY, model_name=MODEL_NAME)
480
-
481
- logger.info("Analyzing sample follower data...")
482
- metrics_result = follower_agent.analyze_follower_data(sample_df)
483
-
484
- print("\n--- EnhancedFollowerAnalysisAgent Results ---")
485
- print(f"Agent Name: {metrics_result.agent_name}")
486
- print(f"Analysis Summary: {metrics_result.analysis_summary}")
487
- print("\nTime Series Metrics:")
488
- for ts_metric in metrics_result.time_series_metrics:
489
- print(f" - {ts_metric.metric_name}: {len(ts_metric.values)} data points, e.g., {ts_metric.values[:3]} for ts {ts_metric.timestamps[:3]}")
490
- print("\nAggregate Metrics:")
491
- for key, value in metrics_result.aggregate_metrics.items():
492
- print(f" - {key}: {value}")
493
- print("\nCategorical Metrics:")
494
- for key, value in metrics_result.categorical_metrics.items():
495
- print(f" - {key}: (details below)")
496
- if isinstance(value, dict):
497
- for sub_key, sub_value in list(value.items())[:2]: # Print first 2 items for brevity
498
- print(f" - {sub_key}: {sub_value}")
499
- else:
500
- print(f" {value}")
501
-
502
- print(f"\nTime Periods Covered: {metrics_result.time_periods_covered}")
503
- print(f"Data Sources Used: {metrics_result.data_sources_used}")
504
- print(f"Generated Timestamp: {metrics_result.generation_timestamp}")
505
-
506
- # Test with empty DataFrame
507
- logger.info("\n--- Testing with empty DataFrame ---")
508
- empty_metrics_result = follower_agent.analyze_follower_data(pd.DataFrame())
509
- print(f"Empty DF Analysis Summary: {empty_metrics_result.analysis_summary}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/agents/mentions_agent.py DELETED
@@ -1,397 +0,0 @@
1
- # agents/mentions_agent.py
2
- import pandas as pd
3
- from typing import Dict, List, Any, Optional, Mapping
4
- import logging
5
- import pandasai as pai # Assuming pandasai is imported as pai globally or configured
6
-
7
- from google.adk.agents import LlmAgent # Assuming this is the correct import path
8
-
9
- # Project-specific imports
10
- from features.insight_and_tasks.utils.retry_mechanism import RetryMechanism
11
- from features.insight_and_tasks.data_models.metrics import AgentMetrics, TimeSeriesMetric
12
-
13
- # Configure logger for this module
14
- logger = logging.getLogger(__name__)
15
-
16
- DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20"
17
-
18
- class EnhancedMentionsAnalysisAgent:
19
- """
20
- Enhanced mentions analysis agent with time-series metric extraction and sentiment processing.
21
- """
22
- AGENT_NAME = "mentions_analyst"
23
- AGENT_DESCRIPTION = "Expert analyst specializing in brand mention trends and sentiment patterns."
24
- AGENT_INSTRUCTION = """
25
- You are a specialized LinkedIn brand mentions expert focused on sentiment trends and mention patterns over time.
26
-
27
- Your role includes:
28
-
29
- 1. MENTION TREND ANALYSIS (monthly, using 'date' column):
30
- - Analyze mention volume trends over time.
31
- - Identify periods with significant spikes or dips in mention activity.
32
-
33
- 2. SENTIMENT PATTERN ANALYSIS (monthly, using 'date' and 'sentiment_label'):
34
- - Track the evolution of sentiment (e.g., positive, negative, neutral) associated with mentions.
35
- - Calculate and analyze the average sentiment score over time (if sentiment can be quantified).
36
- - Identify shifts in overall sentiment and potential drivers for these changes.
37
-
38
- 3. CORRELATION (Conceptual):
39
- - Consider if mention spikes/dips or sentiment shifts correlate with any known company activities, campaigns, or external events (though this data might not be in the input DataFrame, mention the need to investigate).
40
-
41
- 4. METRIC EXTRACTION (for AgentMetrics):
42
- - Extract time-series data for monthly mention volume.
43
- - Extract time-series data for monthly sentiment distribution (e.g., count of positive/negative/neutral mentions) and average sentiment score.
44
- - Provide aggregate metrics like total mentions, overall sentiment distribution, and average sentiment score for the period.
45
- - Include categorical metrics like the distribution of sentiment labels.
46
-
47
- Focus on identifying actionable insights from mention data. How is the brand being perceived? Are there emerging reputational risks or opportunities?
48
- Use the provided DataFrame columns: 'date' (for mentions), 'sentiment_label' (e.g., 'Positive 👍', 'Negative 👎', 'Neutral 😐'), and potentially 'mention_source' or 'mention_content' if available and relevant for deeper analysis (though focus on 'date' and 'sentiment_label' for core metrics).
49
- """
50
-
51
- # Standardized sentiment mapping (can be expanded)
52
- # This mapping is crucial for converting labels to scores.
53
- SENTIMENT_MAPPING = {
54
- 'Positive 👍': 1,
55
- 'Positive': 1, # Adding common variations
56
- 'Very Positive': 1.5, # Example for more granular sentiment
57
- 'Negative 👎': -1,
58
- 'Negative': -1,
59
- 'Very Negative': -1.5,
60
- 'Neutral 😐': 0,
61
- 'Neutral': 0,
62
- 'Mixed': 0, # Or handle mixed sentiment differently
63
- 'Unknown': 0 # Default score for unmapped or unknown sentiments
64
- }
65
-
66
-
67
- def __init__(self, api_key: str, model_name: Optional[str] = None):
68
- self.api_key = api_key
69
- self.model_name = model_name or DEFAULT_AGENT_MODEL
70
- self.agent = LlmAgent(
71
- name=self.AGENT_NAME,
72
- model=self.model_name,
73
- description=self.AGENT_DESCRIPTION,
74
- instruction=self.AGENT_INSTRUCTION
75
- )
76
- self.retry_mechanism = RetryMechanism()
77
- logger.info(f"{self.AGENT_NAME} initialized with model {self.model_name}.")
78
-
79
- def _get_sentiment_score(self, sentiment_label: Optional[str]) -> float:
80
- """Maps a sentiment label to a numerical score using SENTIMENT_MAPPING."""
81
- if sentiment_label is None:
82
- return self.SENTIMENT_MAPPING.get('Unknown', 0)
83
- # Attempt to match known labels, case-insensitively for robustness if needed,
84
- # but exact match is safer with the current emoji-inclusive keys.
85
- return float(self.SENTIMENT_MAPPING.get(str(sentiment_label).strip(), self.SENTIMENT_MAPPING.get('Unknown',0)))
86
-
87
-
88
- def _preprocess_mentions_data(self, df: pd.DataFrame) -> pd.DataFrame:
89
- """Cleans and prepares mentions data for analysis."""
90
- if df is None or df.empty:
91
- return pd.DataFrame()
92
-
93
- df_processed = df.copy()
94
-
95
- # Convert 'date' to datetime
96
- if 'date' in df_processed.columns:
97
- df_processed['date'] = pd.to_datetime(df_processed['date'], errors='coerce')
98
- # df_processed.dropna(subset=['date'], inplace=True) # Keep for other metrics even if date is NaT
99
- else:
100
- logger.warning("'date' column not found in mentions data. Time-series analysis will be limited.")
101
- # df_processed['date'] = pd.NaT # Add placeholder if critical
102
-
103
- # Process 'sentiment_label' and create 'sentiment_score'
104
- if 'sentiment_label' in df_processed.columns:
105
- df_processed['sentiment_label'] = df_processed['sentiment_label'].astype(str).fillna('Unknown')
106
- df_processed['sentiment_score'] = df_processed['sentiment_label'].apply(self._get_sentiment_score)
107
- else:
108
- logger.info("'sentiment_label' column not found. Sentiment analysis will be limited.")
109
- df_processed['sentiment_label'] = 'Unknown'
110
- df_processed['sentiment_score'] = self._get_sentiment_score('Unknown')
111
-
112
- return df_processed
113
-
114
- def _extract_time_series_metrics(self, df_processed: pd.DataFrame) -> List[TimeSeriesMetric]:
115
- """Extracts monthly time-series metrics from processed mentions data."""
116
- ts_metrics = []
117
- if df_processed.empty or 'date' not in df_processed.columns or df_processed['date'].isnull().all():
118
- logger.info("Cannot extract time-series metrics for mentions: 'date' is missing or all null.")
119
- return ts_metrics
120
-
121
- df_ts = df_processed.dropna(subset=['date']).copy()
122
- if df_ts.empty:
123
- logger.info("No valid 'date' values for mentions time-series metrics after filtering NaT.")
124
- return ts_metrics
125
-
126
- df_ts['year_month'] = df_ts['date'].dt.strftime('%Y-%m')
127
-
128
- # Monthly mention volume
129
- monthly_volume = df_ts.groupby('year_month').size().reset_index(name='mention_count')
130
- if not monthly_volume.empty:
131
- ts_metrics.append(TimeSeriesMetric(
132
- metric_name="monthly_mention_volume",
133
- values=monthly_volume['mention_count'].tolist(),
134
- timestamps=monthly_volume['year_month'].tolist(),
135
- metric_type="time_series",
136
- time_granularity="monthly",
137
- unit="count"
138
- ))
139
-
140
- # Monthly average sentiment score
141
- if 'sentiment_score' in df_ts.columns:
142
- monthly_avg_sentiment = df_ts.groupby('year_month')['sentiment_score'].mean().reset_index()
143
- if not monthly_avg_sentiment.empty:
144
- ts_metrics.append(TimeSeriesMetric(
145
- metric_name="avg_monthly_sentiment_score",
146
- values=monthly_avg_sentiment['sentiment_score'].tolist(),
147
- timestamps=monthly_avg_sentiment['year_month'].tolist(),
148
- metric_type="time_series",
149
- time_granularity="monthly",
150
- unit="score" # Score range depends on SENTIMENT_MAPPING
151
- ))
152
-
153
- # Monthly distribution of sentiment labels
154
- if 'sentiment_label' in df_ts.columns and df_ts['sentiment_label'].nunique() > 1:
155
- # Ensure 'sentiment_label' is not all 'Unknown'
156
- if not (df_ts['sentiment_label'] == 'Unknown').all():
157
- sentiment_counts_by_month = df_ts.groupby(['year_month', 'sentiment_label']).size().unstack(fill_value=0)
158
- for sentiment_val in sentiment_counts_by_month.columns:
159
- if sentiment_val == 'Unknown' and (sentiment_counts_by_month[sentiment_val] == 0).all():
160
- continue
161
- ts_metrics.append(TimeSeriesMetric(
162
- metric_name=f"monthly_mention_count_sentiment_{str(sentiment_val).lower().replace(' ', '_').replace('👍','positive').replace('👎','negative').replace('😐','neutral')}",
163
- values=sentiment_counts_by_month[sentiment_val].tolist(),
164
- timestamps=sentiment_counts_by_month.index.tolist(), # year_month is index
165
- metric_type="time_series",
166
- time_granularity="monthly",
167
- unit="count"
168
- ))
169
- else:
170
- logger.info("Sentiment label data is all 'Unknown', skipping sentiment distribution time series.")
171
-
172
- return ts_metrics
173
-
174
- def _calculate_aggregate_metrics(self, df_processed: pd.DataFrame) -> Dict[str, float]:
175
- """Calculates aggregate metrics for mentions."""
176
- agg_metrics = {}
177
- if df_processed.empty:
178
- return agg_metrics
179
-
180
- agg_metrics['total_mentions_analyzed'] = float(len(df_processed))
181
-
182
- if 'sentiment_score' in df_processed.columns and not df_processed['sentiment_score'].empty:
183
- agg_metrics['overall_avg_sentiment_score'] = float(df_processed['sentiment_score'].mean())
184
-
185
- if 'sentiment_label' in df_processed.columns:
186
- total_valid_sentiments = len(df_processed.dropna(subset=['sentiment_label'])) # Count non-NaN labels
187
- if total_valid_sentiments > 0:
188
- # Iterate through our defined sentiment mapping to count occurrences
189
- sentiment_counts = df_processed['sentiment_label'].value_counts()
190
- for label, score_val in self.SENTIMENT_MAPPING.items():
191
- # Use a clean key for the metric name
192
- clean_label_key = str(label).lower().replace(' ', '_').replace('👍','positive').replace('👎','negative').replace('😐','neutral')
193
- if clean_label_key == "unknown" and score_val == 0: # Skip generic unknown if it's just a fallback
194
- if sentiment_counts.get(label, 0) == 0 and 'Unknown' not in label : continue
195
-
196
-
197
- count = sentiment_counts.get(label, 0)
198
- if count > 0 or label == 'Unknown': # Report if count > 0 or if it's the 'Unknown' category itself
199
- agg_metrics[f'{clean_label_key}_mention_ratio'] = float(count / total_valid_sentiments)
200
- agg_metrics[f'{clean_label_key}_mention_count'] = float(count)
201
-
202
-
203
- # Mentions per day/week (if 'date' column is valid)
204
- if 'date' in df_processed.columns and not df_processed['date'].isnull().all():
205
- df_dated = df_processed.dropna(subset=['date']).sort_values('date')
206
- if len(df_dated) > 1:
207
- duration_days = (df_dated['date'].max() - df_dated['date'].min()).days
208
- if duration_days > 0:
209
- agg_metrics['avg_mentions_per_day'] = float(len(df_dated) / duration_days)
210
- agg_metrics['avg_mentions_per_week'] = float(len(df_dated) / (duration_days / 7.0))
211
- elif len(df_dated) == 1: # Single day with mentions
212
- agg_metrics['avg_mentions_per_day'] = float(len(df_dated))
213
- agg_metrics['avg_mentions_per_week'] = float(len(df_dated) * 7) # Extrapolate
214
-
215
- return agg_metrics
216
-
217
- def _extract_categorical_metrics(self, df_processed: pd.DataFrame) -> Dict[str, Any]:
218
- """Extracts categorical distributions for mentions."""
219
- cat_metrics = {}
220
- if df_processed.empty:
221
- return cat_metrics
222
-
223
- # Sentiment label distribution (counts and percentages)
224
- if 'sentiment_label' in df_processed.columns and df_processed['sentiment_label'].nunique() > 0:
225
- cat_metrics['sentiment_label_distribution_percentage'] = df_processed['sentiment_label'].value_counts(normalize=True).apply(lambda x: f"{x:.2%}").to_dict()
226
- cat_metrics['sentiment_label_counts'] = df_processed['sentiment_label'].value_counts().to_dict()
227
-
228
- # Example: If 'mention_source' column existed:
229
- # if 'mention_source' in df_processed.columns:
230
- # cat_metrics['mention_source_distribution'] = df_processed['mention_source'].value_counts(normalize=True).to_dict()
231
- # cat_metrics['mention_source_counts'] = df_processed['mention_source'].value_counts().to_dict()
232
-
233
- return cat_metrics
234
-
235
- def _extract_time_periods(self, df_processed: pd.DataFrame) -> List[str]:
236
- """Extracts unique year-month time periods covered by the mentions data."""
237
- if df_processed.empty or 'date' not in df_processed.columns or df_processed['date'].isnull().all():
238
- return ["Data period not available or N/A"]
239
-
240
- if 'year_month' in df_processed.columns: # If already created during TS extraction
241
- periods = sorted(df_processed['year_month'].dropna().unique().tolist(), reverse=True)
242
- elif 'date' in df_processed.columns: # Derive if not present
243
- dates = df_processed['date'].dropna()
244
- if not dates.empty:
245
- periods = sorted(dates.dt.strftime('%Y-%m').unique().tolist(), reverse=True)
246
- else: return ["N/A"]
247
- else: return ["N/A"]
248
-
249
- return periods[:12] # Return up to the last 12 months
250
-
251
- def analyze_mentions_data(self, mentions_df: pd.DataFrame) -> AgentMetrics:
252
- """
253
- Generates comprehensive mentions analysis.
254
- """
255
- if mentions_df is None or mentions_df.empty:
256
- logger.warning("Mentions DataFrame is empty. Returning empty metrics.")
257
- return AgentMetrics(
258
- agent_name=self.AGENT_NAME,
259
- analysis_summary="No mentions data provided for analysis.",
260
- time_periods_covered=["N/A"]
261
- )
262
-
263
- # 1. Preprocess data
264
- df_processed = self._preprocess_mentions_data(mentions_df)
265
- if df_processed.empty and not mentions_df.empty:
266
- logger.warning("Mentions DataFrame became empty after preprocessing.")
267
- return AgentMetrics(
268
- agent_name=self.AGENT_NAME,
269
- analysis_summary="Mentions data could not be processed.",
270
- time_periods_covered=["N/A"]
271
- )
272
- elif df_processed.empty and mentions_df.empty:
273
- return AgentMetrics(agent_name=self.AGENT_NAME, analysis_summary="No mentions data provided.")
274
-
275
-
276
- # 2. Generate textual analysis using PandasAI
277
- df_description_for_pandasai = "LinkedIn brand mentions data. Key columns: 'date' (date of mention), 'sentiment_label' (e.g., 'Positive 👍', 'Negative 👎', 'Neutral 😐'), 'sentiment_score' (numeric score from -1.5 to 1.5)."
278
-
279
- analysis_result_text = "PandasAI analysis for mentions could not be performed."
280
- try:
281
- pandas_ai_df = pai.DataFrame(df_processed, description=df_description_for_pandasai)
282
- analysis_query = f"""
283
- Analyze the provided LinkedIn brand mentions data. Focus on:
284
- 1. Monthly trends in mention volume.
285
- 2. Monthly trends in sentiment (average 'sentiment_score' and distribution of 'sentiment_label').
286
- 3. Identify any significant spikes/dips in mentions or shifts in sentiment.
287
- Provide a concise summary of brand perception based on this data.
288
- """
289
- def chat_operation():
290
- config = pai.config.get()
291
- logger.info(f"pai_config: {config}, Type of config: {type(config)}")
292
- if not config.llm:
293
- logger.warning("PandasAI LLM not configured. Attempting to configure now.")
294
- # This assumes configure_pandasai is available and sets the LLM config
295
- from insight_and_tasks.utils.pandasai_setup import configure_pandasai
296
- configure_pandasai(self.api_key, self.model_name)
297
-
298
- # Re-check configuration after setup attempt
299
- config = pai.config.get()
300
- if not config.llm:
301
- raise RuntimeError("PandasAI LLM could not be configured for chat operation.")
302
-
303
- logger.info(f"Executing PandasAI chat for follower analysis with LLM: {config.llm}")
304
- return pandas_ai_df.chat(analysis_query)
305
-
306
- analysis_result_raw = self.retry_mechanism.retry_with_backoff(
307
- func=chat_operation, max_retries=2, base_delay=2.0, exceptions=(Exception,)
308
- )
309
- analysis_result_text = str(analysis_result_raw) if analysis_result_raw else "No textual analysis for mentions generated by PandasAI."
310
- logger.info("Mentions analysis via PandasAI completed.")
311
-
312
- except Exception as e:
313
- logger.error(f"Mentions analysis with PandasAI failed: {e}", exc_info=True)
314
- analysis_result_text = f"Mentions analysis using PandasAI failed. Error: {str(e)[:200]}"
315
-
316
- # 3. Extract structured metrics
317
- time_series_metrics = self._extract_time_series_metrics(df_processed)
318
- aggregate_metrics = self._calculate_aggregate_metrics(df_processed)
319
- categorical_metrics = self._extract_categorical_metrics(df_processed)
320
- time_periods = self._extract_time_periods(df_processed)
321
-
322
- return AgentMetrics(
323
- agent_name=self.AGENT_NAME,
324
- analysis_summary=analysis_result_text[:2000],
325
- time_series_metrics=time_series_metrics,
326
- aggregate_metrics=aggregate_metrics,
327
- categorical_metrics=categorical_metrics,
328
- time_periods_covered=time_periods,
329
- data_sources_used=[f"mentions_df (shape: {mentions_df.shape}) -> df_processed (shape: {df_processed.shape})"]
330
- )
331
-
332
- if __name__ == '__main__':
333
- try:
334
- from utils.logging_config import setup_logging
335
- setup_logging()
336
- logger.info("Logging setup for EnhancedMentionsAnalysisAgent test.")
337
- except ImportError:
338
- logging.basicConfig(level=logging.INFO)
339
- logger.warning("Could not import setup_logging. Using basicConfig.")
340
-
341
- MOCK_API_KEY = os.environ.get("GOOGLE_API_KEY", "test_api_key_mentions")
342
- MODEL_NAME = DEFAULT_AGENT_MODEL
343
-
344
- try:
345
- from utils.pandasai_setup import configure_pandasai
346
- if MOCK_API_KEY != "test_api_key_mentions":
347
- configure_pandasai(MOCK_API_KEY, MODEL_NAME)
348
- logger.info("PandasAI configured for testing EnhancedMentionsAnalysisAgent.")
349
- else:
350
- logger.warning("Using mock API key for mentions. PandasAI chat will likely fail or use a mock.")
351
- class MockPandasAIDataFrame:
352
- def __init__(self, df, description): self.df = df; self.description = description
353
- def chat(self, query): return f"Mock PandasAI mentions response to: {query}"
354
- pai.DataFrame = MockPandasAIDataFrame
355
- except ImportError:
356
- logger.error("utils.pandasai_setup not found. PandasAI will not be configured for mentions.")
357
- class MockPandasAIDataFrame:
358
- def __init__(self, df, description): self.df = df; self.description = description
359
- def chat(self, query): return f"Mock PandasAI mentions response to: {query}"
360
- pai.DataFrame = MockPandasAIDataFrame
361
-
362
-
363
- sample_mentions_data = {
364
- 'date': pd.to_datetime(['2023-01-05', '2023-01-15', '2023-02-02', '2023-02-20', '2023-03-10', '2023-03-12']),
365
- 'sentiment_label': ['Positive 👍', 'Negative 👎', 'Neutral 😐', 'Positive 👍', 'Positive 👍', 'Unknown'],
366
- # 'mention_content': ['Great product!', 'Service was slow.', 'Just a mention.', 'Love the new feature!', 'Highly recommend.', 'Seen this around.']
367
- }
368
- sample_df_mentions = pd.DataFrame(sample_mentions_data)
369
-
370
- mentions_agent = EnhancedMentionsAnalysisAgent(api_key=MOCK_API_KEY, model_name=MODEL_NAME)
371
-
372
- logger.info("Analyzing sample mentions data...")
373
- mentions_metrics_result = mentions_agent.analyze_mentions_data(sample_df_mentions)
374
-
375
- print("\n--- EnhancedMentionsAnalysisAgent Results ---")
376
- print(f"Agent Name: {mentions_metrics_result.agent_name}")
377
- print(f"Analysis Summary: {mentions_metrics_result.analysis_summary}")
378
- print("\nTime Series Metrics (Mentions):")
379
- for ts_metric in mentions_metrics_result.time_series_metrics:
380
- print(f" - {ts_metric.metric_name}: {len(ts_metric.values)} data points, e.g., {ts_metric.values[:3]} for ts {ts_metric.timestamps[:3]} (Unit: {ts_metric.unit})")
381
- print("\nAggregate Metrics (Mentions):")
382
- for key, value in mentions_metrics_result.aggregate_metrics.items():
383
- print(f" - {key}: {value}")
384
- print("\nCategorical Metrics (Mentions):")
385
- for key, value in mentions_metrics_result.categorical_metrics.items():
386
- print(f" - {key}:")
387
- if isinstance(value, dict):
388
- for sub_key, sub_value in list(value.items())[:2]: # Print first 2 for brevity
389
- print(f" - {sub_key}: {sub_value}")
390
- else:
391
- print(f" {value}")
392
- print(f"\nTime Periods Covered (Mentions): {mentions_metrics_result.time_periods_covered}")
393
-
394
- # Test with empty DataFrame
395
- logger.info("\n--- Testing Mentions Agent with empty DataFrame ---")
396
- empty_mentions_metrics = mentions_agent.analyze_mentions_data(pd.DataFrame())
397
- print(f"Empty Mentions DF Analysis Summary: {empty_mentions_metrics.analysis_summary}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/agents/post_agent.py DELETED
@@ -1,538 +0,0 @@
1
- # agents/post_agent.py
2
- import pandas as pd
3
- from typing import Dict, List, Any, Optional
4
- import logging
5
- import pandasai as pai # Assuming pandasai is imported as pai globally or configured
6
-
7
- from google.adk.agents import LlmAgent # Assuming this is the correct import path
8
-
9
- # Project-specific imports
10
- from features.insight_and_tasks.utils.retry_mechanism import RetryMechanism
11
- from features.insight_and_tasks.data_models.metrics import AgentMetrics, TimeSeriesMetric
12
-
13
- # Configure logger for this module
14
- logger = logging.getLogger(__name__)
15
-
16
- DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20"
17
-
18
- class EnhancedPostPerformanceAgent:
19
- """
20
- Enhanced post performance agent with time-series metric extraction and detailed analysis.
21
- """
22
- AGENT_NAME = "post_analyst"
23
- AGENT_DESCRIPTION = "Expert analyst specializing in content performance trends and engagement patterns."
24
- AGENT_INSTRUCTION = """
25
- You are a specialized LinkedIn content performance expert focused on temporal engagement patterns,
26
- content type effectiveness, and audience interaction.
27
-
28
- Your role includes:
29
-
30
- 1. ENGAGEMENT TREND ANALYSIS (monthly, using 'published_at'):
31
- - Analyze trends for key engagement metrics: likes, comments, shares, overall engagement ('engagement' column), impressions, clicks.
32
- - Calculate and analyze engagement rate (engagement / impressionCount) over time.
33
- - Calculate and analyze click-through rate (CTR: clickCount / impressionCount) over time.
34
- - Identify periods of high/low engagement and potential drivers.
35
-
36
- 2. CONTENT TYPE & TOPIC PERFORMANCE:
37
- - Compare performance across different media types (using 'media_type' column).
38
- - Analyze performance by content topic/pillar (using 'li_eb_label' column).
39
- - Identify which content types/topics drive the most engagement, impressions, or clicks.
40
- - Analyze if the effectiveness of certain media types or topics changes over time.
41
-
42
- 3. POSTING BEHAVIOR ANALYSIS:
43
- - Analyze posting frequency (e.g., posts per week/month) and its potential impact on overall engagement or reach.
44
- - Identify if there are optimal posting times or days based on engagement patterns (if 'published_at' provides time detail).
45
-
46
- 4. SENTIMENT ANALYSIS (if 'sentiment' column is available):
47
- - Analyze the distribution of sentiment (e.g., positive, negative, neutral) associated with posts.
48
- - Track how average sentiment of posts evolves over time.
49
-
50
- 5. AD PERFORMANCE (if 'is_ad' column is available):
51
- - Compare performance (engagement, reach, CTR) of ad posts vs. organic posts.
52
-
53
- 6. METRIC EXTRACTION (for AgentMetrics):
54
- - Extract time-series data for average monthly engagement metrics (likes, comments, engagement rate, CTR, etc.).
55
- - Provide aggregate performance metrics (e.g., overall average engagement rate, total impressions, top performing media type).
56
- - Include distributions for content types, topics, and sentiment as categorical metrics.
57
-
58
- Focus on actionable insights. What content resonates most? When is the audience most active? How can strategy be improved?
59
- Structure your analysis clearly. Use the provided DataFrame columns ('published_at', 'media_type', 'li_eb_label',
60
- 'likeCount', 'commentCount', 'shareCount', 'engagement', 'impressionCount', 'clickCount', 'sentiment', 'is_ad').
61
- """
62
-
63
- def __init__(self, api_key: str, model_name: Optional[str] = None):
64
- self.api_key = api_key
65
- self.model_name = model_name or DEFAULT_AGENT_MODEL
66
- self.agent = LlmAgent(
67
- name=self.AGENT_NAME,
68
- model=self.model_name,
69
- description=self.AGENT_DESCRIPTION,
70
- instruction=self.AGENT_INSTRUCTION
71
- )
72
- self.retry_mechanism = RetryMechanism()
73
- logger.info(f"{self.AGENT_NAME} initialized with model {self.model_name}.")
74
-
75
- def _preprocess_post_data(self, df: pd.DataFrame) -> pd.DataFrame:
76
- """Cleans and prepares post data for analysis."""
77
- if df is None or df.empty:
78
- return pd.DataFrame()
79
-
80
- df_processed = df.copy()
81
-
82
- # Convert 'published_at' to datetime
83
- if 'published_at' in df_processed.columns:
84
- df_processed['published_at'] = pd.to_datetime(df_processed['published_at'], errors='coerce')
85
- # df_processed.dropna(subset=['published_at'], inplace=True) # Keep rows even if date is NaT for other metrics
86
- else:
87
- logger.warning("'published_at' column not found. Time-series analysis will be limited.")
88
- # Add a placeholder if critical for downstream, or handle absence gracefully
89
- # df_processed['published_at'] = pd.NaT
90
-
91
- # Ensure numeric types for engagement metrics, coercing errors and filling NaNs
92
- metric_cols = ['likeCount', 'commentCount', 'shareCount', 'engagement', 'impressionCount', 'clickCount']
93
- for col in metric_cols:
94
- if col in df_processed.columns:
95
- df_processed[col] = pd.to_numeric(df_processed[col], errors='coerce').fillna(0)
96
- else:
97
- logger.info(f"Metric column '{col}' not found in post data. Will be treated as 0.")
98
- df_processed[col] = 0 # Add column with zeros if missing
99
-
100
- # Calculate Engagement Rate and CTR where possible
101
- if 'impressionCount' in df_processed.columns and 'engagement' in df_processed.columns:
102
- df_processed['engagement_rate'] = df_processed.apply(
103
- lambda row: (row['engagement'] / row['impressionCount']) if row['impressionCount'] > 0 else 0.0, axis=1
104
- )
105
- else:
106
- df_processed['engagement_rate'] = 0.0
107
-
108
- if 'impressionCount' in df_processed.columns and 'clickCount' in df_processed.columns:
109
- df_processed['ctr'] = df_processed.apply(
110
- lambda row: (row['clickCount'] / row['impressionCount']) if row['impressionCount'] > 0 else 0.0, axis=1
111
- )
112
- else:
113
- df_processed['ctr'] = 0.0
114
-
115
- # Handle 'is_ad' boolean conversion if it exists
116
- if 'is_ad' in df_processed.columns:
117
- df_processed['is_ad'] = df_processed['is_ad'].astype(bool)
118
- else:
119
- df_processed['is_ad'] = False # Assume organic if not specified
120
-
121
- # Handle 'sentiment' - ensure it's string, fill NaNs
122
- if 'sentiment' in df_processed.columns:
123
- df_processed['sentiment'] = df_processed['sentiment'].astype(str).fillna('Unknown')
124
- else:
125
- df_processed['sentiment'] = 'Unknown'
126
-
127
- # Handle 'media_type' and 'li_eb_label' - ensure string, fill NaNs
128
- for col in ['media_type', 'li_eb_label']:
129
- if col in df_processed.columns:
130
- df_processed[col] = df_processed[col].astype(str).fillna('Unknown')
131
- else:
132
- df_processed[col] = 'Unknown'
133
-
134
- return df_processed
135
-
136
- def _extract_time_series_metrics(self, df_processed: pd.DataFrame) -> List[TimeSeriesMetric]:
137
- """Extracts monthly time-series metrics from processed post data."""
138
- ts_metrics = []
139
- if df_processed.empty or 'published_at' not in df_processed.columns or df_processed['published_at'].isnull().all():
140
- logger.info("Cannot extract time-series metrics for posts: 'published_at' is missing or all null.")
141
- return ts_metrics
142
-
143
- # Filter out rows where 'published_at' is NaT for time-series aggregation
144
- df_ts = df_processed.dropna(subset=['published_at']).copy()
145
- if df_ts.empty:
146
- logger.info("No valid 'published_at' dates for post time-series metrics after filtering NaT.")
147
- return ts_metrics
148
-
149
- df_ts['year_month'] = df_ts['published_at'].dt.strftime('%Y-%m')
150
-
151
- # Metrics to average monthly
152
- metrics_to_agg = {
153
- 'likeCount': 'mean', 'commentCount': 'mean', 'shareCount': 'mean',
154
- 'engagement': 'mean', 'impressionCount': 'mean', 'clickCount': 'mean',
155
- 'engagement_rate': 'mean', 'ctr': 'mean'
156
- }
157
- # Filter out metrics not present in the DataFrame
158
- available_metrics_to_agg = {k: v for k, v in metrics_to_agg.items() if k in df_ts.columns}
159
-
160
- if not available_metrics_to_agg:
161
- logger.info("No standard engagement metric columns found for time-series aggregation.")
162
- else:
163
- monthly_stats = df_ts.groupby('year_month').agg(available_metrics_to_agg).reset_index()
164
- timestamps = monthly_stats['year_month'].tolist()
165
-
166
- for metric_col, agg_type in available_metrics_to_agg.items():
167
- # Use original column name, or a more descriptive one like "avg_monthly_likes"
168
- ts_metrics.append(TimeSeriesMetric(
169
- metric_name=f"avg_monthly_{metric_col.lower()}",
170
- values=monthly_stats[metric_col].fillna(0).tolist(),
171
- timestamps=timestamps,
172
- metric_type="time_series",
173
- time_granularity="monthly",
174
- unit="%" if "_rate" in metric_col or "ctr" in metric_col else "count"
175
- ))
176
-
177
- # Time series for sentiment distribution (count of posts by sentiment per month)
178
- if 'sentiment' in df_ts.columns and df_ts['sentiment'].nunique() > 1: # if varied sentiment data exists
179
- # Ensure 'sentiment' is not all 'Unknown'
180
- if not (df_ts['sentiment'] == 'Unknown').all():
181
- sentiment_by_month = df_ts.groupby(['year_month', 'sentiment']).size().unstack(fill_value=0)
182
- for sentiment_value in sentiment_by_month.columns:
183
- if sentiment_value == 'Unknown' and (sentiment_by_month[sentiment_value] == 0).all():
184
- continue # Skip if 'Unknown' sentiment has no posts
185
- ts_metrics.append(TimeSeriesMetric(
186
- metric_name=f"monthly_post_count_sentiment_{str(sentiment_value).lower().replace(' ', '_')}",
187
- values=sentiment_by_month[sentiment_value].tolist(),
188
- timestamps=sentiment_by_month.index.tolist(), # year_month is the index
189
- metric_type="time_series",
190
- time_granularity="monthly",
191
- unit="count"
192
- ))
193
- else:
194
- logger.info("Sentiment data is all 'Unknown', skipping sentiment time series.")
195
-
196
- # Time series for post count
197
- monthly_post_counts = df_ts.groupby('year_month').size().reset_index(name='post_count')
198
- if not monthly_post_counts.empty:
199
- ts_metrics.append(TimeSeriesMetric(
200
- metric_name="monthly_post_count",
201
- values=monthly_post_counts['post_count'].tolist(),
202
- timestamps=monthly_post_counts['year_month'].tolist(),
203
- metric_type="time_series",
204
- time_granularity="monthly",
205
- unit="count"
206
- ))
207
-
208
- return ts_metrics
209
-
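- # Example output (hypothetical data): two months of posts would yield entries such as
- #   TimeSeriesMetric(metric_name="monthly_post_count", values=[2, 3],
- #                    timestamps=["2023-01", "2023-02"], time_granularity="monthly", unit="count")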
210
- def _calculate_aggregate_metrics(self, df_processed: pd.DataFrame) -> Dict[str, Any]:
211
- """Calculates aggregate performance metrics for posts."""
212
- agg_metrics = {}
213
- if df_processed.empty:
214
- return agg_metrics
215
-
216
- # Overall averages and totals
217
- metric_cols_for_agg = ['likeCount', 'commentCount', 'shareCount', 'engagement',
218
- 'impressionCount', 'clickCount', 'engagement_rate', 'ctr']
219
- for col in metric_cols_for_agg:
220
- if col in df_processed.columns and pd.api.types.is_numeric_dtype(df_processed[col]):
221
- agg_metrics[f'overall_avg_{col.lower()}'] = float(df_processed[col].mean())
222
- if col not in ['engagement_rate', 'ctr']: # Totals make sense for counts
223
- agg_metrics[f'overall_total_{col.lower()}'] = float(df_processed[col].sum())
224
-
225
- agg_metrics['total_posts_analyzed'] = float(len(df_processed))
226
-
227
- # Posting frequency (posts per week)
228
- if 'published_at' in df_processed.columns and not df_processed['published_at'].isnull().all():
229
- df_dated = df_processed.dropna(subset=['published_at']).sort_values('published_at')
230
- if len(df_dated) > 1:
231
- # Calculate total duration in days
232
- duration_days = (df_dated['published_at'].max() - df_dated['published_at'].min()).days
233
- if duration_days > 0:
234
- agg_metrics['avg_posts_per_week'] = float(len(df_dated) / (duration_days / 7.0))
235
- else: # duration_days == 0: all posts fell within a single day
236
- agg_metrics['avg_posts_per_week'] = float(len(df_dated) * 7) # Extrapolate
237
- elif len(df_dated) == 1:
238
- agg_metrics['avg_posts_per_week'] = 7.0 # One post, extrapolate to 7 per week
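- # Worked example (hypothetical): 10 posts spanning 35 days -> 10 / (35 / 7.0) = 2.0 posts/week.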
239
-
240
- # Performance by media type and topic (as tables/structured dicts)
241
- agg_metrics['performance_by_media_type'] = self._create_performance_table(df_processed, 'media_type')
242
- agg_metrics['performance_by_topic'] = self._create_performance_table(df_processed, 'li_eb_label')
243
-
244
- return agg_metrics
245
-
246
- def _create_performance_table(self, df: pd.DataFrame, group_column: str) -> Dict[str, Any]:
247
- """Helper to create a structured performance table for categorical analysis."""
248
- if group_column not in df.columns or df[group_column].isnull().all() or (df[group_column] == 'Unknown').all():
249
- return {"message": f"No data or only 'Unknown' values for grouping by {group_column}."}
250
-
251
- # Filter out 'Unknown' category if it's the only one or for cleaner tables
252
- df_filtered = df[df[group_column] != 'Unknown']
253
- if df_filtered.empty: # If filtering 'Unknown' leaves no data, use original df but acknowledge
254
- df_filtered = df
255
- logger.info(f"Performance table for {group_column} includes 'Unknown' as it's the only/main category.")
256
-
257
- # Define metrics to aggregate
258
- agg_config = {
259
- 'engagement': 'mean',
260
- 'impressionCount': 'mean',
261
- 'clickCount': 'mean',
262
- 'likeCount': 'mean',
263
- 'commentCount': 'mean',
264
- 'shareCount': 'mean',
265
- 'engagement_rate': 'mean',
266
- 'ctr': 'mean',
267
- 'published_at': 'count' # To get number of posts per category
268
- }
269
- # Filter config for columns that actually exist in df_filtered
270
- valid_agg_config = {k: v for k, v in agg_config.items() if k in df_filtered.columns or k == 'published_at'} # 'published_at' for count
271
-
272
- if not valid_agg_config or 'published_at' not in valid_agg_config: # Need at least one metric or the post count
273
- return {"message": f"Not enough relevant metric columns to create performance table for {group_column}."}
274
-
275
-
276
- try:
277
- # Group by the specified column and aggregate
278
- # Rename 'published_at' count to 'num_posts' for clarity
279
- grouped = df_filtered.groupby(group_column).agg(valid_agg_config).rename(
280
- columns={'published_at': 'num_posts'}
281
- ).reset_index()
282
-
283
- # Sort by a primary engagement metric, e.g., average engagement rate or num_posts
284
- sort_key = 'num_posts'
285
- if 'engagement_rate' in grouped.columns:
286
- sort_key = 'engagement_rate'
287
- elif 'engagement' in grouped.columns:
288
- sort_key = 'engagement'
289
-
290
- grouped = grouped.sort_values(by=sort_key, ascending=False)
291
-
292
- # Prepare for JSON serializable output
293
- table_data = []
294
- for _, row in grouped.iterrows():
295
- row_dict = {'category': row[group_column]}
296
- for col in grouped.columns:
297
- if col == group_column: continue
298
- value = row[col]
299
- if isinstance(value, (int, float)):
300
- if "_rate" in col or "ctr" in col:
301
- row_dict[col] = f"{value:.2%}" # Percentage
302
- else:
303
- row_dict[col] = round(value, 2) if isinstance(value, float) else value
304
- else:
305
- row_dict[col] = str(value)
306
- table_data.append(row_dict)
307
-
308
- return {
309
- "grouping_column": group_column,
310
- "columns_reported": [col for col in grouped.columns.tolist() if col != group_column],
311
- "data": table_data,
312
- "note": f"Top categories by {sort_key}."
313
- }
314
-
315
- except Exception as e:
316
- logger.error(f"Error creating performance table for {group_column}: {e}", exc_info=True)
317
- return {"error": f"Could not generate table for {group_column}: {e}"}
318
-
319
-
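- # Illustrative return shape for _create_performance_table (numbers are hypothetical):
- #   {"grouping_column": "media_type",
- #    "columns_reported": ["engagement", "engagement_rate", "num_posts"],
- #    "data": [{"category": "VIDEO", "engagement_rate": "8.50%", "num_posts": 12}],
- #    "note": "Top categories by engagement_rate."}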
320
- def _extract_categorical_metrics(self, df_processed: pd.DataFrame) -> Dict[str, Any]:
321
- """Extracts distributions and other categorical insights for posts."""
322
- cat_metrics = {}
323
- if df_processed.empty:
324
- return cat_metrics
325
-
326
- # Media type distribution
327
- if 'media_type' in df_processed.columns and df_processed['media_type'].nunique() > 0:
328
- cat_metrics['media_type_distribution'] = df_processed['media_type'].value_counts(normalize=True).apply(lambda x: f"{x:.2%}").to_dict()
329
- cat_metrics['media_type_counts'] = df_processed['media_type'].value_counts().to_dict()
330
-
331
-
332
- # Topic distribution (li_eb_label)
333
- if 'li_eb_label' in df_processed.columns and df_processed['li_eb_label'].nunique() > 0:
334
- cat_metrics['topic_distribution'] = df_processed['li_eb_label'].value_counts(normalize=True).apply(lambda x: f"{x:.2%}").to_dict()
335
- cat_metrics['topic_counts'] = df_processed['li_eb_label'].value_counts().to_dict()
336
-
337
- # Sentiment distribution
338
- if 'sentiment' in df_processed.columns and df_processed['sentiment'].nunique() > 0:
339
- cat_metrics['sentiment_distribution'] = df_processed['sentiment'].value_counts(normalize=True).apply(lambda x: f"{x:.2%}").to_dict()
340
- cat_metrics['sentiment_counts'] = df_processed['sentiment'].value_counts().to_dict()
341
-
342
- # Ad vs. Organic performance summary
343
- if 'is_ad' in df_processed.columns:
344
- ad_summary = {}
345
- for ad_status in [True, False]:
346
- subset = df_processed[df_processed['is_ad'] == ad_status]
347
- if not subset.empty:
348
- label = "ad" if ad_status else "organic"
349
- ad_summary[f'{label}_post_count'] = int(len(subset))
350
- ad_summary[f'{label}_avg_engagement_rate'] = float(subset['engagement_rate'].mean())
351
- ad_summary[f'{label}_avg_impressions'] = float(subset['impressionCount'].mean())
352
- ad_summary[f'{label}_avg_ctr'] = float(subset['ctr'].mean())
353
- if ad_summary:
354
- cat_metrics['ad_vs_organic_summary'] = ad_summary
355
-
356
- return cat_metrics
357
-
358
- def _extract_time_periods(self, df_processed: pd.DataFrame) -> List[str]:
359
- """Extracts unique year-month time periods covered by the post data."""
360
- if df_processed.empty or 'published_at' not in df_processed.columns or df_processed['published_at'].isnull().all():
361
- return ["Data period not available or N/A"]
362
-
363
- # Use already created 'year_month' if available from preprocessing, or derive it
364
- if 'year_month' in df_processed.columns:
365
- periods = sorted(df_processed['year_month'].dropna().unique().tolist(), reverse=True)
366
- elif 'published_at' in df_processed.columns: # Derive if not present
367
- dates = df_processed['published_at'].dropna()
368
- if not dates.empty:
369
- periods = sorted(dates.dt.strftime('%Y-%m').unique().tolist(), reverse=True)
370
- else: return ["N/A"]
371
- else: return ["N/A"]
372
-
373
- return periods[:12] # Return up to the last 12 months
374
-
375
- def analyze_post_data(self, post_df: pd.DataFrame) -> AgentMetrics:
376
- """
377
- Generates comprehensive post performance analysis.
378
- """
379
- if post_df is None or post_df.empty:
380
- logger.warning("Post DataFrame is empty. Returning empty metrics.")
381
- return AgentMetrics(
382
- agent_name=self.AGENT_NAME,
383
- analysis_summary="No post data provided for analysis.",
384
- time_periods_covered=["N/A"]
385
- )
386
-
387
- # 1. Preprocess data
388
- df_processed = self._preprocess_post_data(post_df)
389
- if df_processed.empty and not post_df.empty: # Preprocessing resulted in an empty DataFrame
390
- logger.warning("Post DataFrame became empty after preprocessing. Original data might have issues.")
391
- return AgentMetrics(
392
- agent_name=self.AGENT_NAME,
393
- analysis_summary="Post data could not be processed (e.g., all dates invalid).",
394
- time_periods_covered=["N/A"]
395
- )
396
- elif df_processed.empty and post_df.empty: # Was already empty
397
- # This case is handled by the initial check, but as a safeguard:
398
- return AgentMetrics(agent_name=self.AGENT_NAME, analysis_summary="No post data provided.")
399
-
400
-
401
- # 2. Generate textual analysis using PandasAI (similar to follower agent)
402
- df_description_for_pandasai = "LinkedIn post performance data. Key columns: 'published_at' (date of post), 'media_type' (e.g., IMAGE, VIDEO, ARTICLE), 'li_eb_label' (content topic/pillar), 'likeCount', 'commentCount', 'shareCount', 'engagement' (sum of reactions, comments, shares), 'impressionCount', 'clickCount', 'sentiment' (post sentiment), 'is_ad' (boolean), 'engagement_rate', 'ctr'."
403
-
404
- analysis_result_text = "PandasAI analysis for posts could not be performed."
405
- try:
406
- # Ensure PandasAI is configured
407
- pandas_ai_df = pai.DataFrame(df_processed, description=df_description_for_pandasai)
408
-
409
- analysis_query = f"""
410
- Analyze the provided LinkedIn post performance data. Focus on:
411
- 1. Monthly trends for key metrics (engagement, impressions, engagement rate, CTR).
412
- 2. Performance comparison by 'media_type' and 'li_eb_label'. Which ones are most effective?
413
- 3. Impact of posting frequency (if derivable from 'published_at' timestamps).
414
- 4. Sentiment trends and distribution.
415
- 5. Differences in performance between ad posts ('is_ad'=True) and organic posts.
416
- Provide a concise summary of findings and actionable recommendations.
417
- """
418
- def chat_operation():
419
- config = pai.config.get()
420
- logger.info(f"pai_config: {config}, Type of config: {type(config)}")
421
- if not config.llm:
422
- logger.warning("PandasAI LLM not configured. Attempting to configure now.")
423
- # This assumes configure_pandasai is available and sets the LLM config
424
- from features.insight_and_tasks.utils.pandasai_setup import configure_pandasai
425
- configure_pandasai(self.api_key, self.model_name)
426
-
427
- # Re-check configuration after setup attempt
428
- config = pai.config.get()
429
- if not config.llm:
430
- raise RuntimeError("PandasAI LLM could not be configured for chat operation.")
431
-
432
- logger.info(f"Executing PandasAI chat for follower analysis with LLM: {config.llm}")
433
- return pandas_ai_df.chat(analysis_query)
434
-
435
- analysis_result_raw = self.retry_mechanism.retry_with_backoff(
436
- func=chat_operation, max_retries=2, base_delay=2.0, exceptions=(Exception,)
437
- )
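- # Sketch of the retry contract assumed here (the real logic lives in
- # utils/retry_mechanism.py and may differ, e.g., adding jitter or logging):
- #   for attempt in range(max_retries + 1):
- #       try: return func()
- #       except exceptions: time.sleep(base_delay * (2 ** attempt))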
438
- analysis_result_text = str(analysis_result_raw) if analysis_result_raw else "No textual analysis for posts generated by PandasAI."
439
- logger.info("Post performance analysis via PandasAI completed.")
440
-
441
- except Exception as e:
442
- logger.error(f"Post analysis with PandasAI failed: {e}", exc_info=True)
443
- analysis_result_text = f"Post analysis using PandasAI failed. Error: {str(e)[:200]}"
444
-
445
- # 3. Extract structured metrics
446
- time_series_metrics = self._extract_time_series_metrics(df_processed)
447
- aggregate_metrics = self._calculate_aggregate_metrics(df_processed)
448
- categorical_metrics = self._extract_categorical_metrics(df_processed)
449
- time_periods = self._extract_time_periods(df_processed)
450
-
451
- return AgentMetrics(
452
- agent_name=self.AGENT_NAME,
453
- analysis_summary=analysis_result_text[:2000],
454
- time_series_metrics=time_series_metrics,
455
- aggregate_metrics=aggregate_metrics,
456
- categorical_metrics=categorical_metrics,
457
- time_periods_covered=time_periods,
458
- data_sources_used=[f"post_df (shape: {post_df.shape}) -> df_processed (shape: {df_processed.shape})"]
459
- )
460
-
461
- if __name__ == '__main__':
- import os  # used by os.environ.get below
462
- try:
463
- from features.insight_and_tasks.utils.logging_config import setup_logging
464
- setup_logging()
465
- logger.info("Logging setup for EnhancedPostPerformanceAgent test.")
466
- except ImportError:
467
- logging.basicConfig(level=logging.INFO)
468
- logger.warning("Could not import setup_logging. Using basicConfig.")
469
-
470
- MOCK_API_KEY = os.environ.get("GOOGLE_API_KEY", "test_api_key_posts")
471
- MODEL_NAME = DEFAULT_AGENT_MODEL
472
-
473
- try:
474
- from features.insight_and_tasks.utils.pandasai_setup import configure_pandasai
475
- if MOCK_API_KEY != "test_api_key_posts":
476
- configure_pandasai(MOCK_API_KEY, MODEL_NAME)
477
- logger.info("PandasAI configured for testing EnhancedPostPerformanceAgent.")
478
- else:
479
- logger.warning("Using mock API key for posts. PandasAI chat will likely fail or use a mock.")
480
- class MockPandasAIDataFrame:
481
- def __init__(self, df, description): self.df = df; self.description = description
482
- def chat(self, query): return f"Mock PandasAI post response to: {query}"
483
- pai.DataFrame = MockPandasAIDataFrame
484
- except ImportError:
485
- logger.error("utils.pandasai_setup not found. PandasAI will not be configured for posts.")
486
- class MockPandasAIDataFrame:
487
- def __init__(self, df, description): self.df = df; self.description = description
488
- def chat(self, query): return f"Mock PandasAI post response to: {query}"
489
- pai.DataFrame = MockPandasAIDataFrame
490
-
491
- sample_post_data = {
492
- 'published_at': pd.to_datetime(['2023-01-15', '2023-01-20', '2023-02-10', '2023-02-25', '2023-03-05', None]),
493
- 'media_type': ['IMAGE', 'VIDEO', 'IMAGE', 'ARTICLE', 'IMAGE', 'IMAGE'],
494
- 'li_eb_label': ['Product Update', 'Company Culture', 'Product Update', 'Industry Insights', 'Company Culture', 'Product Update'],
495
- 'likeCount': [100, 150, 120, 80, 200, 50],
496
- 'commentCount': [10, 20, 15, 5, 25, 3],
497
- 'shareCount': [5, 10, 8, 2, 12, 1],
498
- 'engagement': [115, 180, 143, 87, 237, 54], # Sum of likes, comments, shares
499
- 'impressionCount': [1000, 1500, 1200, 900, 2000, 600],
500
- 'clickCount': [50, 70, 60, 30, 90, 20],
501
- 'sentiment': ['Positive 👍', 'Positive 👍', 'Neutral 😐', 'Positive 👍', 'Negative 👎', 'Positive 👍'],
502
- 'is_ad': [False, False, True, False, False, True]
503
- }
504
- sample_df_posts = pd.DataFrame(sample_post_data)
505
-
506
- post_agent = EnhancedPostPerformanceAgent(api_key=MOCK_API_KEY, model_name=MODEL_NAME)
507
-
508
- logger.info("Analyzing sample post data...")
509
- post_metrics_result = post_agent.analyze_post_data(sample_df_posts)
510
-
511
- print("\n--- EnhancedPostPerformanceAgent Results ---")
512
- print(f"Agent Name: {post_metrics_result.agent_name}")
513
- print(f"Analysis Summary: {post_metrics_result.analysis_summary}")
514
- print("\nTime Series Metrics (Post):")
515
- for ts_metric in post_metrics_result.time_series_metrics:
516
- print(f" - {ts_metric.metric_name}: {len(ts_metric.values)} data points, e.g., {ts_metric.values[:3]} for ts {ts_metric.timestamps[:3]} (Unit: {ts_metric.unit})")
517
- print("\nAggregate Metrics (Post):")
518
- for key, value in post_metrics_result.aggregate_metrics.items():
519
- if isinstance(value, dict) and 'data' in value: # Performance table
520
- print(f" - {key}: (Table - {value.get('grouping_column', '')}) - {len(value['data'])} categories")
521
- for item in value['data'][:1]: # Print first item for brevity
522
- print(f" Example Category '{item.get('category')}': { {k:v for k,v in item.items() if k!='category'} }")
523
- else:
524
- print(f" - {key}: {value}")
525
- print("\nCategorical Metrics (Post):")
526
- for key, value in post_metrics_result.categorical_metrics.items():
527
- print(f" - {key}:")
528
- if isinstance(value, dict):
529
- for sub_key, sub_value in list(value.items())[:2]:
530
- print(f" - {sub_key}: {sub_value}")
531
- else:
532
- print(f" {value}")
533
- print(f"\nTime Periods Covered (Post): {post_metrics_result.time_periods_covered}")
534
-
535
- # Test with empty DataFrame
536
- logger.info("\n--- Testing Post Agent with empty DataFrame ---")
537
- empty_post_metrics = post_agent.analyze_post_data(pd.DataFrame())
538
- print(f"Empty Post DF Analysis Summary: {empty_post_metrics.analysis_summary}")
features/insight_and_tasks/agents/task_extraction_agent.py DELETED
@@ -1,400 +0,0 @@
1
- # agents/task_extraction_agent.py
2
- import logging
3
- from typing import Optional
4
- from datetime import datetime, date # Ensure date is imported if used for type hints
5
-
6
- from google.adk.agents import LlmAgent
7
- from google.adk.runners import InMemoryRunner # Assuming this is used for direct agent running
8
- from google.genai import types as genai_types # For constructing ADK agent inputs
9
-
10
- # Project-specific imports
11
- from features.insight_and_tasks.data_models.tasks import (
12
- TaskExtractionOutput,
13
- OKR,
14
- KeyResult,
15
- Task,
16
- EffortLevel,
17
- TimelineCategory,
18
- PriorityLevel,
19
- TaskType,
20
- DataSubject # Ensure all are imported
21
- )
22
- from features.insight_and_tasks.utils.retry_mechanism import RetryMechanism # If retries are needed for ADK calls
23
-
24
- # Configure logger for this module
25
- logger = logging.getLogger(__name__)
26
-
27
- DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20" # Or your specific model
28
-
29
- class TaskExtractionAgent:
30
- """
31
- Agent specialized in extracting actionable tasks and OKRs from analysis insights,
32
- with awareness of the current date and quarter.
33
- """
34
- AGENT_NAME = "task_extractor"
35
- AGENT_DESCRIPTION = "Specialist in converting strategic insights into specific, time-aware actionable tasks and OKRs."
36
-
37
- def __init__(self, api_key: str, model_name: Optional[str] = None, current_date: Optional[date] = None):
38
- """
39
- Initializes the TaskExtractionAgent.
40
- Args:
41
- api_key: API key (may be used by LlmAgent configuration or future needs).
42
- model_name: Name of the language model to use.
43
- current_date: The current date to use for quarter calculations. Defaults to today.
44
- """
45
- self.api_key = api_key # Store if needed by LlmAgent or other components
46
- self.model_name = model_name or DEFAULT_AGENT_MODEL
47
- self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
48
-
49
- # LlmAgent is initialized with dynamic instruction and output schema
50
- self.agent = LlmAgent(
51
- name=self.AGENT_NAME,
52
- model=self.model_name,
53
- description=self.AGENT_DESCRIPTION,
54
- instruction=self._get_instruction_prompt(), # Instruction generated dynamically
55
- output_schema=TaskExtractionOutput, # Pydantic model for structured output
56
- output_key="extracted_tasks_okrs" # Key where LlmAgent stores structured output in state
57
- )
58
- self.retry_mechanism = RetryMechanism() # For retrying ADK runner if needed
59
- logger.info(f"{self.AGENT_NAME} initialized for Q{self._get_quarter(self.current_date)}, "
60
- f"{self._days_until_quarter_end(self.current_date)} days remaining in quarter. Model: {self.model_name}")
61
-
62
- def _get_quarter(self, d: date) -> int:
63
- """Calculates the quarter for a given date."""
64
- return (d.month - 1) // 3 + 1
65
-
66
- def _days_until_quarter_end(self, d: date) -> int:
67
- """Calculates the number of days remaining in the current quarter from date d."""
68
- current_q = self._get_quarter(d)
69
- year = d.year
70
- if current_q == 1:
71
- quarter_end_date = date(year, 3, 31)
72
- elif current_q == 2:
73
- quarter_end_date = date(year, 6, 30)
74
- elif current_q == 3:
75
- quarter_end_date = date(year, 9, 30)
76
- else: # Quarter 4
77
- quarter_end_date = date(year, 12, 31)
78
-
79
- days_remaining = (quarter_end_date - d).days
80
- return max(0, days_remaining) # Ensure non-negative
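- # Worked example: d = date(2025, 4, 15) is in Q2; the quarter ends 2025-06-30, so 76 days remain.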
81
-
82
- def _get_instruction_prompt(self) -> str:
83
- """Generates the dynamic instruction string for the LLM agent."""
84
- quarter = self._get_quarter(self.current_date)
85
- days_remaining = self._days_until_quarter_end(self.current_date)
86
-
87
-
88
- return f"""
89
- You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and Key Results (OKRs) framework.
90
-
91
- CURRENT CONTEXTUAL INFORMATION (CRITICAL - Use these exact values in your output where specified):
92
- - Current Quarter: Q{quarter}
93
- - Days remaining in current quarter: {days_remaining}
94
- - Today's Date (for your context only, not for direct output unless specified by a schema field): {self.current_date.isoformat()}
95
-
96
- For EACH 'OKR' object, you MUST generate a 'key_results' array containing 1 to 3 'KeyResult' objects.
97
- For EACH 'KeyResult' object, you MUST generate a 'tasks' array containing 1 to 3 'Task' objects.
98
- It is CRITICAL that you populate the 'key_results' list for every OKR, and the 'tasks' list for every KeyResult.
99
-
100
- KEY GUIDELINES FOR QUALITY AND ACCURACY:
101
- - Actionability: All descriptions (Objective, Key Result, Task) must be clear, concise, and define concrete actions or measurable outcomes.
102
- - Measurability: Key Results and Task 'success_criteria_metrics' must be specific and quantifiable.
103
- - Completeness: Ensure all REQUIRED fields in every Pydantic model are present in your JSON output. Optional fields can be omitted or set to null if not applicable.
104
-
105
- INPUT:
106
- You will receive a 'comprehensive_analysis' text.
107
-
108
- OUTPUT FORMAT:
109
- # Example of the overall JSON structure (content is illustrative; refer to schemas for full details):
110
- {{
111
- "current_quarter_info": "Q{quarter}, {days_remaining} days remaining",
112
- "okrs": [
113
- {{
114
- "objective_description": "Example: Elevate brand visibility and engagement across key digital channels.",
115
- "objective_timeline": "{TimelineCategory.SHORT_TERM.value}",
116
- "objective_owner": "Marketing Department",
117
- "key_results": [
118
- {{
119
- "key_result_description": "Example: Increase organic reach on LinkedIn by 15%.",
120
- "target_metric": "LinkedIn Organic Reach Percentage Increase",
121
- "target_value": "15%",
122
- "tasks": [
123
- {{
124
- "task_category": "Content Strategy",
125
- "task_description": "Develop and schedule a 4-week content calendar for LinkedIn focusing on industry insights.",
126
- "objective_deliverable": "Deliverable: A finalized 4-week content calendar with 3 posts per week, approved and scheduled.",
127
- "effort": "{EffortLevel.MEDIUM.value}",
128
- "timeline": "{TimelineCategory.IMMEDIATE.value}",
129
- "responsible_party": "Content Marketing Manager",
130
- "success_criteria_metrics": "Content calendar completed and approved by [Date]. All posts scheduled by [Date].",
131
- "dependencies_prerequisites": "Completion of Q{quarter} keyword research and audience persona refinement.",
132
- "priority": "{PriorityLevel.HIGH.value}",
133
- "priority_justification": "Critical for maintaining consistent brand voice and achieving engagement targets for the quarter.",
134
- "why_proposed": "Analysis of LinkedIn insights report (Page 3) showed a 20% drop in engagement last month, attributed to inconsistent posting schedule and lack of targeted content themes.",
135
- "task_type": "{TaskType.INITIATIVE.value}",
136
- "data_subject": "{DataSubject.POSTS.value}"
137
- }}
138
- ]
139
- }}
140
- ]
141
- }}
142
- ],
143
- "overall_strategic_focus": "Example: Focus on data-driven content strategy and proactive community engagement to boost Q{quarter} performance.",
144
- "generation_timestamp": "{datetime.utcnow().isoformat()}Z"
145
- }}
146
-
147
- Focus on precision, quality, actionability, and strict adherence to the specified JSON output schema and all constraints.
148
- Ensure all string values in the JSON are properly escaped if they contain special characters (e.g., newlines, quotes).
149
- """
150
-
151
- async def extract_tasks(self, comprehensive_analysis: str) -> TaskExtractionOutput:
152
- """
153
- Extracts time-aware actionable tasks from the comprehensive analysis text.
154
- Args:
155
- comprehensive_analysis: The text analysis from which to extract tasks.
156
- Returns:
157
- A TaskExtractionOutput Pydantic model instance.
158
- """
159
- if not comprehensive_analysis or not comprehensive_analysis.strip():
160
- logger.warning("Comprehensive analysis text is empty. Cannot extract tasks.")
161
- return TaskExtractionOutput(
162
- current_quarter_info=f"Q{self._get_quarter(self.current_date)}, {self._days_until_quarter_end(self.current_date)} days remaining",
163
- okrs=[],
164
- overall_strategic_focus="No analysis provided to extract tasks."
165
- )
166
-
167
- # The LlmAgent's instruction already contains the dynamic date info and output format.
168
- # The input to the agent's run method will be the comprehensive_analysis.
169
- prompt_for_adk_agent = f"""
170
- Comprehensive Analysis for Task Extraction:
171
- ---
172
- {comprehensive_analysis}
173
- ---
174
- Based on the analysis above, and adhering strictly to your primary instructions (especially regarding current quarter context, task field requirements, and JSON output schema 'TaskExtractionOutput'), generate the OKRs and tasks.
175
- Ensure the 'current_quarter_info' field in your JSON output is exactly: "Q{self._get_quarter(self.current_date)}, {self._days_until_quarter_end(self.current_date)} days remaining".
176
- """
177
-
178
- user_input_content = genai_types.Content(
179
- role="user",
180
- parts=[genai_types.Part(text=prompt_for_adk_agent)]
181
- )
182
-
183
- # Using InMemoryRunner as per original structure for LlmAgent with output_schema
184
- runner = InMemoryRunner(agent=self.agent, app_name=f"{self.AGENT_NAME}Runner")
185
- # Generate a unique user_id for each run to ensure fresh session state if needed.
186
- user_id = f"system_user_task_extractor_{int(datetime.utcnow().timestamp())}"
187
-
188
- session = await runner.session_service.create_session(
189
- app_name=f"{self.AGENT_NAME}Runner",
190
- user_id=user_id
191
- )
192
-
193
- extracted_data_dict = None
194
- full_response_text_for_debug = "" # To capture raw text if parsing fails
195
-
196
- try:
197
- logger.info(f"Running TaskExtractionAgent for user_id: {user_id}, session_id: {session.id}")
198
-
199
- # Fix: Use regular for loop instead of async for, since runner.run() returns a generator
200
- run_result = runner.run(
201
- user_id=user_id,
202
- session_id=session.id,
203
- new_message=user_input_content
204
- )
205
-
206
- # Check if it's an async iterator or regular generator
207
- if hasattr(run_result, '__aiter__'):
208
- # It's an async iterator, use async for
209
- async for event in run_result:
210
- if (hasattr(event, 'actions') and event.actions and
211
- hasattr(event.actions, 'state_delta') and
212
- isinstance(event.actions.state_delta, dict) and
213
- self.agent.output_key in event.actions.state_delta):
214
-
215
- extracted_data_dict = event.actions.state_delta[self.agent.output_key]
216
- logger.info(f"Successfully extracted structured data via LlmAgent state_delta.")
217
- break
218
-
219
- # Capture text parts for debugging if direct structured output isn't found first
220
- if hasattr(event, 'content') and event.content and event.content.parts:
221
- for part in event.content.parts:
222
- if hasattr(part, 'text'):
223
- full_response_text_for_debug += part.text
224
- else:
225
- # It's a regular generator, use regular for loop
226
- for event in run_result:
227
- if (hasattr(event, 'actions') and event.actions and
228
- hasattr(event.actions, 'state_delta') and
229
- isinstance(event.actions.state_delta, dict) and
230
- self.agent.output_key in event.actions.state_delta):
231
-
232
- extracted_data_dict = event.actions.state_delta[self.agent.output_key]
233
- logger.info(f"Successfully extracted structured data via LlmAgent state_delta.")
234
- break
235
-
236
- # Capture text parts for debugging if direct structured output isn't found first
237
- if hasattr(event, 'content') and event.content and event.content.parts:
238
- for part in event.content.parts:
239
- if hasattr(part, 'text'):
240
- full_response_text_for_debug += part.text
241
-
242
- if not extracted_data_dict and full_response_text_for_debug:
243
- logger.warning("LlmAgent did not produce structured output in state_delta. Raw text response was: %s",
244
- full_response_text_for_debug[:500] + "...")
245
-
246
- except Exception as e:
247
- logger.error(f"Error during TaskExtractionAgent execution: {e}", exc_info=True)
248
- finally:
249
- try:
250
- await runner.session_service.delete_session(
251
- app_name=f"{self.AGENT_NAME}Runner", user_id=user_id, session_id=session.id
252
- )
253
- except Exception as session_del_e:
254
- logger.error(f"Error deleting task extractor session: {session_del_e}")
255
-
256
- if extracted_data_dict:
257
- if isinstance(extracted_data_dict, TaskExtractionOutput): # Already a Pydantic model
258
- return extracted_data_dict
259
- elif isinstance(extracted_data_dict, dict): # If it's a dict, parse it
260
- try:
261
- return TaskExtractionOutput(**extracted_data_dict)
262
- except Exception as pydantic_error:
263
- logger.error(f"Error parsing extracted dictionary into TaskExtractionOutput: {pydantic_error}", exc_info=True)
264
- logger.error(f"Problematic dictionary data: {extracted_data_dict}")
265
- else:
266
- logger.error(f"Extracted data is not a dictionary or TaskExtractionOutput model: {type(extracted_data_dict)}")
267
-
268
- # Fallback if no valid data extracted
269
- logger.warning("No valid structured data extracted by TaskExtractionAgent.")
270
- return TaskExtractionOutput(
271
- current_quarter_info=f"Q{self._get_quarter(self.current_date)}, {self._days_until_quarter_end(self.current_date)} days remaining",
272
- okrs=[],
273
- overall_strategic_focus="Failed to extract tasks or no tasks were identified.",
274
- generation_timestamp=datetime.utcnow().isoformat()
275
- )
276
-
277
- def update_current_date(self, new_date: date):
278
- """
279
- Updates the current date for the agent and re-initializes the LlmAgent
280
- to reflect the new date context in its instructions.
281
- """
282
- self.current_date = new_date
283
- # Re-initialize the LlmAgent with the new instruction based on the new date
284
- self.agent = LlmAgent(
285
- name=self.AGENT_NAME,
286
- model=self.model_name,
287
- description=self.AGENT_DESCRIPTION,
288
- instruction=self._get_instruction_prompt(), # Get updated instruction
289
- output_schema=TaskExtractionOutput,
290
- output_key="extracted_tasks_okrs"
291
- )
292
- logger.info(f"{self.AGENT_NAME} date updated. New context: Q{self._get_quarter(self.current_date)}, "
293
- f"{self._days_until_quarter_end(self.current_date)} days remaining.")
294
-
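- # Usage sketch: task_agent.update_current_date(date(2025, 10, 5)) rebuilds the LlmAgent so its
- # instruction reports Q4 with 87 days remaining (Oct 5 -> Dec 31); see the __main__ demo below.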
295
-
296
- if __name__ == '__main__':
297
- import asyncio
- import os  # used by os.environ.get below
298
- # (Ensure logging_config.py is in the same directory or PYTHONPATH is set for this example to run standalone)
299
- try:
300
- from features.insight_and_tasks.utils.logging_config import setup_logging
301
- setup_logging()
302
- logger.info("Logging setup for TaskExtractionAgent test.")
303
- except ImportError:
304
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
305
- logger.warning("logging_config.py not found, using basicConfig for logging.")
306
-
307
- MOCK_API_KEY = os.environ.get("GOOGLE_API_KEY", "test_api_key_task_extractor") # Use your actual key or env var
308
- MODEL_NAME = DEFAULT_AGENT_MODEL
309
-
310
- # Example comprehensive analysis text (replace with actual analysis output)
311
- sample_analysis_text = """
312
- Overall Summary: Follower growth is steady at 5% MoM. Post engagement is highest for video content
313
- (avg 8% engagement rate) published on weekdays. However, mentions sentiment dipped in the last month
314
- (-0.2 avg score) due to complaints about customer service response times.
315
- Key opportunity: Improve customer service communication and leverage video content more effectively.
316
- Strategic Recommendation: Launch a 'Customer First' initiative and create a video series showcasing customer success stories.
317
- """
318
-
319
- # Test with a specific date
320
- test_date = date(2025, 4, 15) # Example: Mid-Q2 2025
321
- task_agent = TaskExtractionAgent(api_key=MOCK_API_KEY, model_name=MODEL_NAME, current_date=test_date)
322
-
323
- logger.info(f"Task Agent Instruction for test_date {test_date}:\n{task_agent._get_instruction_prompt()[:500]}...")
324
-
325
- async def run_extraction():
326
- logger.info("Extracting tasks from sample analysis...")
327
- # In a real scenario, ensure GOOGLE_API_KEY is set if the LlmAgent makes actual calls.
328
- # For local tests without real API calls, the LlmAgent might behave as a mock or require specific test setup.
329
- if MOCK_API_KEY == "test_api_key_task_extractor":
330
- logger.warning("Using a mock API key. LlmAgent behavior might be limited or mocked for task extraction.")
331
- # Mock the runner if no real API call should be made
332
- class MockADKRunner:
333
- def __init__(self, agent, app_name): self.agent = agent
334
- async def session_service_create_session(self, app_name, user_id):
335
- class MockSession: id = "mock_session_id"
336
- return MockSession()
337
- async def run(self, user_id, session_id, new_message):
338
- # Simulate a response structure
339
- mock_okr = OKR(
340
- objective_description="Improve Customer Satisfaction",
341
- key_results=[KeyResult(
342
- key_result_description="Reduce negative mentions by 10%",
343
- tasks=[Task(
344
- task_category="Customer Service", task_description="Respond to all negative mentions within 2 hours.",
345
- objective_deliverable="Improved response time.", effort=EffortLevel.MEDIUM, timeline=TimelineCategory.IMMEDIATE,
346
- responsible_party="Support Team", success_criteria_metrics="Avg response time < 2hrs.",
347
- priority=PriorityLevel.HIGH, priority_justification="Critical for reputation.",
348
- why_proposed="Analysis showed dip in sentiment due to slow responses.", task_type=TaskType.INITIATIVE,
349
- data_subject=DataSubject.MENTIONS
350
- )]
351
- )],
352
- objective_timeline=TimelineCategory.SHORT_TERM
353
- )
354
- mock_output = TaskExtractionOutput(
355
- current_quarter_info=f"Q{task_agent._get_quarter(task_agent.current_date)}, {task_agent._days_until_quarter_end(task_agent.current_date)} days remaining",
356
- okrs=[mock_okr],
357
- overall_strategic_focus="Focus on customer service improvement."
358
- )
359
- # Simulate the event structure LlmAgent with output_schema would produce
360
- class MockEvent:
361
- def __init__(self):
362
- self.actions = type('Actions', (), {'state_delta': {task_agent.agent.output_key: mock_output.model_dump()}})() # .model_dump() for Pydantic v2
363
- yield MockEvent()
364
-
365
- async def delete_session(self, app_name, user_id, session_id): pass
366
-
367
- # Monkey patch the InMemoryRunner for this test if using mock key
368
- global InMemoryRunner
369
- OriginalInMemoryRunner = InMemoryRunner
370
- InMemoryRunner = MockADKRunner
371
-
372
-
373
- extracted_okrs_output = await task_agent.extract_tasks(sample_analysis_text)
374
-
375
- # Restore InMemoryRunner if it was patched
376
- if MOCK_API_KEY == "test_api_key_task_extractor" and 'OriginalInMemoryRunner' in globals():
377
- InMemoryRunner = OriginalInMemoryRunner
378
-
379
-
380
- print("\n--- TaskExtractionAgent Results ---")
381
- if extracted_okrs_output:
382
- print(f"Current Quarter Info: {extracted_okrs_output.current_quarter_info}")
383
- print(f"Overall Strategic Focus: {extracted_okrs_output.overall_strategic_focus}")
384
- print(f"Generated Timestamp: {extracted_okrs_output.generation_timestamp}")
385
- print("\nOKRs Extracted:")
386
- # Use .model_dump_json() for Pydantic v2 for pretty printing
387
- print(extracted_okrs_output.model_dump_json(indent=2))
388
- else:
389
- print("No OKRs extracted or an error occurred.")
390
-
391
- asyncio.run(run_extraction())
393
-
394
- # Example of updating date
395
- logger.info("\n--- Updating date for Task Agent ---")
396
- new_test_date = date(2025, 10, 5) # Q4
397
- task_agent.update_current_date(new_test_date)
398
- # The instruction within task_agent.agent is now updated.
399
- # logger.info(f"Task Agent NEW Instruction for test_date {new_test_date}:\n{task_agent.agent.instruction[:500]}...")
400
- # A new call to extract_tasks would use this updated context.
features/insight_and_tasks/agents/task_extraction_model.py DELETED
@@ -1,226 +0,0 @@
1
- import enum
2
- import json
3
- from typing import List, Optional, Literal
4
- from pydantic import BaseModel, Field, field_validator, ValidationInfo
5
- from datetime import datetime, date
6
-
7
- try:
8
- from google import genai
9
- except ImportError:
10
- print("Warning: 'google.generai' library not found. Please install it.")
11
- print("If you are using the standard Gemini API, try: pip install google-generativeai")
12
- print("If using Vertex AI, ensure the Google Cloud SDK is configured.")
13
- genai = None # Placeholder to allow script to be parsed
14
-
15
- from features.insight_and_tasks.data_models.tasks import (
16
- TaskExtractionOutput,
17
- OKR,
18
- KeyResult,
19
- Task,
20
- EffortLevel,
21
- TimelineCategory,
22
- PriorityLevel,
23
- TaskType,
24
- DataSubject # Ensure all are imported
25
- )
26
-
27
- def create_example_structure():
28
- """
29
- Creates a valid example structure that conforms to the Pydantic models
30
- to show the AI what the output should look like.
31
- """
32
- return {
33
- "current_quarter_info": "Q2 2025, 24 days remaining",
34
- "okrs": [
35
- {
36
- "objective_description": "Significantly improve our LinkedIn employer branding performance to attract top-tier talent and establish our company as a thought leader in the tech industry.",
37
-
38
- "objective_timeline": "Short-term",
39
- "objective_owner": "Marketing Department",
40
- "key_results": [
41
- {
42
- # CORRECTION: Description expanded to satisfy the 'min_length=100' validation rule.
43
- "key_result_description": "Achieve a sustained 50% increase in the rate of monthly follower growth on our company LinkedIn page, demonstrating enhanced audience engagement and brand reach.",
44
- "target_metric": "Monthly Follower Growth Rate",
45
- "target_value": "50% increase",
46
-
47
- # CORRECTION: Extra 'current_value' field removed as it's not in the Pydantic model.
48
-
49
- # CORRECTION: Value changed from "performance" to "PERFORMANCE" to match 'KeyResultType' enum.
50
- "key_result_type": "PERFORMANCE",
51
-
52
- # CORRECTION: Value changed from "posts" to "FOLLOWER_STATS" to match 'DataSubject' enum and better reflect the key result.
53
- "data_subject": "FOLLOWER_STATS",
54
- "tasks": [
55
- {
56
- "task_description": "Increase posting frequency to a consistent, high-quality schedule.",
57
- "objective_deliverable": "Post a minimum of 3 high-quality, relevant articles or updates per week.",
58
- "task_category": "Content Creation",
59
-
60
- # CORRECTION: Added the missing required field 'task_type' with a valid 'TaskType' enum value.
61
- "task_type": "INITIATIVE",
62
-
63
- # CORRECTION: Value changed from "high" to "High" to match the 'PriorityLevel' enum.
64
- "priority": "High",
65
-
66
- # CORRECTION: Added the missing required field 'priority_justification'.
67
- "priority_justification": "Increasing post frequency is a primary driver for engagement and follower growth, directly impacting the key result.",
68
-
69
- # CORRECTION: Value changed from "medium" to "Medium" to match the 'EffortLevel' enum.
70
- "effort": "Medium",
71
-
72
- # CORRECTION: Value changed from "this_quarter" to "Short-term" to match the 'TimelineCategory' enum.
73
- "timeline": "Short-term",
74
-
75
- # CORRECTION: Value changed from "linkedin_performance" to "POSTS" to match the 'DataSubject' enum.
76
- "data_subject": "POSTS",
77
- "responsible_party": "Social Media Manager",
78
- "success_criteria_metrics": "A weekly average of 3 or more posts is maintained over the quarter.",
79
- "dependencies_prerequisites": "A finalized content calendar for the quarter.",
80
- "why_proposed": "Historical data analysis shows a direct correlation between low posting frequency and stagnant follower gains. This task addresses the root cause."
81
- }
82
- ]
83
- }
84
- ]
85
- }
86
- ],
87
- "overall_strategic_focus": "Accelerate follower growth and enhance brand authority on LinkedIn."
88
- }
89
-
90
-
91
- # --- Helper Function for Date Calculations ---
92
- def get_quarter_info():
93
- """Calculates current quarter, year, and days remaining in the quarter."""
94
- today = date.today()
95
- current_year = today.year
96
- current_quarter = (today.month - 1) // 3 + 1
97
-
98
- # Determine the end date of the current quarter
99
- if current_quarter == 1:
100
- end_of_quarter_date = date(current_year, 3, 31)
101
- elif current_quarter == 2:
102
- end_of_quarter_date = date(current_year, 6, 30)
103
- elif current_quarter == 3:
104
- end_of_quarter_date = date(current_year, 9, 30)
105
- else: # current_quarter == 4
106
- end_of_quarter_date = date(current_year, 12, 31)
107
-
108
- days_remaining = (end_of_quarter_date - today).days
109
- days_remaining = max(0, days_remaining) # Ensure it's not negative
110
-
111
- return current_quarter, current_year, days_remaining, today
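- # Example: running on 2025-06-06 returns (2, 2025, 24, date(2025, 6, 6)), which matches the
- # "Q2 2025, 24 days remaining" string used in create_example_structure above.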
112
-
113
- # --- Main Task Extraction Function ---
114
- def extract_tasks_from_text(user_text_input: str, api_key: str) -> tuple[TaskExtractionOutput, int, int, int]:
115
- """
116
- Extracts tasks from input text using Gemini API and structures them as TaskExtractionOutput.
117
-
118
- Args:
119
- user_text_input: The text to analyze.
120
- api_key: The Gemini API key.
121
-
122
- Returns:
123
- A tuple of (TaskExtractionOutput, current_quarter, current_year, days_remaining).
124
-
125
- Raises:
126
- ValueError: If API call fails or response parsing is unsuccessful.
127
- ImportError: If 'google.genai' is not available.
128
- """
129
- if not genai:
130
- raise ImportError("The 'google.generai' library is not available. Please install and configure it.")
131
-
132
- # Initialize the Gemini client (as per user's example structure)
133
- # This specific client initialization might vary based on the exact 'google.generai' library version/origin.
134
- try:
135
- client = genai.Client(api_key=api_key)
136
- except AttributeError:
137
- # Fallback for standard google-generativeai SDK if genai.Client is not found
138
- try:
139
- genai.configure(api_key=api_key)
140
- # This function will then need to use genai.GenerativeModel('gemini-2.0-flash')
141
- # For simplicity, sticking to user's client.models.generate_content structure.
142
- # This part would need significant rework if genai.Client is not the correct interface.
143
- print("Warning: genai.Client not found. The API call structure might be incorrect for your 'google.generai' version.")
144
- print("Assuming a client object with 'models.generate_content' method is expected.")
145
- # This is a placeholder; actual client setup depends on the specific library.
146
- # If this is google-generativeai, the user should adapt to use genai.GenerativeModel.
147
- raise NotImplementedError("genai.Client not found. Please adapt API call to your SDK version.")
148
-
149
- except Exception as e:
150
- raise ImportError(f"Failed to initialize Gemini client or configure API key: {e}")
151
-
152
-
153
- quarter, year, days_remaining, current_date_obj = get_quarter_info()
154
- current_date_iso = current_date_obj.isoformat()
155
- example_structure = create_example_structure()
156
-
157
- # Construct the detailed prompt for the LLM
158
- prompt = f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
159
-
160
- Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' schema provided.
- Here is an illustrative example of a conforming output (sample content, not a template to copy verbatim):
- {json.dumps(example_structure, indent=2)}
161
-
162
- CURRENT CONTEXTUAL INFORMATION (CRITICAL - Use these exact values in your output where specified):
163
- - Current Quarter: Q{quarter}
164
- - Current Year: {year}
165
- - Days remaining in current quarter: {days_remaining}
166
- - Today's Date (for your context only, not for direct output unless specified by a schema field): {current_date_iso}
167
-
168
- When populating the 'current_quarter_info' field in the TaskExtractionOutput, use the format: 'Q{quarter} {year}, {days_remaining} days remaining'.
169
-
170
- GENERATION RULES:
171
- 1. Create 1-3 OKR objects based on the input text
172
- 2. For each OKR, create 1-3 KeyResult objects (MANDATORY - cannot be empty)
173
- 3. For each KeyResult, create 1-3 Task objects (MANDATORY - cannot be empty)
174
- 4. Make tasks specific, actionable, and directly related to the insights in the input text
175
- 5. No repetitive text allowed
176
- 6. Complete JSON object with proper closing braces
177
- 7. Maximum response length: 5000 characters
178
-
179
- Now, analyze the following text and generate the structured output:
180
- ---
181
- TEXT TO ANALYZE:
182
- {user_text_input}
183
- ---
184
- """
185
-
186
- try:
187
- response = client.models.generate_content(
188
- model="gemini-2.5-flash-preview-05-20", # As per user's example
189
- contents=prompt,
190
- config={
191
- 'response_mime_type': 'application/json',
192
- 'response_schema': TaskExtractionOutput, # Pass the Pydantic model class
193
- 'temperature': 0.1,
194
- 'top_p': 0.8,
195
- },
196
- )
197
- except Exception as e:
198
- raise ValueError(f"Gemini API call failed: {e}")
199
-
200
- # Process the response
201
- # Based on user's example `print(response.text)`, we assume .text contains the JSON.
202
- # However, standard Gemini API often has it in response.candidates[0].content.parts[0].text.
203
- response_json_text = None
204
- if hasattr(response, 'text') and response.text:
205
- response_json_text = response.text
206
- elif hasattr(response, 'candidates') and response.candidates:
207
- try:
208
- part = response.candidates[0].content.parts[0]
209
- if hasattr(part, 'text') and part.text:
210
- response_json_text = part.text
211
- except (IndexError, AttributeError):
212
- pass # Could not find text in candidates
213
-
214
- if response_json_text:
215
- try:
216
- # Validate and parse the JSON response using the Pydantic model
217
- task_output = TaskExtractionOutput.model_validate_json(response_json_text)
218
- return task_output, quarter, year, days_remaining
219
- except Exception as e: # Catch Pydantic validation errors or JSON parsing errors
220
- raise ValueError(f"Failed to parse or validate API response: {e}\nRaw response text: {response_json_text}")
221
- else:
222
- # Handle cases where the response is empty or indicates an error
223
- feedback_message = ""
224
- if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
225
- feedback_message = f"Prompt feedback: {response.prompt_feedback}. "
226
- raise ValueError(f"Failed to generate content or response text is empty. {feedback_message}Full response: {response}")
features/insight_and_tasks/agents/task_extraction_model_groq.py DELETED
@@ -1,143 +0,0 @@
1
- import enum
2
- import json
3
- import os
4
- from typing import List, Optional, Literal
5
- from pydantic import BaseModel, Field, field_validator, ValidationInfo
6
- from datetime import datetime, date
7
-
8
- # Import Groq and instructor for structured output
9
- try:
10
- from groq import Groq, RateLimitError
11
- import instructor
12
- except ImportError:
13
- print("Warning: 'groq' or 'instructor' library not found. Please install them.")
14
- print("Try: pip install groq instructor")
15
- Groq = None
16
- instructor = None
17
-
18
-
19
- from features.insight_and_tasks.data_models.tasks import (
20
- TaskExtractionOutput,
21
- OKR,
22
- KeyResult,
23
- Task,
24
- EffortLevel,
25
- TimelineCategory,
26
- PriorityLevel,
27
- TaskType,
28
- DataSubject # Ensure all are imported
29
- )
30
-
31
- # --- Groq Client Initialization with Instructor ---
32
- # Ensure GROQ_API_KEY is set in your environment variables before running
33
- if Groq and instructor:
34
- try:
35
- api_key = os.getenv('GROQ_API_KEY')
36
- if not api_key:
37
- raise ValueError("GROQ_API_KEY environment variable not set. Please set it to your Groq API key.")
38
-
39
- # Create a single, patched Groq client for structured output using instructor
40
- # Mode.JSON ensures the output is a valid JSON object based on the Pydantic model
41
- client = instructor.from_groq(Groq(api_key=api_key), mode=instructor.Mode.JSON)
42
- except Exception as e:
43
- print(f"Failed to initialize Groq client: {e}")
44
- client = None
45
- else:
46
- client = None
47
-
48
-
49
-
50
- # --- Helper Function for Date Calculations (Unchanged) ---
51
- def get_quarter_info():
52
- """Calculates current quarter, year, and days remaining in the quarter."""
53
- today = date.today()
54
- current_year = today.year
55
- current_quarter = (today.month - 1) // 3 + 1
56
-
57
- if current_quarter == 1:
58
- end_of_quarter_date = date(current_year, 3, 31)
59
- elif current_quarter == 2:
60
- end_of_quarter_date = date(current_year, 6, 30)
61
- elif current_quarter == 3:
62
- end_of_quarter_date = date(current_year, 9, 30)
63
- else: # current_quarter == 4
64
- end_of_quarter_date = date(current_year, 12, 31)
65
-
66
- days_remaining = (end_of_quarter_date - today).days
67
- days_remaining = max(0, days_remaining)
68
-
69
- return current_quarter, current_year, days_remaining, today
70
-
71
- # --- Main Task Extraction Function (Refactored for Groq) ---
72
- def extract_tasks_from_text_groq(user_text_input: str) -> tuple[Optional[TaskExtractionOutput], int, int, int]:
73
- """
74
- Extracts tasks from input text using the Groq API and structures them
75
- using instructor.
76
-
77
- Args:
78
- user_text_input: The text to analyze.
79
-
80
- Returns:
81
- A tuple containing:
82
- - A TaskExtractionOutput Pydantic model instance, or None on failure.
83
- - The current quarter number.
84
- - The current year.
85
- - The number of days remaining in the quarter.
86
-
87
- Raises:
88
- ValueError: If the Groq client is not initialized or if the API call fails.
89
- RateLimitError: If the Groq API rate limit is exceeded.
90
- """
91
- if not client:
92
- raise ValueError("Groq client is not initialized. Check your API key and library installations.")
93
-
94
- quarter, year, days_remaining, current_date_obj = get_quarter_info()
95
-
96
- # The prompt structure remains the same as it is effective.
97
- # We explicitly tell the model its role and the structure we expect.
98
- prompt = f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and Key Results (OKRs) framework.
99
-
100
- Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema.
101
-
102
- CURRENT CONTEXTUAL INFORMATION:
103
- - Use this exact string for the 'current_quarter_info' field: 'Q{quarter} {year}, {days_remaining} days remaining'.
104
-
105
- GENERATION RULES:
106
- 1. Your primary goal is to identify every distinct, high-level strategic objective from the input text. For each and every distinct objective you find, you must create a corresponding OKR object.
107
- 2. For each OKR, extract all relevant Key Results. Key Results must be measurable outcomes.
108
- 3. For each KeyResult, extract all specific and actionable Tasks that are directly derived from the input text.
109
- 4. Considering the days remaining in the quarter, prioritize tasks with the highest immediate impact where possible.
110
- 5. Tasks must be specific, actionable, and directly derived from the input text.
111
- 6. Do not create redundant or repetitive content.
112
- 7. Ensure the final output is a complete JSON object.
113
-
114
- Now, analyze the following text and generate the structured JSON output:
115
- ---
116
- TEXT TO ANALYZE:
117
- {user_text_input}
118
- ---
119
- """
120
-
121
- try:
122
- # Use the instructor-patched client to make the call.
123
- # Pass the Pydantic model to `response_model`.
124
- # Instructor will handle the validation and parsing automatically.
125
- task_output = client.chat.completions.create(
126
- model="llama-3.3-70b-versatile", # A powerful model available on Groq
127
- response_model=TaskExtractionOutput,
128
- messages=[
129
- {"role": "user", "content": prompt},
130
- ],
131
- temperature=0.1,
132
- top_p=0.8,
133
- max_retries=3, # Instructor can automatically retry on validation errors
134
- )
135
- return task_output, quarter, year, days_remaining
136
-
137
- except RateLimitError as e:
138
- print(f"Error: Groq API rate limit exceeded. Please wait and try again. Details: {e}")
139
- raise # Re-raise the specific error
140
- except Exception as e:
141
- # This can catch Pydantic validation errors or other API issues.
142
- print(f"An unexpected error occurred during the Groq API call or data validation: {e}")
143
- raise ValueError(f"Failed to process text with Groq: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/coordinators/employer_branding_coordinator.py DELETED
@@ -1,331 +0,0 @@
1
- # coordinators/employer_branding_coordinator.py
2
- import json
3
- import logging
4
- from typing import Optional, Dict, Any # Added Dict, Any
5
- from dataclasses import asdict # For converting dataclasses (like AgentMetrics) to dict
6
- import os
7
- from datetime import datetime
8
-
9
- from google.adk.agents import LlmAgent
10
- from google.adk.runners import InMemoryRunner
11
- from google.genai import types as genai_types # For ADK agent inputs
12
-
13
- # Project-specific imports
14
- from features.insight_and_tasks.agents.follower_agent import EnhancedFollowerAnalysisAgent
15
- from features.insight_and_tasks.agents.post_agent import EnhancedPostPerformanceAgent
16
- from features.insight_and_tasks.agents.mentions_agent import EnhancedMentionsAnalysisAgent
17
- from features.insight_and_tasks.data_models.metrics import AgentMetrics # To type hint inputs
18
- from features.insight_and_tasks.utils.retry_mechanism import RetryMechanism # If ADK calls need retry
19
-
20
- # Configure logger for this module
21
- logger = logging.getLogger(__name__)
22
-
23
- DEFAULT_COORDINATOR_MODEL = "gemini-2.5-flash-preview-05-20" # Use a more capable model for synthesis
24
- os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "False"
25
- GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY")
26
- os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
27
-
28
- class EnhancedEmployerBrandingCoordinator:
29
- """
30
- Enhanced coordinator for synthesizing insights from multiple agent metrics,
31
- identifying correlations, and generating integrated strategic recommendations.
32
- """
33
- COORDINATOR_AGENT_NAME = "employer_branding_coordinator"
34
- COORDINATOR_AGENT_DESCRIPTION = (
35
- "Strategic coordinator that analyzes metrics from Follower, Post Performance, and Mentions agents "
36
- "to find correlations, suggest potential causal links, and generate integrated strategies."
37
- )
38
- COORDINATOR_AGENT_INSTRUCTION = """
39
- You are the Enhanced Employer Branding Coordinator. Your primary mission is to synthesize analyses and
40
- structured metrics (TimeSeries, Aggregate, Categorical) from three specialized agents: Follower Analysis,
41
- Post Performance, and Mentions Analysis. Your goal is to provide a holistic, integrated understanding of
42
- the LinkedIn employer branding performance.
43
-
44
- You MUST focus on:
45
- 1. Cross-Agent Correlations: Analyze how metrics from different agents relate to each other over time.
46
- Pay close attention to the 'time_series_metrics' provided by each agent.
47
- - Identify positive or negative correlations (e.g., "Follower growth rate increased by X% when posts about 'company culture' (Post Agent) were published, coinciding with a Y% rise in positive mentions (Mentions Agent)").
48
- - Note any leading or lagging indicators (e.g., "A spike in negative mentions often preceded a dip in follower growth by approximately 2 weeks.").
49
- - Look for relationships between specific content types/topics (from Post Agent) and follower engagement/growth (Follower Agent) or brand sentiment (Mentions Agent).
50
- 2. Potential Causal Insights & Hypotheses: Based on observed correlations and temporal sequences, suggest plausible causal relationships.
51
- These are hypotheses, not definitive conclusions.
52
- - Example: "The Q2 campaign focusing on 'employee testimonials' (Post Agent data) likely contributed to the observed 15% increase in organic follower acquisition (Follower Agent data) and the shift towards more positive sentiment in mentions (Mentions Agent data) during the same period."
53
- 3. Root Cause Analysis (Conceptual): For significant performance changes (e.g., sudden engagement drops, unexpected follower spikes, sharp sentiment shifts), attempt to identify potential root causes by cross-referencing data and summaries from all three agents.
54
- 4. Predictive Insights (High-Level): Based on established trends and correlations, what are potential future performance trajectories or risks?
55
- - Example: "If the current trend of declining engagement on text-only posts continues, overall reach may decrease by X% next quarter unless content strategy is diversified."
56
- 5. Integrated Strategic Recommendations: Formulate actionable, strategic advice that leverages insights from ALL THREE data sources to optimize overall employer branding.
57
- - Recommendations should be specific (e.g., "Increase frequency of video posts related to 'Team Achievements' as this format shows high engagement and correlates with positive mention spikes.").
58
- - Prioritize recommendations based on their potential impact, supported by the cross-agent analysis.
59
- - Suggest A/B tests or further investigations where appropriate.
60
-
61
- INPUT: You will receive structured 'AgentMetrics' data (JSON format) from each of the three agents. This includes their own analysis summaries, time-series data, aggregate figures, and categorical breakdowns.
62
-
63
- OUTPUT: A comprehensive, well-structured report covering:
64
- I. Overall Executive Summary: A brief (2-3 paragraph) overview of the most critical findings and strategic implications derived from the integrated analysis.
65
- II. Detailed Cross-Agent Correlation Analysis: Elaborate on specific correlations found, with examples.
66
- III.Key Causal Hypotheses: Present the most compelling potential causal links.
67
- IV. Noteworthy Performance Shifts & Potential Root Causes: Discuss any major changes and their likely drivers.
68
- V. Forward-Looking Predictive Insights: Offer high-level predictions.
69
- VI. Actionable Integrated Strategic Recommendations: Provide clear, prioritized recommendations.
70
-
71
- Your analysis must be grounded in the provided data. Refer to specific metrics and agent findings to support your conclusions.
72
- Be insightful and strategic. The goal is to provide a unified view that is more valuable than the sum of the individual agent analyses.
73
- """
74
-
75
- def __init__(self, api_key: str, model_name: Optional[str] = None):
76
- self.api_key = api_key # Stored for LlmAgent or if agents need it passed explicitly
77
- self.model_name = model_name or DEFAULT_COORDINATOR_MODEL
78
-
79
- # Initialize individual agents. The coordinator will use their output.
80
- # These agents are internal to the coordinator's process of getting data to synthesize.
81
- self.follower_agent = EnhancedFollowerAnalysisAgent(api_key=api_key, model_name=model_name) # Pass down model if needed
82
- self.post_agent = EnhancedPostPerformanceAgent(api_key=api_key, model_name=model_name)
83
- self.mentions_agent = EnhancedMentionsAnalysisAgent(api_key=api_key, model_name=model_name)
84
-
85
- # The LLM agent for the coordinator itself, responsible for synthesis
86
- self.coordinator_llm_agent = LlmAgent(
87
- name=self.COORDINATOR_AGENT_NAME,
88
- model=self.model_name, # Use the coordinator's (potentially more powerful) model
89
- description=self.COORDINATOR_AGENT_DESCRIPTION,
90
- instruction=self.COORDINATOR_AGENT_INSTRUCTION
91
- )
92
- self.retry_mechanism = RetryMechanism()
93
- logger.info(f"{self.COORDINATOR_AGENT_NAME} initialized with model {self.model_name}.")
94
- logger.info(f"It internally uses: Follower Agent ({self.follower_agent.model_name}), "
95
- f"Post Agent ({self.post_agent.model_name}), Mentions Agent ({self.mentions_agent.model_name}).")
96
-
97
-
98
- async def generate_comprehensive_analysis(
99
- self,
100
- follower_metrics: AgentMetrics,
101
- post_metrics: AgentMetrics,
102
- mentions_metrics: AgentMetrics
103
- ) -> str:
104
- """
105
- Generates a comprehensive analysis by synthesizing metrics from all specialized agents.
106
-
107
- Args:
108
- follower_metrics: Metrics from the EnhancedFollowerAnalysisAgent.
109
- post_metrics: Metrics from the EnhancedPostPerformanceAgent.
110
- mentions_metrics: Metrics from the EnhancedMentionsAnalysisAgent.
111
-
112
- Returns:
113
- A string containing the comprehensive analysis report.
114
- """
115
-
116
- # Prepare the input prompt for the coordinator's LlmAgent
117
- # Serialize the AgentMetrics objects (which are dataclasses) to dictionaries
118
- # then to JSON strings for clean inclusion in the prompt.
119
- try:
120
- follower_metrics_dict = asdict(follower_metrics)
121
- post_metrics_dict = asdict(post_metrics)
122
- mentions_metrics_dict = asdict(mentions_metrics)
123
- except Exception as e:
124
- logger.error(f"Error converting AgentMetrics to dict: {e}", exc_info=True)
125
- return "Error: Could not process input metrics for coordination."
126
-
127
- # Truncate individual agent summaries if they are too long to avoid overly large prompts
128
- max_summary_len = 500 # Max characters for individual agent summaries in the prompt
129
- follower_metrics_dict['analysis_summary'] = follower_metrics_dict.get('analysis_summary', '')[:max_summary_len]
130
- post_metrics_dict['analysis_summary'] = post_metrics_dict.get('analysis_summary', '')[:max_summary_len]
131
- mentions_metrics_dict['analysis_summary'] = mentions_metrics_dict.get('analysis_summary', '')[:max_summary_len]
132
-
133
-
134
- synthesis_prompt = f"""
135
- Please synthesize the following LinkedIn analytics insights, which are structured as 'AgentMetrics'
136
- from three specialized agents. Your primary task is to identify cross-metric correlations,
137
- deduce potential causal relationships, and provide integrated strategic recommendations based on
138
- your core instructions.
139
-
140
- DATA FROM SPECIALIZED AGENTS:
141
-
142
- 1. Follower Analysis Agent Metrics:
143
- - Agent Name: {follower_metrics_dict.get('agent_name')}
144
- - Agent's Analysis Summary: {follower_metrics_dict.get('analysis_summary')}
145
- - Time Series Metrics: {json.dumps([asdict(m) for m in follower_metrics.time_series_metrics], indent=2, default=str)}
146
- - Aggregate Metrics: {json.dumps(follower_metrics_dict.get('aggregate_metrics'), indent=2, default=str)}
147
- - Categorical Metrics: {json.dumps(follower_metrics_dict.get('categorical_metrics'), indent=2, default=str)}
148
- - Time Periods Covered: {json.dumps(follower_metrics_dict.get('time_periods_covered'), default=str)}
149
- - Key Insights by Agent: {json.dumps(follower_metrics_dict.get('key_insights'), default=str)}
150
-
151
- 2. Post Performance Agent Metrics:
152
- - Agent Name: {post_metrics_dict.get('agent_name')}
153
- - Agent's Analysis Summary: {post_metrics_dict.get('analysis_summary')}
154
- - Time Series Metrics: {json.dumps([asdict(m) for m in post_metrics.time_series_metrics], indent=2, default=str)}
155
- - Aggregate Metrics: {json.dumps(post_metrics_dict.get('aggregate_metrics'), indent=2, default=str)}
156
- - Categorical Metrics: {json.dumps(post_metrics_dict.get('categorical_metrics'), indent=2, default=str)}
157
- - Time Periods Covered: {json.dumps(post_metrics_dict.get('time_periods_covered'), default=str)}
158
- - Key Insights by Agent: {json.dumps(post_metrics_dict.get('key_insights'), default=str)}
159
-
160
- 3. Mentions Analysis Agent Metrics:
161
- - Agent Name: {mentions_metrics_dict.get('agent_name')}
162
- - Agent's Analysis Summary: {mentions_metrics_dict.get('analysis_summary')}
163
- - Time Series Metrics: {json.dumps([asdict(m) for m in mentions_metrics.time_series_metrics], indent=2, default=str)}
164
- - Aggregate Metrics: {json.dumps(mentions_metrics_dict.get('aggregate_metrics'), indent=2, default=str)}
165
- - Categorical Metrics: {json.dumps(mentions_metrics_dict.get('categorical_metrics'), indent=2, default=str)}
166
- - Time Periods Covered: {json.dumps(mentions_metrics_dict.get('time_periods_covered'), default=str)}
167
- - Key Insights by Agent: {json.dumps(mentions_metrics_dict.get('key_insights'), default=str)}
168
-
169
- COORDINATION TASK:
170
- Based on ALL the data presented above from the three agents, generate a comprehensive synthesis report.
171
- Follow your core instructions meticulously, focusing on cross-agent correlations (especially using the
172
- time_series_metrics), causal hypotheses, root cause considerations for major shifts, predictive insights,
173
- and actionable, integrated strategic recommendations.
174
- Structure your output as a detailed report with the specified sections.
175
- """
176
-
177
- user_input_content = genai_types.Content(
178
- role="user",
179
- parts=[genai_types.Part(text=synthesis_prompt)]
180
- )
181
-
182
- runner = InMemoryRunner(agent=self.coordinator_llm_agent, app_name=f"{self.COORDINATOR_AGENT_NAME}Runner")
183
- user_id = f"system_user_coordinator_{int(datetime.utcnow().timestamp())}" # Unique ID for the run
184
-
185
- session = await runner.session_service.create_session(
186
- app_name=f"{self.COORDINATOR_AGENT_NAME}Runner",
187
- user_id=user_id
188
- )
189
-
190
- result_text_parts = []
191
- try:
192
- logger.info(f"Running {self.COORDINATOR_AGENT_NAME} for synthesis. User ID: {user_id}, Session ID: {session.id}")
193
- # Using retry for the ADK runner execution part
194
- async def run_adk_coordinator():
195
- temp_result_parts = []
196
- async for event in runner.run(
197
- user_id=user_id,
198
- session_id=session.id,
199
- new_message=user_input_content
200
- ):
201
- if hasattr(event, 'content') and event.content and event.content.parts:
202
- for part in event.content.parts:
203
- if hasattr(part, 'text'):
204
- temp_result_parts.append(part.text)
205
- if not temp_result_parts:
206
- # This could happen if the LLM returns no content or an error not caught by ADK
207
- logger.warning(f"{self.COORDINATOR_AGENT_NAME} produced no text output from ADK run.")
208
- # Consider raising a specific error or returning a default message
209
- # For now, it will result in an empty string if no parts are collected.
210
- return "".join(temp_result_parts)
211
-
212
- # The retry_with_backoff expects a synchronous function.
213
- # For async, you'd typically handle retries within the async logic or use an async retry library.
214
- # For simplicity here, we'll run it once. If retries are critical for ADK calls,
215
- # the ADK runner itself might have retry mechanisms, or this part needs adjustment.
216
- # The original code didn't show retry for this ADK call, so keeping it direct.
217
-
218
- # Direct call without retry for the async ADK runner:
219
- for event in runner.run(
220
- user_id=user_id,
221
- session_id=session.id,
222
- new_message=user_input_content
223
- ):
224
- if hasattr(event, 'content') and event.content and event.content.parts:
225
- for part in event.content.parts:
226
- if hasattr(part, 'text'):
227
- result_text_parts.append(part.text)
228
-
229
- final_result_text = "".join(result_text_parts)
230
- if not final_result_text.strip():
231
- logger.warning(f"{self.COORDINATOR_AGENT_NAME} synthesis resulted in an empty string.")
232
- final_result_text = "Coordinator analysis did not produce output. Please check logs."
233
-
234
-
235
- except Exception as e:
236
- logger.error(f"Error during {self.COORDINATOR_AGENT_NAME} LLM agent execution: {e}", exc_info=True)
237
- final_result_text = f"Error in coordinator synthesis: {str(e)}"
238
- finally:
239
- try:
240
- await runner.session_service.delete_session(
241
- app_name=f"{self.COORDINATOR_AGENT_NAME}Runner", user_id=user_id, session_id=session.id
242
- )
243
- except Exception as session_del_e:
244
- logger.error(f"Error deleting coordinator session: {session_del_e}")
245
-
246
- return final_result_text
247
-
248
- if __name__ == '__main__':
249
- import asyncio
250
- import pandas as pd # For creating dummy data
251
- from datetime import datetime # For dummy data AgentMetrics
252
-
253
- try:
254
- from utils.logging_config import setup_logging
255
- setup_logging()
256
- logger.info("Logging setup for EnhancedEmployerBrandingCoordinator test.")
257
- except ImportError:
258
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
259
- logger.warning("logging_config.py not found, using basicConfig for logging.")
260
-
261
- MOCK_API_KEY = os.environ.get("GOOGLE_API_KEY", "test_api_key_coordinator")
262
- MODEL_NAME = DEFAULT_COORDINATOR_MODEL # Or a specific test model
263
-
264
- # Create dummy AgentMetrics data for testing
265
- dummy_ts_metric = TimeSeriesMetric(metric_name="dummy_visits", values=[10.0,20.0], timestamps=["2023-01","2023-02"])
266
-
267
- follower_metrics_data = AgentMetrics(
268
- agent_name="follower_analyst_test",
269
- analysis_summary="Followers grew steadily. Demographic: Young professionals.",
270
- time_series_metrics=[dummy_ts_metric],
271
- aggregate_metrics={"avg_growth_rate": 0.05},
272
- categorical_metrics={"top_industry": "Tech"},
273
- time_periods_covered=["2023-01", "2023-02"],
274
- key_insights=["Organic growth is strong."]
275
- )
276
- post_metrics_data = AgentMetrics(
277
- agent_name="post_analyst_test",
278
- analysis_summary="Video posts performed best. Engagement rate is 3%.",
279
- time_series_metrics=[TimeSeriesMetric(metric_name="dummy_engagement", values=[0.03,0.035], timestamps=["2023-01","2023-02"], unit="%")],
280
- aggregate_metrics={"avg_engagement_rate_overall": 0.032},
281
- categorical_metrics={"top_media_type": "VIDEO"},
282
- time_periods_covered=["2023-01", "2023-02"],
283
- key_insights=["Video content is key for engagement."]
284
- )
285
- mentions_metrics_data = AgentMetrics(
286
- agent_name="mentions_analyst_test",
287
- analysis_summary="Mentions are mostly neutral. Sentiment score avg 0.1.",
288
- time_series_metrics=[TimeSeriesMetric(metric_name="dummy_sentiment_score", values=[0.1,0.12], timestamps=["2023-01","2023-02"])],
289
- aggregate_metrics={"overall_avg_sentiment": 0.11},
290
- categorical_metrics={"dominant_sentiment": "Neutral"},
291
- time_periods_covered=["2023-01", "2023-02"],
292
- key_insights=["Brand perception is stable but not overly positive."]
293
- )
294
-
295
- coordinator = EnhancedEmployerBrandingCoordinator(api_key=MOCK_API_KEY, model_name=MODEL_NAME)
296
-
297
- async def run_coordination():
298
- logger.info("Generating comprehensive analysis from dummy metrics...")
299
- # For local tests without real API calls, the LlmAgent might behave as a mock.
300
- if MOCK_API_KEY == "test_api_key_coordinator":
301
- logger.warning("Using a mock API key. Coordinator LlmAgent behavior might be limited or mocked.")
302
- # Mock the ADK runner for the coordinator's LLM agent if needed
303
- class MockCoordinatorADKRunner:
304
- def __init__(self, agent, app_name): self.agent = agent
305
- async def session_service_create_session(self, app_name, user_id):
306
- class MockSession: id = "mock_coord_session_id"
307
- return MockSession()
308
- async def run(self, user_id, session_id, new_message):
309
- # Simulate a response from the coordinator LLM
310
- yield genai_types.Content(parts=[genai_types.Part(text="Mock Coordinator Synthesis Report: Blah blah correlation. Recommendation: Do X.")])
311
- async def session_service_delete_session(self, app_name, user_id, session_id): pass
312
-
313
- global InMemoryRunner # Make sure we are modifying the correct InMemoryRunner
314
- OriginalInMemoryRunnerCoord = InMemoryRunner
315
- InMemoryRunner = MockCoordinatorADKRunner
316
-
317
-
318
- report = await coordinator.generate_comprehensive_analysis(
319
- follower_metrics_data,
320
- post_metrics_data,
321
- mentions_metrics_data
322
- )
323
-
324
- if MOCK_API_KEY == "test_api_key_coordinator" and 'OriginalInMemoryRunnerCoord' in globals():
325
- InMemoryRunner = OriginalInMemoryRunnerCoord # Restore
326
-
327
- print("\n--- EnhancedEmployerBrandingCoordinator Report ---")
328
- print(report)
329
-
330
- if __name__ == '__main__': # Inner check
331
- asyncio.run(run_coordination())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/data_models/__init__.py DELETED
@@ -1,35 +0,0 @@
1
- # data_models/__init__.py
2
-
3
- # This file makes the 'data_models' directory a Python package.
4
-
5
- # Expose key models at the package level for easier importing.
6
- from .metrics import TimeSeriesMetric, AgentMetrics, MetricType, TimeGranularity
7
- from .tasks import (
8
- EffortLevel,
9
- TaskType,
10
- DataSubject,
11
- TimelineCategory,
12
- PriorityLevel,
13
- Task,
14
- KeyResult,
15
- OKR,
16
- TaskExtractionOutput
17
- )
18
-
19
- __all__ = [
20
- # From metrics.py
21
- "TimeSeriesMetric",
22
- "AgentMetrics",
23
- "MetricType",
24
- "TimeGranularity",
25
- # From tasks.py
26
- "EffortLevel",
27
- "TaskType",
28
- "DataSubject",
29
- "TimelineCategory",
30
- "PriorityLevel",
31
- "Task",
32
- "KeyResult",
33
- "OKR",
34
- "TaskExtractionOutput"
35
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/data_models/metrics.py DELETED
@@ -1,50 +0,0 @@
1
- # data_models/metrics.py
2
- from dataclasses import dataclass, field
3
- from typing import List, Dict, Any, Literal, Optional
4
- from datetime import datetime
5
-
6
-
7
- # Define literal types for more specific type hinting
8
- MetricType = Literal['time_series', 'aggregate', 'categorical']
9
- TimeGranularity = Literal['daily', 'weekly', 'monthly', 'yearly', 'other'] # Added 'yearly' and 'other'
10
-
11
- @dataclass
12
- class TimeSeriesMetric:
13
- """Structure for time-series based metrics"""
14
- metric_name: str
15
- values: List[float] = field(default_factory=list)
16
- timestamps: List[str] = field(default_factory=list) # Consider using datetime objects or ISO format strings
17
- metric_type: MetricType = 'time_series'
18
- time_granularity: TimeGranularity = 'monthly'
19
- unit: Optional[str] = None # e.g., 'count', '%', 'USD'
20
- description: Optional[str] = None # Optional description of the metric
21
-
22
- def __post_init__(self):
23
- if len(self.values) != len(self.timestamps):
24
- # Or log a warning, or handle as appropriate for your application
25
- raise ValueError(f"Length of values ({len(self.values)}) and timestamps ({len(self.timestamps)}) must match for metric '{self.metric_name}'.")
26
-
27
- @dataclass
28
- class AgentMetrics:
29
- """
30
- Enhanced structure for agent metrics with time-awareness and more details.
31
- """
32
- agent_name: str
33
- analysis_summary: str # Summary text from the agent's analysis
34
-
35
- # Specific metric categories
36
- time_series_metrics: List[TimeSeriesMetric] = field(default_factory=list)
37
- aggregate_metrics: Dict[str, float] = field(default_factory=dict) # Key-value pairs for single value metrics
38
- categorical_metrics: Dict[str, Any] = field(default_factory=dict) # For distributions, counts by category, etc.
39
- # Example: {'industry_distribution': {'Tech': 100, 'Finance': 50}}
40
-
41
- # Contextual information
42
- time_periods_covered: List[str] = field(default_factory=list) # e.g., ["2023-01", "2023-02"] or ["Q1 2023", "Q2 2023"]
43
- data_sources_used: List[str] = field(default_factory=list) # Information about the input data
44
- generation_timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat()) # When these metrics were generated
45
-
46
- # Optional fields for richer reporting
47
- key_insights: List[str] = field(default_factory=list) # Bullet points of key findings
48
- potential_errors_or_warnings: List[str] = field(default_factory=list) # Any issues encountered during analysis
49
-
50
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/data_models/tasks.py DELETED
@@ -1,197 +0,0 @@
1
- # data_models/tasks.py
2
- from enum import Enum
3
- from typing import List, Optional, Literal
4
- from pydantic import BaseModel, Field, field_validator
5
- from datetime import datetime
6
-
7
- # Using Literal for more precise string enums if preferred over Enum class for Pydantic
8
- # However, Enum provides better structure and can be used with Field choices.
9
-
10
- class EffortLevel(str, Enum):
11
- """Estimated effort level for a task."""
12
- SMALL = "Small"
13
- MEDIUM = "Medium"
14
- LARGE = "Large"
15
-
16
- class TaskType(str, Enum):
17
- """Type of task, indicating its nature."""
18
- INITIATIVE = "initiative" # Action-oriented, new projects/changes
19
- TRACKING = "tracking" # Measurement-focused, monitoring existing metrics/processes
20
-
21
- class KeyResultType(str, Enum):
22
- PERFORMANCE = "performance"
23
- COMPLETION = "completion"
24
-
25
- class DataSubject(str, Enum):
26
- """Specifies the primary data domain a tracking task relates to."""
27
- FOLLOWER_STATS = "follower_stats"
28
- POSTS = "posts"
29
- MENTIONS = "mentions"
30
- GENERAL = "general" # For initiatives or tasks not tied to a single data type
31
-
32
- class TimelineCategory(str, Enum):
33
- """Categorization of task timelines."""
34
- IMMEDIATE = "Immediate" # (e.g., 1-2 weeks)
35
- SHORT_TERM = "Short-term" # (e.g., rest of current quarter, up to 3 months)
36
- MEDIUM_TERM = "Medium-term" # (e.g., next quarter, 3-6 months)
37
- LONG_TERM = "Long-term" # (e.g., 6+ months)
38
-
39
- class PriorityLevel(str, Enum):
40
- """Priority level for tasks."""
41
- HIGH = "High"
42
- MEDIUM = "Medium"
43
- LOW = "Low"
44
-
45
- class Task(BaseModel):
46
- """
47
- Represents a single actionable task derived from analysis.
48
- """
49
- task_category: str = Field(
50
- description="The broader category or theme of the task (e.g., Content Strategy, Audience Engagement, Reputation Management, Performance Monitoring)."
51
- )
52
- task_description: str = Field( # Renamed from 'task' for clarity
53
- description="A concise yet clear description of the specific action to be taken."
54
- )
55
- objective_deliverable: str = Field(
56
- description="The clear, measurable objective this task aims to achieve and the specific deliverable(s) expected upon completion."
57
- )
58
- effort: EffortLevel = Field(
59
- description="Estimated effort required to complete the task (Small, Medium, Large)."
60
- )
61
- timeline: TimelineCategory = Field(
62
- description="Projected timeline for task completion, considering urgency and dependencies."
63
- )
64
- responsible_party: str = Field(
65
- description="The team, role, or individual suggested to be responsible for executing this task (e.g., Marketing Team, Content Creation Lead, Social Media Manager)."
66
- )
67
- success_criteria_metrics: str = Field(
68
- description="Specific, measurable criteria and metrics that will be used to determine if the task was successfully completed and achieved its objective."
69
- )
70
- dependencies_prerequisites: Optional[str] = Field(
71
- default=None,
72
- description="Any other tasks, resources, or conditions that must be met before this task can begin or be completed."
73
- )
74
- priority: PriorityLevel = Field(
75
- description="The priority level of the task (High, Medium, Low)."
76
- )
77
- priority_justification: str = Field(
78
- description="A brief explanation for the assigned priority level, linking it to impact or urgency."
79
- )
80
- why_proposed: str = Field(
81
- description="The rationale behind proposing this task, clearly linking it back to specific findings or insights from the data analysis."
82
- )
83
- task_type: TaskType = Field(
84
- description="Indicates whether this task is a new 'initiative' or ongoing 'tracking' of performance/metrics."
85
- )
86
- data_subject: Optional[DataSubject] = Field(
87
- default=None,
88
- description="For 'tracking' tasks, specifies the primary data subject (e.g., follower_stats, posts, mentions). Can be 'general' or null for 'initiative' tasks."
89
- )
90
-
91
- @field_validator('data_subject')
92
- @classmethod
93
- def check_data_subject_for_tracking(cls, value: Optional[DataSubject], values) -> Optional[DataSubject]:
94
- # Pydantic v2 uses `values.data` to get other field values if needed before validation
95
- # For Pydantic v1, it would be `values.get('task_type')`
96
- # This example assumes Pydantic v2 structure for `values` if needed, but here we only need `task_type`
97
- # which should already be validated or available.
98
- # For simplicity, accessing it via `values.data.get('task_type')` in Pydantic v2 context.
99
- # If using Pydantic v1, it's `values.get('task_type')`.
100
- # Let's assume `values` is a dict-like object containing other fields.
101
-
102
- # The validator structure depends on Pydantic version.
103
- # For Pydantic v2, it's `info: ValidationInfo` and `info.data.get('task_type')`
104
- # For Pydantic v1, `values` is a dict.
105
- # For this example, let's assume `values` is a dict of the fields.
106
- task_type_value = None
107
- if hasattr(values, 'data'): # Pydantic v2 way
108
- task_type_value = values.data.get('task_type')
109
- elif isinstance(values, dict): # Pydantic v1 way (or if it's passed as a dict)
110
- task_type_value = values.get('task_type')
111
-
112
-
113
- if task_type_value == TaskType.TRACKING and value is None:
114
- raise ValueError("For 'tracking' tasks, 'data_subject' must be specified.")
115
- if task_type_value == TaskType.INITIATIVE and value is DataSubject.GENERAL:
116
- # This is acceptable, or you might want to enforce it to be None
117
- pass
118
- return value
119
-
120
- class KeyResult(BaseModel):
121
- """
122
- A measurable outcome that contributes to an Objective.
123
- """
124
- key_result_description: str = Field( # Renamed from 'key_result'
125
- description="A clear, specific, and measurable description of the key result."
126
- )
127
- tasks: List[Task] = Field(
128
- default_factory=list,
129
- description="A list of specific tasks that will be undertaken to achieve this key result."
130
- )
131
- target_metric: Optional[str] = Field(
132
- default=None,
133
- description="The primary metric used to measure the achievement of this key result (e.g., 'Follower Growth Rate', 'Average Engagement Rate')."
134
- )
135
- target_value: Optional[str] = Field( # Can be numeric or descriptive (e.g., "Increase by 10%", "Achieve 5%")
136
- default=None,
137
- description="The specific target value for the metric (e.g., '5%', '1000 new followers')."
138
- )
139
- key_result_type: KeyResultType = Field(
140
- description=(
141
- "Indicates the nature of the Key Result. "
142
- "PERFORMANCE: Focused on achieving a specific, measurable level for a defined metric. "
143
- "Its core metric can typically be extracted and monitored directly via a data source, such as the LinkedIn API "
144
- "(e.g., monthly post count, engagement rate, follower gains, impressions, CTR, mention volume). "
145
- "The goal is to hit or exceed a target for this metric. "
146
- "COMPLETION: Focused on finishing a distinct project, delivering a specific output, or establishing a new process. "
147
- "Progress is primarily tracked by the successful completion of the defined scope of work. "
148
- "Generally, the primary outcome of a COMPLETION Key Result is not a metric continuously tracked via an automated "
149
- "data source like the LinkedIn API, or the 'metric' itself describes the state of completion (e.g., 'report delivered', 'process established')."
150
- )
151
- )
152
- data_subject: Optional[DataSubject] = Field(
153
- default=None,
154
- description="For 'performance' key results, specifies the primary data subject (e.g., follower_stats, posts, mentions). Can be 'general' or null for 'completion' tasks."
155
- )
156
-
157
- class OKR(BaseModel):
158
- """
159
- Defines an Objective and its associated Key Results (OKRs).
160
- """
161
- objective_description: str = Field( # Renamed from 'objective'
162
- description="A high-level, qualitative goal that the team aims to achieve. Should be aspirational and motivating."
163
- )
164
- key_results: List[KeyResult] = Field(
165
- default_factory=list,
166
- description="A list of 2-5 specific, measurable, achievable, relevant, and time-bound (SMART) key results that define success for the objective."
167
- )
168
- objective_timeline: TimelineCategory = Field(
169
- description="The overall timeline category for achieving this objective."
170
- )
171
- objective_owner: Optional[str] = Field(
172
- default=None,
173
- description="The team name",
174
- max_length=50
175
- )
176
-
177
-
178
- class TaskExtractionOutput(BaseModel):
179
- """
180
- Structured output from the TaskExtractionAgent, including context and OKRs.
181
- """
182
- current_quarter_info: str = Field(
183
- description="Information about the current quarter and days remaining (e.g., 'Q2 2025, 45 days remaining')."
184
- )
185
- okrs: List[OKR] = Field(
186
- default_factory=list,
187
- description="A list of Objectives and Key Results (OKRs) derived from the analysis."
188
- )
189
- overall_strategic_focus: Optional[str] = Field(
190
- default=None,
191
- description="A brief summary of the main strategic focus areas identified from the tasks."
192
- )
193
- generation_timestamp: str = Field(
194
- default_factory=lambda: datetime.utcnow().isoformat(),
195
- description="Timestamp of when this task extraction output was generated."
196
- )
197
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/orchestrators/linkedin_analytics_orchestrator.py DELETED
@@ -1,299 +0,0 @@
1
- # orchestrators/linkedin_analytics_orchestrator.py
2
- import pandas as pd
3
- import logging
4
- from typing import Dict, Any, Optional
5
- from datetime import date, datetime # For TaskExtractionAgent date
6
- from dataclasses import asdict # For converting AgentMetrics to dict if needed for final output
7
- import os
8
-
9
- os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "False"
10
- GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY")
11
- os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
12
-
13
- # Project-specific imports
14
- from features.insight_and_tasks.utils.pandasai_setup import configure_pandasai # Centralized PandasAI config
15
- from features.insight_and_tasks.coordinators.employer_branding_coordinator import EnhancedEmployerBrandingCoordinator
16
- from features.insight_and_tasks.agents.task_extraction_agent import TaskExtractionAgent
17
- from features.insight_and_tasks.data_models.metrics import AgentMetrics # For type hinting
18
- from features.insight_and_tasks.data_models.tasks import TaskExtractionOutput # For type hinting
19
- from features.insight_and_tasks.agents.task_extraction_model_groq import extract_tasks_from_text_groq
20
-
21
- # Configure logger for this module
22
- logger = logging.getLogger(__name__)
23
-
24
- class EnhancedLinkedInAnalyticsOrchestrator:
25
- """
26
- Orchestrates the end-to-end LinkedIn analytics process, from data input through
27
- specialized agent analysis, coordinator synthesis, and actionable task extraction.
28
- """
29
-
30
- def __init__(self, api_key: str, llm_model_name: Optional[str] = None, current_date_for_tasks: Optional[date] = None):
31
- """
32
- Initializes the orchestrator.
33
- Args:
34
- api_key: The API key for Google services (used by PandasAI and LlmAgents).
35
- llm_model_name: Optional. The primary LLM model name to be used by agents.
36
- Specific agents/coordinator might override with their defaults if not set.
37
- current_date_for_tasks: Optional. The date to be used by TaskExtractionAgent for quarter calculations. Defaults to today.
38
- """
39
- self.api_key = api_key
40
- self.llm_model_name = llm_model_name # Can be passed down or agents use their defaults
41
-
42
- # Configure PandasAI globally at the start of orchestration.
43
- # Pass the model_name if specified, otherwise pandasai_setup might use its own default.
44
- try:
45
- configure_pandasai(api_key=self.api_key, model_name=self.llm_model_name)
46
- logger.info(f"PandasAI configured by orchestrator with model hint: {self.llm_model_name or 'default'}.")
47
- except Exception as e:
48
- logger.error(f"Failed to configure PandasAI in orchestrator: {e}", exc_info=True)
49
- # Decide if this is a critical failure or if agents can proceed (they might try to reconfigure)
50
-
51
- # Initialize the coordinator, which in turn initializes its specialized agents.
52
- # Pass the model_name hint to the coordinator.
53
- self.coordinator = EnhancedEmployerBrandingCoordinator(api_key=self.api_key, model_name=self.llm_model_name)
54
-
55
- # Initialize the TaskExtractionAgent.
56
- # It uses its own default model unless overridden here.
57
- self.task_extractor = TaskExtractionAgent(
58
- api_key=self.api_key,
59
- model_name=self.llm_model_name, # Pass model hint
60
- current_date=current_date_for_tasks # Defaults to today if None
61
- )
62
- logger.info("EnhancedLinkedInAnalyticsOrchestrator initialized.")
63
-
64
- async def generate_full_analysis_and_tasks(
65
- self,
66
- follower_stats_df: pd.DataFrame,
67
- post_df: pd.DataFrame,
68
- mentions_df: pd.DataFrame
69
- ) -> Dict[str, Any]:
70
- """
71
- Executes the full pipeline: agent analyses, coordinator synthesis, and task extraction.
72
- Args:
73
- follower_stats_df: DataFrame containing follower statistics.
74
- post_df: DataFrame containing post performance data.
75
- mentions_df: DataFrame containing brand mentions data.
76
- Returns:
77
- A dictionary containing the comprehensive analysis text, actionable tasks (OKRs),
78
- and the detailed metrics from each specialized agent.
79
- """
80
- logger.info("Starting full analysis and task generation pipeline...")
81
-
82
- # Step 1: Get analyses and metrics from specialized agents.
83
- # The coordinator's internal agents are used here.
84
- logger.info("Running follower analysis...")
85
- follower_agent_metrics: AgentMetrics = self.coordinator.follower_agent.analyze_follower_data(follower_stats_df)
86
- logger.info(f"Follower analysis complete. Summary: {follower_agent_metrics.analysis_summary[:100]}...")
87
-
88
- logger.info("Running post performance analysis...")
89
- post_agent_metrics: AgentMetrics = self.coordinator.post_agent.analyze_post_data(post_df)
90
- logger.info(f"Post analysis complete. Summary: {post_agent_metrics.analysis_summary[:100]}...")
91
-
92
- logger.info("Running mentions analysis...")
93
- mentions_agent_metrics: AgentMetrics = self.coordinator.mentions_agent.analyze_mentions_data(mentions_df)
94
- logger.info(f"Mentions analysis complete. Summary: {mentions_agent_metrics.analysis_summary[:100]}...")
95
-
96
- # Step 2: Coordinator synthesizes these metrics into a comprehensive analysis text.
97
- logger.info("Running coordinator for synthesis...")
98
- comprehensive_analysis_text: str = await self.coordinator.generate_comprehensive_analysis(
99
- follower_agent_metrics, post_agent_metrics, mentions_agent_metrics
100
- )
101
- logger.info(f"Coordinator synthesis complete. Report length: {len(comprehensive_analysis_text)} chars.")
102
- if not comprehensive_analysis_text or comprehensive_analysis_text.startswith("Error"):
103
- logger.error(f"Coordinator synthesis failed or produced an error message: {comprehensive_analysis_text}")
104
- # Potentially stop here or proceed with task extraction on whatever text was generated.
105
-
106
- # Step 3: TaskExtractionAgent extracts actionable tasks (OKRs) from the comprehensive text.
107
- logger.info("Running task extraction...")
108
- #actionable_tasks_okrs, quarter, year, days_remaining = extract_tasks_from_text(comprehensive_analysis_text, GOOGLE_API_KEY)
109
- actionable_tasks_okrs, quarter, year, days_remaining = extract_tasks_from_text_groq(comprehensive_analysis_text)
110
- logger.info(f"Task extraction complete. Number of OKRs: {len(actionable_tasks_okrs.okrs) if actionable_tasks_okrs else 'Error'}.")
111
-
112
- # Step 4: Compile and return all results.
113
- # Convert Pydantic/dataclass objects to dicts for easier JSON serialization if the final output needs it.
114
- # The `actionable_tasks_okrs` is already a Pydantic model, which can be serialized with .model_dump() / .json().
115
- # `AgentMetrics` are dataclasses, use `asdict`.
116
-
117
- final_results = {
118
- "comprehensive_analysis_report": comprehensive_analysis_text,
119
- "actionable_okrs_and_tasks": actionable_tasks_okrs.model_dump() if actionable_tasks_okrs else None, # Pydantic v2
120
- "quarter": quarter,
121
- "year": year,
122
- "days_remaining": days_remaining,
123
- # "actionable_okrs_and_tasks": actionable_tasks_okrs.dict() if actionable_tasks_okrs else None, # Pydantic v1
124
- "detailed_metrics": {
125
- "follower_agent": asdict(follower_agent_metrics) if follower_agent_metrics else None,
126
- "post_agent": asdict(post_agent_metrics) if post_agent_metrics else None,
127
- "mentions_agent": asdict(mentions_agent_metrics) if mentions_agent_metrics else None,
128
- }
129
- }
130
- logger.info("Full analysis and task generation pipeline finished successfully.")
131
- return final_results
132
-
133
- # Example usage (similar to the original script's main execution block)
134
- if __name__ == '__main__':
135
- import asyncio
136
- import os
137
- from utils.logging_config import setup_logging
138
- from utils.data_fetching import fetch_linkedin_data_from_bubble, VALID_DATA_TYPES
139
-
140
- setup_logging() # Configure logging for the application
141
-
142
- # --- Configuration ---
143
- # Attempt to get API key from environment variable
144
- # IMPORTANT: Set GOOGLE_API_KEY and BUBBLE_API_KEY in your environment for this to run.
145
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
146
- BUBBLE_API_KEY_ENV = os.environ.get("BUBBLE_API_KEY") # Used by data_fetching
147
-
148
- if not GOOGLE_API_KEY:
149
- logger.critical("GOOGLE_API_KEY environment variable not set. Orchestrator cannot initialize LLM agents.")
150
- exit(1)
151
- if not BUBBLE_API_KEY_ENV: # data_fetching will also check, but good to note here
152
- logger.warning("BUBBLE_API_KEY environment variable not set. Data fetching from Bubble will fail.")
153
- # You might want to exit or use mock data if Bubble is essential.
154
-
155
- # Set the Google Vertex AI environment variable if not using Vertex AI (as in original)
156
- os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "False"
157
-
158
- # Orchestrator settings
159
- ORG_URN_EXAMPLE = "urn:li:organization:19010008" # Example, replace with actual
160
- # Specify a model or let orchestrator/agents use their defaults
161
- # LLM_MODEL_FOR_ORCHESTRATION = "gemini-2.5-flash-preview-05-20" # Example: use a powerful model
162
- LLM_MODEL_FOR_ORCHESTRATION = None # Let agents use their defaults or pass a specific one
163
-
164
- # --- Initialize Orchestrator ---
165
- orchestrator = EnhancedLinkedInAnalyticsOrchestrator(
166
- api_key=GOOGLE_API_KEY,
167
- llm_model_name=LLM_MODEL_FOR_ORCHESTRATION,
168
- current_date_for_tasks=datetime.utcnow().date() # Use today for task planning
169
- )
170
-
171
- # --- Data Fetching ---
172
- logger.info(f"Fetching data for organization URN: {ORG_URN_EXAMPLE}")
173
-
174
- # Helper to fetch and log
175
- def get_data(data_type: VALID_DATA_TYPES, org_urn: str) -> pd.DataFrame:
176
- df, error = fetch_linkedin_data_from_bubble(org_urn=org_urn, data_type=data_type)
177
- if error:
178
- logger.error(f"Error fetching {data_type}: {error}. Using empty DataFrame.")
179
- return pd.DataFrame()
180
- if df is None: # Should not happen if error is None, but as a safeguard
181
- logger.warning(f"Fetched {data_type} is None but no error reported. Using empty DataFrame.")
182
- return pd.DataFrame()
183
- logger.info(f"Successfully fetched {data_type} with {len(df)} rows.")
184
- return df
185
-
186
- follower_stats_df_raw = get_data("li_follower_stats", ORG_URN_EXAMPLE)
187
- posts_df_raw = get_data("LI_posts", ORG_URN_EXAMPLE) # Contains post content, media_type, etc.
188
- mentions_df_raw = get_data("Li_mentions", ORG_URN_EXAMPLE)
189
- post_stats_df_raw = get_data("LI_post_stats", ORG_URN_EXAMPLE) # Contains engagement numbers for posts
190
-
191
- # --- Data Preprocessing/Merging (as in original example) ---
192
-
193
- # Select relevant columns for follower_stats_df
194
- if not follower_stats_df_raw.empty:
195
- follower_stats_df = follower_stats_df_raw[[
196
- 'category_name', "follower_count_organic", "follower_count_paid", "follower_count_type"
197
- ]].copy()
198
- else:
199
- follower_stats_df = pd.DataFrame() # Ensure it's an empty DF if raw is empty
200
-
201
- # Merge posts_df and post_stats_df
202
- # This logic assumes 'id' in posts_df_raw and 'post_id' in post_stats_df_raw
203
- merged_posts_df = pd.DataFrame()
204
- if not posts_df_raw.empty and not post_stats_df_raw.empty:
205
- if 'id' in posts_df_raw.columns and 'post_id' in post_stats_df_raw.columns:
206
- # Ensure 'id' in posts_df_raw is unique before merge if it's a left table key
207
- # posts_df_raw.drop_duplicates(subset=['id'], keep='first', inplace=True)
208
- merged_posts_df = pd.merge(posts_df_raw, post_stats_df_raw, left_on='id', right_on='post_id', how='left', suffixes=('', '_stats'))
209
- logger.info(f"Merged posts_df ({len(posts_df_raw)}) and post_stats_df ({len(post_stats_df_raw)}) into merged_posts_df ({len(merged_posts_df)}).")
210
- else:
211
- logger.warning("Cannot merge posts_df and post_stats_df due to missing 'id' or 'post_id'. Using posts_df_raw.")
212
- merged_posts_df = posts_df_raw.copy() # Fallback to posts_df_raw
213
- elif not posts_df_raw.empty:
214
- logger.info("post_stats_df is empty. Using posts_df_raw for post analysis.")
215
- merged_posts_df = posts_df_raw.copy()
216
- else:
217
- logger.warning("Both posts_df_raw and post_stats_df_raw are empty.")
218
- merged_posts_df = pd.DataFrame() # Empty DF
219
-
220
- # Select and ensure essential columns for merged_posts_df
221
- # These are columns expected by EnhancedPostPerformanceAgent
222
- expected_post_cols = [
223
- 'li_eb_label', 'media_type', 'is_ad', 'id', 'published_at', 'sentiment',
224
- 'engagement', 'impressionCount', 'clickCount', 'likeCount', 'commentCount', 'shareCount'
225
- ]
226
- if not merged_posts_df.empty:
227
- final_post_df_cols = {}
228
- for col in expected_post_cols:
229
- if col in merged_posts_df.columns:
230
- final_post_df_cols[col] = merged_posts_df[col]
231
- elif f"{col}_stats" in merged_posts_df.columns: # Check for suffixed columns from merge
232
- final_post_df_cols[col] = merged_posts_df[f"{col}_stats"]
233
- else:
234
- logger.debug(f"Expected column '{col}' not found in merged_posts_df. Will be created as empty/default by agent if needed.")
235
- # Agent preprocessing should handle missing columns by creating them with defaults (0 or 'Unknown')
236
-
237
- # Create the final DataFrame with only the selected/available columns
238
- # This ensures that if a column is missing, it doesn't cause an error here,
239
- # but the agent's preprocessing will handle it.
240
- # However, it's better to ensure they exist with NAs if the agent expects them.
241
- temp_post_df = pd.DataFrame(final_post_df_cols)
242
- # Ensure all expected columns are present, filling with NA if missing from selection
243
- for col in expected_post_cols:
244
- if col not in temp_post_df.columns:
245
- temp_post_df[col] = pd.NA # Or appropriate default like 0 for numeric, 'Unknown' for categorical
246
- merged_posts_df = temp_post_df[expected_post_cols].copy() # Ensure correct order and all columns
247
-
248
- else: # If merged_posts_df started empty and stayed empty
249
- merged_posts_df = pd.DataFrame(columns=expected_post_cols)
250
-
251
-
252
- # Mentions DataFrame - select relevant columns if necessary, or pass as is
253
- # Assuming mentions_df_raw is already in the correct shape or agent handles it.
254
- # For example, if it needs specific columns:
255
- # mentions_df = mentions_df_raw[['date', 'sentiment_label', 'mention_content']].copy() if not mentions_df_raw.empty else pd.DataFrame()
256
- mentions_df = mentions_df_raw.copy() # Pass as is, agent will preprocess
257
-
258
-
259
- # --- Run Orchestration ---
260
- async def main_orchestration():
261
- if follower_stats_df.empty and merged_posts_df.empty and mentions_df.empty:
262
- logger.error("All input DataFrames are empty. Aborting orchestration.")
263
- return None
264
-
265
- logger.info("Orchestrator starting generate_full_analysis_and_tasks...")
266
- results = await orchestrator.generate_full_analysis_and_tasks(
267
- follower_stats_df=follower_stats_df,
268
- post_df=merged_posts_df,
269
- mentions_df=mentions_df
270
- )
271
- return results
272
-
273
- orchestration_results = asyncio.run(main_orchestration())
274
-
275
- # --- Output Results ---
276
- if orchestration_results:
277
- print("\n\n" + "="*30 + " COMPREHENSIVE ANALYSIS REPORT " + "="*30)
278
- print(orchestration_results.get("comprehensive_analysis_report", "Report not generated."))
279
-
280
- print("\n\n" + "="*30 + " ACTIONABLE TASKS (OKRs) " + "="*30)
281
- okrs_data = orchestration_results.get("actionable_okrs_and_tasks")
282
- if okrs_data:
283
- # okrs_data is already a dict from .model_dump()
284
- print(json.dumps(okrs_data, indent=2))
285
- else:
286
- print("No actionable tasks (OKRs) generated or an error occurred.")
287
-
288
- print("\n\n" + "="*30 + " DETAILED AGENT METRICS " + "="*30)
289
- detailed_metrics = orchestration_results.get("detailed_metrics", {})
290
- for agent_name, metrics_dict in detailed_metrics.items():
291
- print(f"\n--- {agent_name.replace('_', ' ').title()} Metrics ---")
292
- if metrics_dict:
293
- print(json.dumps(metrics_dict, indent=2, default=str)) # default=str for any non-serializable types
294
- else:
295
- print("Metrics not available for this agent.")
296
- else:
297
- logger.info("Orchestration did not produce results (likely due to empty input data).")
298
-
299
- logger.info("Orchestration example finished.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/utils/__init__.py DELETED
@@ -1,31 +0,0 @@
1
- # utils/__init__.py
2
-
3
- # This file makes the 'utils' directory a Python package.
4
- # You can choose to expose certain classes or functions directly at the package level
5
- # for easier importing, if desired.
6
-
7
- # For example:
8
- # from .retry_mechanism import RetryMechanism
9
- # from .pandasai_setup import configure_pandasai
10
- # from .data_fetching import fetch_linkedin_data_from_bubble
11
- # from .logging_config import setup_logging
12
-
13
- # Or, you can let users import them directly from the modules:
14
- # from utils.retry_mechanism import RetryMechanism
15
-
16
- # For now, keeping it simple and allowing module-level imports.
17
- # setup_logging() # Optionally call setup_logging() when the utils package is imported.
18
- # However, it's often better to call this explicitly at the application entry point.
19
-
20
- __all__ = [
21
- "RetryMechanism",
22
- "configure_pandasai",
23
- "fetch_linkedin_data_from_bubble",
24
- "setup_logging"
25
- ]
26
-
27
- # Import them here to make them available when 'from utils import *' is used,
28
- # or for direct access like 'utils.RetryMechanism'.
29
- from .retry_mechanism import RetryMechanism
30
- from .pandasai_setup import configure_pandasai
31
- from .logging_config import setup_logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/utils/logging_config.py DELETED
@@ -1,28 +0,0 @@
1
- # utils/logging_config.py
2
- import logging
3
- import os
4
-
5
- def setup_logging():
6
- """
7
- Configures basic logging for the application.
8
- Logs to console.
9
- """
10
- log_level_str = os.environ.get("LOG_LEVEL", "INFO").upper()
11
- log_level = getattr(logging, log_level_str, logging.INFO)
12
-
13
- logging.basicConfig(
14
- level=log_level,
15
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
16
- datefmt="%Y-%m-%d %H:%M:%S"
17
- )
18
- # You can also direct logs to a file if needed:
19
- # file_handler = logging.FileHandler("app.log")
20
- # file_handler.setLevel(log_level)
21
- # file_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
22
- # logging.getLogger().addHandler(file_handler)
23
-
24
- # Silence overly verbose libraries if necessary
25
- # logging.getLogger("some_verbose_library").setLevel(logging.WARNING)
26
-
27
- logger = logging.getLogger(__name__)
28
- logger.info(f"Logging configured with level: {log_level_str}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/utils/pandasai_setup.py DELETED
@@ -1,54 +0,0 @@
1
- # utils/pandasai_setup.py
2
- import os
3
- import logging
4
- import pandasai as pai
5
- from pandasai_litellm import LiteLLM # Ensure this import matches your installed library
6
-
7
- # Configure logger for this module
8
- logger = logging.getLogger(__name__)
9
-
10
- # It's good practice to define constants at the top or in a config file
11
- DEFAULT_PANDASAI_MODEL = "gemini/gemini-2.5-flash-preview-05-20" # Using a common default
12
-
13
- def configure_pandasai(api_key: str, model_name: str = None):
14
- """
15
- Configures PandasAI with LiteLLM using the provided API key and model.
16
-
17
- Args:
18
- api_key: The Google API key.
19
- model_name: The specific model to use (e.g., "gemini/gemini-1.5-flash-latest").
20
- If None, uses DEFAULT_PANDASAI_MODEL.
21
- """
22
- if not api_key:
23
- logger.error("PandasAI Configuration Error: API key is missing.")
24
- # Depending on strictness, you might raise an error or just log
25
- # raise ValueError("API key must be provided for PandasAI configuration")
26
- return
27
-
28
- os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "False"
29
- os.environ["GOOGLE_API_KEY"] = api_key
30
-
31
-
32
- selected_model = model_name if model_name else DEFAULT_PANDASAI_MODEL
33
-
34
- try:
35
- llm = LiteLLM(
36
- model=DEFAULT_PANDASAI_MODEL , # Use the selected model
37
- api_key=api_key
38
- )
39
-
40
- # PandasAI configuration
41
- pai.config.set({
42
- "llm": llm,
43
- "temperature": 0.3, # Lower temperature for more consistent results
44
- "max_retries": 3
45
- })
46
- logger.info(f"PandasAI configured successfully with model: {selected_model}")
47
- logger.info(f"PandasAI LLM object: {llm}")
48
-
49
-
50
- except ImportError:
51
- logger.error("PandasAI or pandasai_litellm is not installed. Please install the required packages.")
52
- except Exception as e:
53
- logger.error(f"Error configuring PandasAI: {e}", exc_info=True)
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/insight_and_tasks/utils/retry_mechanism.py DELETED
@@ -1,61 +0,0 @@
1
- # utils/retry_mechanism.py
2
- import time
3
- import logging
4
- from typing import Callable, Any, Tuple
5
-
6
- # Configure logger for this module
7
- logger = logging.getLogger(__name__)
8
-
9
- class RetryMechanism:
10
- """External retry mechanism with exponential backoff"""
11
-
12
- @staticmethod
13
- def retry_with_backoff(
14
- func: Callable,
15
- max_retries: int = 3,
16
- base_delay: float = 1.0,
17
- exceptions: Tuple[type[Exception], ...] = (Exception,) # More specific type hint
18
- ) -> Any:
19
- """
20
- Retries a function call with exponential backoff.
21
-
22
- Args:
23
- func: The function to call.
24
- max_retries: Maximum number of retries.
25
- base_delay: Base delay in seconds for backoff.
26
- exceptions: A tuple of exception types to catch and retry on.
27
-
28
- Returns:
29
- The result of the function call if successful.
30
-
31
- Raises:
32
- The last exception encountered if all retries fail.
33
- """
34
- last_exception = None
35
- current_delay = base_delay
36
-
37
- for attempt in range(max_retries + 1): # +1 for initial attempt
38
- try:
39
- logger.info(f"Attempt {attempt + 1}/{max_retries + 1} for function {func.__name__}")
40
- result = func()
41
- if attempt > 0: # Log if a retry was successful
42
- logger.info(f"Function {func.__name__} succeeded on attempt {attempt + 1}")
43
- return result
44
- except exceptions as e:
45
- last_exception = e
46
- logger.warning(f"Attempt {attempt + 1} for {func.__name__} failed: {str(e)}")
47
-
48
- if attempt < max_retries:
49
- logger.info(f"Waiting {current_delay:.2f} seconds before retrying {func.__name__}...")
50
- time.sleep(current_delay)
51
- current_delay *= 2 # Exponential backoff
52
- else:
53
- logger.error(f"All {max_retries + 1} attempts for {func.__name__} failed.")
54
-
55
- # If loop finishes, all retries failed, raise the last exception
56
- if last_exception is not None:
57
- raise last_exception
58
- else:
59
- # This case should ideally not be reached if func always raises on failure
60
- # or returns successfully. Added for completeness.
61
- raise RuntimeError(f"Function {func.__name__} failed after all retries without a specific exception.")