Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Community

GuglielmoTor committed on 19 days ago

Commit

9edb5c4

verified ·

1 Parent(s): af334ac

Update features/insight_and_tasks/agents/task_extraction_agent.py

Browse files

Files changed (1) hide show

features/insight_and_tasks/agents/task_extraction_agent.py +22 -99

features/insight_and_tasks/agents/task_extraction_agent.py CHANGED Viewed

@@ -1,5 +1,4 @@
 # agents/task_extraction_agent.py
-from enum import Enum
 import logging
 from typing import Optional
 from datetime import datetime, date # Ensure date is imported if used for type hints
@@ -27,10 +26,6 @@ logger = logging.getLogger(__name__)
 DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20" # Or your specific model
-def _get_enum_values_str(enum_class: type[Enum]) -> str:
-    """Helper to format enum values for inclusion in the prompt."""
-    return ", ".join([f"'{e.value}'" for e in enum_class])
 class TaskExtractionAgent:
     """
     Agent specialized in extracting actionable tasks and OKRs from analysis insights,
@@ -48,7 +43,7 @@ class TaskExtractionAgent:
             current_date: The current date to use for quarter calculations. Defaults to today.
         """
         self.api_key = api_key # Store if needed by LlmAgent or other components
-        self.model_name = "gemini-1.5-pro" #model_name or DEFAULT_AGENT_MODEL
         self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
         # LlmAgent is initialized with dynamic instruction and output schema
@@ -89,61 +84,6 @@ class TaskExtractionAgent:
         quarter = self._get_quarter(self.current_date)
         days_remaining = self._days_until_quarter_end(self.current_date)
-        # Dynamically get enum values to include in the prompt
-        effort_levels_str = _get_enum_values_str(EffortLevel)
-        task_types_str = _get_enum_values_str(TaskType)
-        data_subjects_str = _get_enum_values_str(DataSubject)
-        timeline_categories_str = _get_enum_values_str(TimelineCategory)
-        priority_levels_str = _get_enum_values_str(PriorityLevel)
-        # Detailed schema descriptions based on your Pydantic models.
-        # These are manually transcribed from your model descriptions for this example.
-        # For maximum robustness, consider a helper function to generate these
-        # strings directly by introspecting your Pydantic models if they change frequently.
-        task_schema_details = f"""
-        'Task' Model Schema: Represents a single, actionable item.
-        (Refer to your Pydantic 'Task' model for exact field definitions and descriptions)
-        - task_category (string, required): The broader strategic category or theme (e.g., Content Strategy, Audience Engagement, Reputation Management, Performance Monitoring). Helps in organizing and reporting tasks.
-        - task_description (string, required): A concise, clear, and actionable description of what needs to be done for this specific task.
-        - objective_deliverable (string, required): The specific, measurable outcome or output expected from completing this task. Clearly defines what 'done' looks like.
-        - effort (string, required): An estimation of the resources (time, complexity) required. Allowed values: {effort_levels_str}.
-        - timeline (string, required): The projected timeframe for completing this task. Allowed values: {timeline_categories_str}.
-        - responsible_party (string, required): The designated team, role, or individual accountable for execution.
-        - success_criteria_metrics (string, required): Specific, measurable criteria and KPIs to determine task success.
-        - dependencies_prerequisites (string, optional): Any tasks, resources, or conditions that must be met before this task can begin or be completed. If not applicable, omit or use null.
-        - priority (string, required): The assigned priority level. Allowed values: {priority_levels_str}.
-        - priority_justification (string, required): A brief rationale for the assigned priority level, linking to impact or urgency.
-        - why_proposed (string, required): The core reason for proposing this task, clearly linking back to specific findings or insights from the data analysis.
-        - task_type (string, required): Classifies the task. Allowed values: {task_types_str}.
-        - data_subject (string, conditional): For 'tracking' tasks, specifies the primary data domain (e.g., follower_stats, posts). Allowed values: {data_subjects_str}. This field MUST be specified if 'task_type' is '{TaskType.TRACKING.value}'. For '{TaskType.INITIATIVE.value}' tasks, it can be '{DataSubject.GENERAL.value}', null, or omitted if not specific.
-        """
-        key_result_schema_details = f"""
-        'KeyResult' Model Schema: A specific, measurable outcome contributing to an Objective.
-        (Refer to your Pydantic 'KeyResult' model for exact field definitions and descriptions)
-        - key_result_description (string, required): A clear, specific, measurable, achievable, relevant, and time-bound (SMART) description of the desired outcome.
-        - tasks (array of 'Task' objects, required, can be empty if no tasks defined yet): A list of specific, actionable tasks to achieve this key result.
-        - target_metric (string, optional): The primary metric to quantify achievement (e.g., 'Follower Growth Rate').
-        - target_value (string, optional): The specific target for the 'target_metric' (e.g., '5%', '1000 new followers').
-        """
-        okr_schema_details = f"""
-        'OKR' Model Schema: Defines an Objective and its associated Key Results.
-        (Refer to your Pydantic 'OKR' model for exact field definitions and descriptions)
-        - objective_description (string, required): A high-level, qualitative, and aspirational goal.
-        - key_results (array of 'KeyResult' objects, required): A list of 2-5 specific and measurable Key Results that define success for the objective.
-        - objective_timeline (string, required): The overall projected timeline category for achieving this objective. Allowed values: {timeline_categories_str}.
-        - objective_owner (string, optional): The team or individual primarily responsible for this objective.
-        """
-        task_extraction_output_schema_details = f"""
-        'TaskExtractionOutput' Model Schema: This is the root JSON object you MUST return.
-        (Refer to your Pydantic 'TaskExtractionOutput' model for exact field definitions and descriptions)
-        - current_quarter_info (string, required): Information about the current quarter. YOU MUST USE THIS EXACT VALUE: "Q{quarter}, {days_remaining} days remaining".
-        - okrs (array of 'OKR' objects, required): A list of Objectives and Key Results derived from the analysis.
-        - overall_strategic_focus (string, optional): A brief summary of the main strategic focus areas identified.
-        - generation_timestamp (string, required, ISO 8601 format e.g., "YYYY-MM-DDTHH:MM:SS.ffffffZ"): Timestamp of when this output was generated. You should generate this.
-        """
         return f"""
         You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
@@ -153,21 +93,9 @@ class TaskExtractionAgent:
         - Days remaining in current quarter: {days_remaining}
         - Today's Date (for your context only, not for direct output unless specified by a schema field): {self.current_date.isoformat()}
-        YOUR MANDATE:
-        1.  Thoroughly analyze the provided 'comprehensive_analysis' text.
-        2.  Define clear, aspirational Objectives. These become the 'objective_description' in 'OKR' objects.
-        3.  For each Objective, formulate 2-3 specific, measurable Key Results. These populate the 'key_results' list within each 'OKR' object.
-        4.  Under each KeyResult, detail the actionable Tasks required to achieve it. These populate the 'tasks' list within each 'KeyResult' object.
-        5.  Strict Schema Adherence: Your entire output MUST be a single, valid JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema. All nested objects ('OKR', 'KeyResult', 'Task') MUST also strictly conform to their respective schemas. Pay extremely close attention to required fields, data types (string, array, etc.), and valid enum values.
-        DETAILED SCHEMA DEFINITIONS (Your output MUST precisely follow these structures):
-        {task_extraction_output_schema_details}
-        {okr_schema_details}
-        {key_result_schema_details}
-        {task_schema_details}
         KEY GUIDELINES FOR QUALITY AND ACCURACY:
         - Task Timelines: Must be realistic considering the {days_remaining} days left in Q{quarter}. Prioritize actions that can achieve significant progress or be completed within this timeframe. Use 'timeline' values exclusively from: {timeline_categories_str}.
@@ -183,8 +111,7 @@ class TaskExtractionAgent:
         You will receive a 'comprehensive_analysis' text.
         OUTPUT FORMAT:
-        You MUST return a SINGLE JSON object. This object must be a valid instance of the 'TaskExtractionOutput' Pydantic schema.
-        Example of the overall JSON structure (content is illustrative; refer to schemas for full details):
         {{
           "current_quarter_info": "Q{quarter}, {days_remaining} days remaining",
           "okrs": [
@@ -198,32 +125,28 @@ class TaskExtractionAgent:
                   "target_metric": "LinkedIn Organic Reach Percentage Increase",
                   "target_value": "15%",
                   "tasks": [
-                    // ... Array of Task objects, each following the 'Task' schema ...
-                    // See detailed Task example below.
                   ]
                 }}
               ]
             }}
           ],
-          "overall_strategic_focus": "Example: Focus on data-driven content strategy and proactive community engagement to boost Q{quarter} performance.", // Optional
-          "generation_timestamp": "{datetime.utcnow().isoformat()}Z" // Generate an ISO 8601 UTC timestamp
-        }}
-        Detailed Example of a single 'Task' object (ensure all fields are covered as per schema):
-        {{
-          "task_category": "Content Strategy",
-          "task_description": "Develop and schedule a 4-week content calendar for LinkedIn focusing on industry insights.",
-          "objective_deliverable": "Deliverable: A finalized 4-week content calendar with 3 posts per week, approved and scheduled.",
-          "effort": "{EffortLevel.MEDIUM.value}",
-          "timeline": "{TimelineCategory.IMMEDIATE.value}",
-          "responsible_party": "Content Marketing Manager",
-          "success_criteria_metrics": "Content calendar completed and approved by [Date]. All posts scheduled by [Date].",
-          "dependencies_prerequisites": "Completion of Q{quarter} keyword research and audience persona refinement.",
-          "priority": "{PriorityLevel.HIGH.value}",
-          "priority_justification": "Critical for maintaining consistent brand voice and achieving engagement targets for the quarter.",
-          "why_proposed": "Analysis of LinkedIn insights report (Page 3) showed a 20% drop in engagement last month, attributed to inconsistent posting schedule and lack of targeted content themes.",
-          "task_type": "{TaskType.INITIATIVE.value}",
-          "data_subject": "{DataSubject.POSTS.value}" // Could be 'general' for broader initiatives too
         }}
         Focus on precision, quality, actionability, and strict adherence to the specified JSON output schema and all constraints.

 # agents/task_extraction_agent.py
 import logging
 from typing import Optional
 from datetime import datetime, date # Ensure date is imported if used for type hints
 DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20" # Or your specific model
 class TaskExtractionAgent:
     """
     Agent specialized in extracting actionable tasks and OKRs from analysis insights,
             current_date: The current date to use for quarter calculations. Defaults to today.
         """
         self.api_key = api_key # Store if needed by LlmAgent or other components
+        self.model_name = model_name or DEFAULT_AGENT_MODEL
         self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
         # LlmAgent is initialized with dynamic instruction and output schema
         quarter = self._get_quarter(self.current_date)
         days_remaining = self._days_until_quarter_end(self.current_date)
         return f"""
         You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
         - Days remaining in current quarter: {days_remaining}
         - Today's Date (for your context only, not for direct output unless specified by a schema field): {self.current_date.isoformat()}
+         For EACH 'OKR' object, you MUST generate a 'key_results' array containing 1 to 3 'KeyResult' objects.
+         For EACH 'KeyResult' object, you MUST generate a 'tasks' array containing 1 to 3 'Task' objects.
+         It is CRITICAL that you populate the 'key_results' list for every OKR, and the 'tasks' list for every KeyResult.
         KEY GUIDELINES FOR QUALITY AND ACCURACY:
         - Task Timelines: Must be realistic considering the {days_remaining} days left in Q{quarter}. Prioritize actions that can achieve significant progress or be completed within this timeframe. Use 'timeline' values exclusively from: {timeline_categories_str}.
         You will receive a 'comprehensive_analysis' text.
         OUTPUT FORMAT:
+        # Example of the overall JSON structure (content is illustrative; refer to schemas for full details):
         {{
           "current_quarter_info": "Q{quarter}, {days_remaining} days remaining",
           "okrs": [
                   "target_metric": "LinkedIn Organic Reach Percentage Increase",
                   "target_value": "15%",
                   "tasks": [
+                    {{
+                      "task_category": "Content Strategy",
+                      "task_description": "Develop and schedule a 4-week content calendar for LinkedIn focusing on industry insights.",
+                      "objective_deliverable": "Deliverable: A finalized 4-week content calendar with 3 posts per week, approved and scheduled.",
+                      "effort": "{EffortLevel.MEDIUM.value}",
+                      "timeline": "{TimelineCategory.IMMEDIATE.value}",
+                      "responsible_party": "Content Marketing Manager",
+                      "success_criteria_metrics": "Content calendar completed and approved by [Date]. All posts scheduled by [Date].",
+                      "dependencies_prerequisites": "Completion of Q{quarter} keyword research and audience persona refinement.",
+                      "priority": "{PriorityLevel.HIGH.value}",
+                      "priority_justification": "Critical for maintaining consistent brand voice and achieving engagement targets for the quarter.",
+                      "why_proposed": "Analysis of LinkedIn insights report (Page 3) showed a 20% drop in engagement last month, attributed to inconsistent posting schedule and lack of targeted content themes.",
+                      "task_type": "{TaskType.INITIATIVE.value}",
+                      "data_subject": "{DataSubject.POSTS.value}"
+                    }}
                   ]
                 }}
               ]
             }}
           ],
+          "overall_strategic_focus": "Example: Focus on data-driven content strategy and proactive community engagement to boost Q{quarter} performance.",
+          "generation_timestamp": "{datetime.utcnow().isoformat()}Z"
         }}
         Focus on precision, quality, actionability, and strict adherence to the specified JSON output schema and all constraints.