Spaces:

GuglielmoTor
/

LinkedinMonitor

Sleeping

App Files Files Community

GuglielmoTor committed on Jun 4

Commit

bf08717

verified ·

1 Parent(s): f3a46a9

Update features/insight_and_tasks/agents/task_extraction_agent.py

Browse files

Files changed (1) hide show

features/insight_and_tasks/agents/task_extraction_agent.py +134 -48

features/insight_and_tasks/agents/task_extraction_agent.py CHANGED Viewed

@@ -43,7 +43,7 @@ class TaskExtractionAgent:
             current_date: The current date to use for quarter calculations. Defaults to today.
         """
         self.api_key = api_key # Store if needed by LlmAgent or other components
-        self.model_name = model_name or DEFAULT_AGENT_MODEL
         self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
         # LlmAgent is initialized with dynamic instruction and output schema
@@ -80,63 +80,149 @@ class TaskExtractionAgent:
         return max(0, days_remaining) # Ensure non-negative
     def _get_instruction_prompt(self) -> str:
-        """Generates the dynamic instruction string for the LlmAgent."""
         quarter = self._get_quarter(self.current_date)
         days_remaining = self._days_until_quarter_end(self.current_date)
-        # Dynamically include Pydantic model field descriptions for better LLM guidance
-        # This part can be complex if done fully automatically. For now, manually summarizing key fields.
-        task_fields_summary = (
-            "Each Task must include: task_category (e.g., Content Strategy), task_description, "
-            "objective_deliverable, effort (Small, Medium, Large), timeline (Immediate, Short-term, Medium-term, Long-term), "
-            "responsible_party, success_criteria_metrics, dependencies_prerequisites (optional), "
-            "priority (High, Medium, Low) with priority_justification, why_proposed (linking to analysis), "
-            "task_type (initiative or tracking), data_subject (for tracking tasks: follower_stats, posts, mentions, general)."
-        )
         return f"""
-        You are a Time-Aware Task Extraction Specialist. Your primary function is to meticulously analyze strategic insights
-        derived from LinkedIn analytics and transform them into a structured set of actionable tasks. These tasks should be
-        organized within an Objectives and Key Results (OKRs) framework.
-        CURRENT CONTEXTUAL INFORMATION (DO NOT CHANGE THIS IN YOUR OUTPUT):
         - Current Quarter: Q{quarter}
         - Days remaining in current quarter: {days_remaining}
-        - Today's Date (for context): {self.current_date.isoformat()}
         YOUR MANDATE:
-        1.  Define clear, aspirational Objectives (qualitative goals).
-        2.  For each Objective, formulate 2-3 specific, measurable Key Results.
-        3.  Under each Key Result, list detailed, actionable Tasks required to achieve it.
-        4.  CRITICAL: Each Task MUST strictly adhere to the 'Task' Pydantic model fields. This means providing values for ALL required fields: {task_fields_summary}
-        5.  Task Timelines: Must be realistic given the {days_remaining} days left in Q{quarter}. Prioritize actions that can make significant progress or be completed within this timeframe. Use TimelineCategory enum values.
-        6.  Task Types: Clearly distinguish between 'initiative' (new actions/projects) and 'tracking' (ongoing monitoring/measurement).
-        7.  Data Subjects: For 'tracking' tasks, accurately specify the relevant 'data_subject'. For 'initiative' tasks, this can be 'general' or null if not specific to one data type.
-        8.  Rationale ('why_proposed'): This is crucial. Each task's proposal must be explicitly justified by and linked back to specific findings, trends, or recommendations mentioned in the input 'comprehensive_analysis'.
-        9.  Priority: Assign a priority (High, Medium, Low) to each task and provide a 'priority_justification'.
-        INPUT: You will receive a 'comprehensive_analysis' text.
         OUTPUT FORMAT:
-        You MUST return a single JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema.
-        This JSON object will contain:
-        - 'current_quarter_info': A string exactly like "Q{quarter}, {days_remaining} days remaining". (This is fixed based on the context above).
-        - 'okrs': A list, where each item is an 'OKR' object.
-        - 'overall_strategic_focus': (Optional) A brief summary of the main strategic themes emerging from the OKRs.
-        - 'generation_timestamp': (This will be auto-filled if you conform to the schema, or you can provide an ISO timestamp).
-        Example of a Task (ensure all fields from the Pydantic model are covered):
         {{
-            "task_category": "Content Creation",
-            "task_description": "Launch a 3-part blog series on AI in Marketing.",
-            "objective_deliverable": "Objective: Increase thought leadership in AI marketing. Deliverable: 3 published blog posts.",
-            "effort": "Medium",
-            "timeline": "Short-term",
-            "responsible_party": "Content Team Lead",
-            "success_criteria_metrics": "Average 500 views per post, 10+ shares per post.",
-            "dependencies_prerequisites": "Keyword research for AI marketing topics completed.",
-            "priority": "High",
-            "priority_justification": "Addresses key strategic goal of establishing AI expertise.",
-            "why_proposed": "Analysis highlighted a gap in content related to AI, a high-interest area for our target audience.",
-            "task_type": "initiative",
-            "data_subject": "general"
         }}
-        Focus on quality, actionability, and strict adherence to the output schema.
         """
     async def extract_tasks(self, comprehensive_analysis: str) -> TaskExtractionOutput:

             current_date: The current date to use for quarter calculations. Defaults to today.
         """
         self.api_key = api_key # Store if needed by LlmAgent or other components
+        self.model_name = "gemini-2.5-pro-preview-03-25" #model_name or DEFAULT_AGENT_MODEL
         self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
         # LlmAgent is initialized with dynamic instruction and output schema
         return max(0, days_remaining) # Ensure non-negative
     def _get_instruction_prompt(self) -> str:
+        """Generates the dynamic instruction string for the LLM agent."""
         quarter = self._get_quarter(self.current_date)
         days_remaining = self._days_until_quarter_end(self.current_date)
+        # Dynamically get enum values to include in the prompt
+        effort_levels_str = _get_enum_values_str(EffortLevel)
+        task_types_str = _get_enum_values_str(TaskType)
+        data_subjects_str = _get_enum_values_str(DataSubject)
+        timeline_categories_str = _get_enum_values_str(TimelineCategory)
+        priority_levels_str = _get_enum_values_str(PriorityLevel)
+        # Detailed schema descriptions based on your Pydantic models.
+        # These are manually transcribed from your model descriptions for this example.
+        # For maximum robustness, consider a helper function to generate these
+        # strings directly by introspecting your Pydantic models if they change frequently.
+        task_schema_details = f"""
+        'Task' Model Schema: Represents a single, actionable item.
+        (Refer to your Pydantic 'Task' model for exact field definitions and descriptions)
+        - task_category (string, required): The broader strategic category or theme (e.g., Content Strategy, Audience Engagement, Reputation Management, Performance Monitoring). Helps in organizing and reporting tasks.
+        - task_description (string, required): A concise, clear, and actionable description of what needs to be done for this specific task.
+        - objective_deliverable (string, required): The specific, measurable outcome or output expected from completing this task. Clearly defines what 'done' looks like.
+        - effort (string, required): An estimation of the resources (time, complexity) required. Allowed values: {effort_levels_str}.
+        - timeline (string, required): The projected timeframe for completing this task. Allowed values: {timeline_categories_str}.
+        - responsible_party (string, required): The designated team, role, or individual accountable for execution.
+        - success_criteria_metrics (string, required): Specific, measurable criteria and KPIs to determine task success.
+        - dependencies_prerequisites (string, optional): Any tasks, resources, or conditions that must be met before this task can begin or be completed. If not applicable, omit or use null.
+        - priority (string, required): The assigned priority level. Allowed values: {priority_levels_str}.
+        - priority_justification (string, required): A brief rationale for the assigned priority level, linking to impact or urgency.
+        - why_proposed (string, required): The core reason for proposing this task, clearly linking back to specific findings or insights from the data analysis.
+        - task_type (string, required): Classifies the task. Allowed values: {task_types_str}.
+        - data_subject (string, conditional): For 'tracking' tasks, specifies the primary data domain (e.g., follower_stats, posts). Allowed values: {data_subjects_str}. This field MUST be specified if 'task_type' is '{TaskType.TRACKING.value}'. For '{TaskType.INITIATIVE.value}' tasks, it can be '{DataSubject.GENERAL.value}', null, or omitted if not specific.
+        """
+        key_result_schema_details = f"""
+        'KeyResult' Model Schema: A specific, measurable outcome contributing to an Objective.
+        (Refer to your Pydantic 'KeyResult' model for exact field definitions and descriptions)
+        - key_result_description (string, required): A clear, specific, measurable, achievable, relevant, and time-bound (SMART) description of the desired outcome.
+        - tasks (array of 'Task' objects, required, can be empty if no tasks defined yet): A list of specific, actionable tasks to achieve this key result.
+        - target_metric (string, optional): The primary metric to quantify achievement (e.g., 'Follower Growth Rate').
+        - target_value (string, optional): The specific target for the 'target_metric' (e.g., '5%', '1000 new followers').
+        """
+        okr_schema_details = f"""
+        'OKR' Model Schema: Defines an Objective and its associated Key Results.
+        (Refer to your Pydantic 'OKR' model for exact field definitions and descriptions)
+        - objective_description (string, required): A high-level, qualitative, and aspirational goal.
+        - key_results (array of 'KeyResult' objects, required): A list of 2-5 specific and measurable Key Results that define success for the objective.
+        - objective_timeline (string, required): The overall projected timeline category for achieving this objective. Allowed values: {timeline_categories_str}.
+        - objective_owner (string, optional): The team or individual primarily responsible for this objective.
+        """
+        task_extraction_output_schema_details = f"""
+        'TaskExtractionOutput' Model Schema: This is the root JSON object you MUST return.
+        (Refer to your Pydantic 'TaskExtractionOutput' model for exact field definitions and descriptions)
+        - current_quarter_info (string, required): Information about the current quarter. YOU MUST USE THIS EXACT VALUE: "Q{quarter}, {days_remaining} days remaining".
+        - okrs (array of 'OKR' objects, required): A list of Objectives and Key Results derived from the analysis.
+        - overall_strategic_focus (string, optional): A brief summary of the main strategic focus areas identified.
+        - generation_timestamp (string, required, ISO 8601 format e.g., "YYYY-MM-DDTHH:MM:SS.ffffffZ"): Timestamp of when this output was generated. You should generate this.
+        """
         return f"""
+        You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
+        CURRENT CONTEXTUAL INFORMATION (CRITICAL - Use these exact values in your output where specified):
         - Current Quarter: Q{quarter}
         - Days remaining in current quarter: {days_remaining}
+        - Today's Date (for your context only, not for direct output unless specified by a schema field): {self.current_date.isoformat()}
         YOUR MANDATE:
+        1.  Thoroughly analyze the provided 'comprehensive_analysis' text.
+        2.  Define clear, aspirational Objectives. These become the 'objective_description' in 'OKR' objects.
+        3.  For each Objective, formulate 2-3 specific, measurable Key Results. These populate the 'key_results' list within each 'OKR' object.
+        4.  Under each KeyResult, detail the actionable Tasks required to achieve it. These populate the 'tasks' list within each 'KeyResult' object.
+        5.  Strict Schema Adherence: Your entire output MUST be a single, valid JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema. All nested objects ('OKR', 'KeyResult', 'Task') MUST also strictly conform to their respective schemas. Pay extremely close attention to required fields, data types (string, array, etc.), and valid enum values.
+        DETAILED SCHEMA DEFINITIONS (Your output MUST precisely follow these structures):
+        {task_extraction_output_schema_details}
+        {okr_schema_details}
+        {key_result_schema_details}
+        {task_schema_details}
+        KEY GUIDELINES FOR QUALITY AND ACCURACY:
+        - Task Timelines: Must be realistic considering the {days_remaining} days left in Q{quarter}. Prioritize actions that can achieve significant progress or be completed within this timeframe. Use 'timeline' values exclusively from: {timeline_categories_str}.
+        - Task Types: Clearly distinguish between '{TaskType.INITIATIVE.value}' (new actions/projects) and '{TaskType.TRACKING.value}' (ongoing monitoring/measurement). Use 'task_type' values exclusively from: {task_types_str}.
+        - Data Subjects for Tracking: If 'task_type' is '{TaskType.TRACKING.value}', the 'data_subject' field is MANDATORY and must be one of: {data_subjects_str}. For '{TaskType.INITIATIVE.value}' tasks, 'data_subject' can be '{DataSubject.GENERAL.value}', null, or omitted if not specific to one data type.
+        - Rationale ('why_proposed'): This is CRUCIAL. Each task's 'why_proposed' field must provide a clear justification, explicitly linking the task back to specific findings, trends, or recommendations mentioned in the input 'comprehensive_analysis'. Avoid generic statements.
+        - Priority & Justification: Assign a 'priority' (from {priority_levels_str}) to each task and provide a concise 'priority_justification' explaining its importance.
+        - Actionability: All descriptions (Objective, Key Result, Task) must be clear, concise, and define concrete actions or measurable outcomes.
+        - Measurability: Key Results and Task 'success_criteria_metrics' must be specific and quantifiable.
+        - Completeness: Ensure all REQUIRED fields in every Pydantic model are present in your JSON output. Optional fields can be omitted or set to null if not applicable.
+        INPUT:
+        You will receive a 'comprehensive_analysis' text.
         OUTPUT FORMAT:
+        You MUST return a SINGLE JSON object. This object must be a valid instance of the 'TaskExtractionOutput' Pydantic schema.
+        Example of the overall JSON structure (content is illustrative; refer to schemas for full details):
         {{
+          "current_quarter_info": "Q{quarter}, {days_remaining} days remaining",
+          "okrs": [
+            {{
+              "objective_description": "Example: Elevate brand visibility and engagement across key digital channels.",
+              "objective_timeline": "{TimelineCategory.SHORT_TERM.value}",
+              "objective_owner": "Marketing Department",
+              "key_results": [
+                {{
+                  "key_result_description": "Example: Increase organic reach on LinkedIn by 15%.",
+                  "target_metric": "LinkedIn Organic Reach Percentage Increase",
+                  "target_value": "15%",
+                  "tasks": [
+                    // ... Array of Task objects, each following the 'Task' schema ...
+                    // See detailed Task example below.
+                  ]
+                }}
+              ]
+            }}
+          ],
+          "overall_strategic_focus": "Example: Focus on data-driven content strategy and proactive community engagement to boost Q{quarter} performance.", // Optional
+          "generation_timestamp": "{datetime.utcnow().isoformat()}Z" // Generate an ISO 8601 UTC timestamp
         }}
+        Detailed Example of a single 'Task' object (ensure all fields are covered as per schema):
+        {{
+          "task_category": "Content Strategy",
+          "task_description": "Develop and schedule a 4-week content calendar for LinkedIn focusing on industry insights.",
+          "objective_deliverable": "Deliverable: A finalized 4-week content calendar with 3 posts per week, approved and scheduled.",
+          "effort": "{EffortLevel.MEDIUM.value}",
+          "timeline": "{TimelineCategory.IMMEDIATE.value}",
+          "responsible_party": "Content Marketing Manager",
+          "success_criteria_metrics": "Content calendar completed and approved by [Date]. All posts scheduled by [Date].",
+          "dependencies_prerequisites": "Completion of Q{quarter} keyword research and audience persona refinement.",
+          "priority": "{PriorityLevel.HIGH.value}",
+          "priority_justification": "Critical for maintaining consistent brand voice and achieving engagement targets for the quarter.",
+          "why_proposed": "Analysis of LinkedIn insights report (Page 3) showed a 20% drop in engagement last month, attributed to inconsistent posting schedule and lack of targeted content themes.",
+          "task_type": "{TaskType.INITIATIVE.value}",
+          "data_subject": "{DataSubject.POSTS.value}" // Could be 'general' for broader initiatives too
+        }}
+        Focus on precision, quality, actionability, and strict adherence to the specified JSON output schema and all constraints.
+        Ensure all string values in the JSON are properly escaped if they contain special characters (e.g., newlines, quotes).
         """
     async def extract_tasks(self, comprehensive_analysis: str) -> TaskExtractionOutput: