GuglielmoTor committed on
Commit
9edb5c4
·
verified ·
1 Parent(s): af334ac

Update features/insight_and_tasks/agents/task_extraction_agent.py

Browse files
features/insight_and_tasks/agents/task_extraction_agent.py CHANGED
@@ -1,5 +1,4 @@
1
  # agents/task_extraction_agent.py
2
- from enum import Enum
3
  import logging
4
  from typing import Optional
5
  from datetime import datetime, date # Ensure date is imported if used for type hints
@@ -27,10 +26,6 @@ logger = logging.getLogger(__name__)
27
 
28
  DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20" # Or your specific model
29
 
30
- def _get_enum_values_str(enum_class: type[Enum]) -> str:
31
- """Helper to format enum values for inclusion in the prompt."""
32
- return ", ".join([f"'{e.value}'" for e in enum_class])
33
-
34
  class TaskExtractionAgent:
35
  """
36
  Agent specialized in extracting actionable tasks and OKRs from analysis insights,
@@ -48,7 +43,7 @@ class TaskExtractionAgent:
48
  current_date: The current date to use for quarter calculations. Defaults to today.
49
  """
50
  self.api_key = api_key # Store if needed by LlmAgent or other components
51
- self.model_name = "gemini-1.5-pro" #model_name or DEFAULT_AGENT_MODEL
52
  self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
53
 
54
  # LlmAgent is initialized with dynamic instruction and output schema
@@ -89,61 +84,6 @@ class TaskExtractionAgent:
89
  quarter = self._get_quarter(self.current_date)
90
  days_remaining = self._days_until_quarter_end(self.current_date)
91
 
92
- # Dynamically get enum values to include in the prompt
93
- effort_levels_str = _get_enum_values_str(EffortLevel)
94
- task_types_str = _get_enum_values_str(TaskType)
95
- data_subjects_str = _get_enum_values_str(DataSubject)
96
- timeline_categories_str = _get_enum_values_str(TimelineCategory)
97
- priority_levels_str = _get_enum_values_str(PriorityLevel)
98
-
99
- # Detailed schema descriptions based on your Pydantic models.
100
- # These are manually transcribed from your model descriptions for this example.
101
- # For maximum robustness, consider a helper function to generate these
102
- # strings directly by introspecting your Pydantic models if they change frequently.
103
- task_schema_details = f"""
104
- 'Task' Model Schema: Represents a single, actionable item.
105
- (Refer to your Pydantic 'Task' model for exact field definitions and descriptions)
106
- - task_category (string, required): The broader strategic category or theme (e.g., Content Strategy, Audience Engagement, Reputation Management, Performance Monitoring). Helps in organizing and reporting tasks.
107
- - task_description (string, required): A concise, clear, and actionable description of what needs to be done for this specific task.
108
- - objective_deliverable (string, required): The specific, measurable outcome or output expected from completing this task. Clearly defines what 'done' looks like.
109
- - effort (string, required): An estimation of the resources (time, complexity) required. Allowed values: {effort_levels_str}.
110
- - timeline (string, required): The projected timeframe for completing this task. Allowed values: {timeline_categories_str}.
111
- - responsible_party (string, required): The designated team, role, or individual accountable for execution.
112
- - success_criteria_metrics (string, required): Specific, measurable criteria and KPIs to determine task success.
113
- - dependencies_prerequisites (string, optional): Any tasks, resources, or conditions that must be met before this task can begin or be completed. If not applicable, omit or use null.
114
- - priority (string, required): The assigned priority level. Allowed values: {priority_levels_str}.
115
- - priority_justification (string, required): A brief rationale for the assigned priority level, linking to impact or urgency.
116
- - why_proposed (string, required): The core reason for proposing this task, clearly linking back to specific findings or insights from the data analysis.
117
- - task_type (string, required): Classifies the task. Allowed values: {task_types_str}.
118
- - data_subject (string, conditional): For 'tracking' tasks, specifies the primary data domain (e.g., follower_stats, posts). Allowed values: {data_subjects_str}. This field MUST be specified if 'task_type' is '{TaskType.TRACKING.value}'. For '{TaskType.INITIATIVE.value}' tasks, it can be '{DataSubject.GENERAL.value}', null, or omitted if not specific.
119
- """
120
-
121
- key_result_schema_details = f"""
122
- 'KeyResult' Model Schema: A specific, measurable outcome contributing to an Objective.
123
- (Refer to your Pydantic 'KeyResult' model for exact field definitions and descriptions)
124
- - key_result_description (string, required): A clear, specific, measurable, achievable, relevant, and time-bound (SMART) description of the desired outcome.
125
- - tasks (array of 'Task' objects, required, can be empty if no tasks defined yet): A list of specific, actionable tasks to achieve this key result.
126
- - target_metric (string, optional): The primary metric to quantify achievement (e.g., 'Follower Growth Rate').
127
- - target_value (string, optional): The specific target for the 'target_metric' (e.g., '5%', '1000 new followers').
128
- """
129
-
130
- okr_schema_details = f"""
131
- 'OKR' Model Schema: Defines an Objective and its associated Key Results.
132
- (Refer to your Pydantic 'OKR' model for exact field definitions and descriptions)
133
- - objective_description (string, required): A high-level, qualitative, and aspirational goal.
134
- - key_results (array of 'KeyResult' objects, required): A list of 2-5 specific and measurable Key Results that define success for the objective.
135
- - objective_timeline (string, required): The overall projected timeline category for achieving this objective. Allowed values: {timeline_categories_str}.
136
- - objective_owner (string, optional): The team or individual primarily responsible for this objective.
137
- """
138
-
139
- task_extraction_output_schema_details = f"""
140
- 'TaskExtractionOutput' Model Schema: This is the root JSON object you MUST return.
141
- (Refer to your Pydantic 'TaskExtractionOutput' model for exact field definitions and descriptions)
142
- - current_quarter_info (string, required): Information about the current quarter. YOU MUST USE THIS EXACT VALUE: "Q{quarter}, {days_remaining} days remaining".
143
- - okrs (array of 'OKR' objects, required): A list of Objectives and Key Results derived from the analysis.
144
- - overall_strategic_focus (string, optional): A brief summary of the main strategic focus areas identified.
145
- - generation_timestamp (string, required, ISO 8601 format e.g., "YYYY-MM-DDTHH:MM:SS.ffffffZ"): Timestamp of when this output was generated. You should generate this.
146
- """
147
 
148
  return f"""
149
  You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
@@ -153,21 +93,9 @@ class TaskExtractionAgent:
153
  - Days remaining in current quarter: {days_remaining}
154
  - Today's Date (for your context only, not for direct output unless specified by a schema field): {self.current_date.isoformat()}
155
 
156
- YOUR MANDATE:
157
- 1. Thoroughly analyze the provided 'comprehensive_analysis' text.
158
- 2. Define clear, aspirational Objectives. These become the 'objective_description' in 'OKR' objects.
159
- 3. For each Objective, formulate 2-3 specific, measurable Key Results. These populate the 'key_results' list within each 'OKR' object.
160
- 4. Under each KeyResult, detail the actionable Tasks required to achieve it. These populate the 'tasks' list within each 'KeyResult' object.
161
- 5. Strict Schema Adherence: Your entire output MUST be a single, valid JSON object that strictly conforms to the 'TaskExtractionOutput' Pydantic schema. All nested objects ('OKR', 'KeyResult', 'Task') MUST also strictly conform to their respective schemas. Pay extremely close attention to required fields, data types (string, array, etc.), and valid enum values.
162
-
163
- DETAILED SCHEMA DEFINITIONS (Your output MUST precisely follow these structures):
164
- {task_extraction_output_schema_details}
165
-
166
- {okr_schema_details}
167
-
168
- {key_result_schema_details}
169
-
170
- {task_schema_details}
171
 
172
  KEY GUIDELINES FOR QUALITY AND ACCURACY:
173
  - Task Timelines: Must be realistic considering the {days_remaining} days left in Q{quarter}. Prioritize actions that can achieve significant progress or be completed within this timeframe. Use 'timeline' values exclusively from: {timeline_categories_str}.
@@ -183,8 +111,7 @@ class TaskExtractionAgent:
183
  You will receive a 'comprehensive_analysis' text.
184
 
185
  OUTPUT FORMAT:
186
- You MUST return a SINGLE JSON object. This object must be a valid instance of the 'TaskExtractionOutput' Pydantic schema.
187
- Example of the overall JSON structure (content is illustrative; refer to schemas for full details):
188
  {{
189
  "current_quarter_info": "Q{quarter}, {days_remaining} days remaining",
190
  "okrs": [
@@ -198,32 +125,28 @@ class TaskExtractionAgent:
198
  "target_metric": "LinkedIn Organic Reach Percentage Increase",
199
  "target_value": "15%",
200
  "tasks": [
201
- // ... Array of Task objects, each following the 'Task' schema ...
202
- // See detailed Task example below.
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  ]
204
  }}
205
  ]
206
  }}
207
  ],
208
- "overall_strategic_focus": "Example: Focus on data-driven content strategy and proactive community engagement to boost Q{quarter} performance.", // Optional
209
- "generation_timestamp": "{datetime.utcnow().isoformat()}Z" // Generate an ISO 8601 UTC timestamp
210
- }}
211
-
212
- Detailed Example of a single 'Task' object (ensure all fields are covered as per schema):
213
- {{
214
- "task_category": "Content Strategy",
215
- "task_description": "Develop and schedule a 4-week content calendar for LinkedIn focusing on industry insights.",
216
- "objective_deliverable": "Deliverable: A finalized 4-week content calendar with 3 posts per week, approved and scheduled.",
217
- "effort": "{EffortLevel.MEDIUM.value}",
218
- "timeline": "{TimelineCategory.IMMEDIATE.value}",
219
- "responsible_party": "Content Marketing Manager",
220
- "success_criteria_metrics": "Content calendar completed and approved by [Date]. All posts scheduled by [Date].",
221
- "dependencies_prerequisites": "Completion of Q{quarter} keyword research and audience persona refinement.",
222
- "priority": "{PriorityLevel.HIGH.value}",
223
- "priority_justification": "Critical for maintaining consistent brand voice and achieving engagement targets for the quarter.",
224
- "why_proposed": "Analysis of LinkedIn insights report (Page 3) showed a 20% drop in engagement last month, attributed to inconsistent posting schedule and lack of targeted content themes.",
225
- "task_type": "{TaskType.INITIATIVE.value}",
226
- "data_subject": "{DataSubject.POSTS.value}" // Could be 'general' for broader initiatives too
227
  }}
228
 
229
  Focus on precision, quality, actionability, and strict adherence to the specified JSON output schema and all constraints.
 
1
  # agents/task_extraction_agent.py
 
2
  import logging
3
  from typing import Optional
4
  from datetime import datetime, date # Ensure date is imported if used for type hints
 
26
 
27
  DEFAULT_AGENT_MODEL = "gemini-2.5-flash-preview-05-20" # Or your specific model
28
 
 
 
 
 
29
  class TaskExtractionAgent:
30
  """
31
  Agent specialized in extracting actionable tasks and OKRs from analysis insights,
 
43
  current_date: The current date to use for quarter calculations. Defaults to today.
44
  """
45
  self.api_key = api_key # Store if needed by LlmAgent or other components
46
+ self.model_name = model_name or DEFAULT_AGENT_MODEL
47
  self.current_date = current_date or datetime.utcnow().date() # Use date object for consistency
48
 
49
  # LlmAgent is initialized with dynamic instruction and output schema
 
84
  quarter = self._get_quarter(self.current_date)
85
  days_remaining = self._days_until_quarter_end(self.current_date)
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  return f"""
89
  You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
 
93
  - Days remaining in current quarter: {days_remaining}
94
  - Today's Date (for your context only, not for direct output unless specified by a schema field): {self.current_date.isoformat()}
95
 
96
+ For EACH 'OKR' object, you MUST generate a 'key_results' array containing 1 to 3 'KeyResult' objects.
97
+ For EACH 'KeyResult' object, you MUST generate a 'tasks' array containing 1 to 3 'Task' objects.
98
+ It is CRITICAL that you populate the 'key_results' list for every OKR, and the 'tasks' list for every KeyResult.
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  KEY GUIDELINES FOR QUALITY AND ACCURACY:
101
  - Task Timelines: Must be realistic considering the {days_remaining} days left in Q{quarter}. Prioritize actions that can achieve significant progress or be completed within this timeframe. Use 'timeline' values exclusively from: {timeline_categories_str}.
 
111
  You will receive a 'comprehensive_analysis' text.
112
 
113
  OUTPUT FORMAT:
114
+ # Example of the overall JSON structure (content is illustrative; refer to schemas for full details):
 
115
  {{
116
  "current_quarter_info": "Q{quarter}, {days_remaining} days remaining",
117
  "okrs": [
 
125
  "target_metric": "LinkedIn Organic Reach Percentage Increase",
126
  "target_value": "15%",
127
  "tasks": [
128
+ {{
129
+ "task_category": "Content Strategy",
130
+ "task_description": "Develop and schedule a 4-week content calendar for LinkedIn focusing on industry insights.",
131
+ "objective_deliverable": "Deliverable: A finalized 4-week content calendar with 3 posts per week, approved and scheduled.",
132
+ "effort": "{EffortLevel.MEDIUM.value}",
133
+ "timeline": "{TimelineCategory.IMMEDIATE.value}",
134
+ "responsible_party": "Content Marketing Manager",
135
+ "success_criteria_metrics": "Content calendar completed and approved by [Date]. All posts scheduled by [Date].",
136
+ "dependencies_prerequisites": "Completion of Q{quarter} keyword research and audience persona refinement.",
137
+ "priority": "{PriorityLevel.HIGH.value}",
138
+ "priority_justification": "Critical for maintaining consistent brand voice and achieving engagement targets for the quarter.",
139
+ "why_proposed": "Analysis of LinkedIn insights report (Page 3) showed a 20% drop in engagement last month, attributed to inconsistent posting schedule and lack of targeted content themes.",
140
+ "task_type": "{TaskType.INITIATIVE.value}",
141
+ "data_subject": "{DataSubject.POSTS.value}"
142
+ }}
143
  ]
144
  }}
145
  ]
146
  }}
147
  ],
148
+ "overall_strategic_focus": "Example: Focus on data-driven content strategy and proactive community engagement to boost Q{quarter} performance.",
149
+ "generation_timestamp": "{datetime.utcnow().isoformat()}Z"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }}
151
 
152
  Focus on precision, quality, actionability, and strict adherence to the specified JSON output schema and all constraints.