GuglielmoTor committed on
Commit
e24f98c
·
verified ·
1 Parent(s): cc22c24

Create task_extraction_model.py

Browse files
features/insight_and_tasks/agents/task_extraction_model.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+ import json
3
+ from typing import List, Optional, Literal
4
+ from pydantic import BaseModel, Field, field_validator, ValidationInfo
5
+ from datetime import datetime, date
6
+
7
# The Gemini SDK is optional at import time so this module can still be
# imported (e.g. for its helpers) when the SDK is absent.
# `from google import genai` is provided by the `google-genai` package; the
# older `google-generativeai` package exposes `google.generativeai` instead
# and does NOT satisfy this import.
try:
    from google import genai
except ImportError:
    print("Warning: 'google.genai' library not found. Please install it.")
    print("If you are using the standard Gemini API, try: pip install google-genai")
    print("If using Vertex AI, ensure the Google Cloud SDK is configured.")
    genai = None  # Placeholder so the module still loads; callers must check for None
14
+
15
+ from features.insight_and_tasks.data_models.tasks import (
16
+ TaskExtractionOutput,
17
+ OKR,
18
+ KeyResult,
19
+ Task,
20
+ EffortLevel,
21
+ TimelineCategory,
22
+ PriorityLevel,
23
+ TaskType,
24
+ DataSubject # Ensure all are imported
25
+ )
26
+
27
+ # --- Helper Function for Date Calculations ---
28
def get_quarter_info(today: Optional[date] = None):
    """Return calendar-quarter information for a given day.

    Args:
        today: The reference day. Defaults to ``date.today()`` when omitted,
            which is the behavior existing zero-argument callers rely on.
            Passing an explicit date makes the result deterministic.

    Returns:
        A 4-tuple ``(quarter, year, days_remaining, today)`` where
        ``quarter`` is 1-4, ``year`` is the reference day's year,
        ``days_remaining`` is the number of days from the reference day to
        the last day of its quarter (0 on the last day itself), and
        ``today`` is the reference day that was used.
    """
    if today is None:
        today = date.today()

    current_year = today.year
    current_quarter = (today.month - 1) // 3 + 1

    # (month, day) of the last calendar day of each quarter.
    quarter_ends = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
    end_month, end_day = quarter_ends[current_quarter]
    end_of_quarter_date = date(current_year, end_month, end_day)

    # Clamp at zero as a safety net; the difference cannot be negative for a
    # day that lies inside the quarter whose end was just computed.
    days_remaining = max(0, (end_of_quarter_date - today).days)

    return current_quarter, current_year, days_remaining, today
48
+
49
+ # --- Main Task Extraction Function ---
50
def _extract_response_text(response):
    """Return the text payload of a Gemini response, or None if it has none.

    Prefers ``response.text``; otherwise falls back to the first part of the
    first candidate (``response.candidates[0].content.parts[0].text``).
    """
    text = getattr(response, 'text', None)
    if text:
        return text

    candidates = getattr(response, 'candidates', None)
    if not candidates:
        return None

    try:
        part = candidates[0].content.parts[0]
    except (IndexError, AttributeError, TypeError):
        # No usable candidate/part structure (TypeError covers parts=None).
        return None
    return getattr(part, 'text', None) or None


def extract_tasks_from_text(user_text_input: str, api_key: str) -> TaskExtractionOutput:
    """
    Extracts tasks from input text using Gemini API and structures them as TaskExtractionOutput.

    Args:
        user_text_input: The text to analyze.
        api_key: The Gemini API key.

    Returns:
        A TaskExtractionOutput Pydantic model instance.

    Raises:
        ValueError: If the API call fails, the response carries no text, or the
            response text cannot be parsed/validated against TaskExtractionOutput.
        ImportError: If the 'google.genai' library is not available, or the
            installed SDK does not expose ``genai.Client``.
    """
    if not genai:
        raise ImportError("The 'google.genai' library is not available. Please install and configure it.")

    # genai.Client is the interface this function is written against. An SDK
    # without it (e.g. the legacy google-generativeai package) cannot serve the
    # client.models.generate_content call below, so fail fast with the same
    # exception type and message the previous fallback path ended in.
    try:
        client = genai.Client(api_key=api_key)
    except AttributeError as e:
        raise ImportError(
            "Failed to initialize Gemini client or configure API key: "
            "genai.Client not found. Please adapt API call to your SDK version."
        ) from e

    quarter, year, days_remaining, current_date_obj = get_quarter_info()
    current_date_iso = current_date_obj.isoformat()

    # Construct the detailed prompt for the LLM
    prompt = f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.

Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' schema provided.

CURRENT CONTEXTUAL INFORMATION (CRITICAL - Use these exact values in your output where specified):
- Current Quarter: Q{quarter}
- Current Year: {year}
- Days remaining in current quarter: {days_remaining}
- Today's Date (for your context only, not for direct output unless specified by a schema field): {current_date_iso}

When populating the 'current_quarter_info' field in the TaskExtractionOutput, use the format: 'Q{quarter} {year}, {days_remaining} days remaining'.

GUIDELINES FOR TASK GENERATION:
1. For EACH 'OKR' object, you MUST generate a 'key_results' array containing 1 to 3 'KeyResult' objects.
2. For EACH 'KeyResult' object, you MUST generate a 'tasks' array containing 1 to 3 'Task' objects.
3. It is CRITICAL that you populate the 'key_results' list for every OKR, and the 'tasks' list for every KeyResult.
4. Ensure all fields in the schema are appropriately filled based on the input text. If information for an optional field is not present, omit it or use its default.

Now, analyze the following text and generate the structured output:
---
TEXT TO ANALYZE:
{user_text_input}
---
"""

    try:
        # Must go through the client instance; a bare `models` name does not exist.
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=prompt,
            config={
                'response_mime_type': 'application/json',
                'response_schema': TaskExtractionOutput,  # Pass the Pydantic model class
            },
        )
    except Exception as e:
        raise ValueError(f"Gemini API call failed: {e}") from e

    response_json_text = _extract_response_text(response)

    if not response_json_text:
        # Empty response: surface any prompt feedback the API attached.
        feedback_message = ""
        if getattr(response, 'prompt_feedback', None):
            feedback_message = f"Prompt feedback: {response.prompt_feedback}. "
        raise ValueError(f"Failed to generate content or response text is empty. {feedback_message}Full response: {response}")

    try:
        # Validate and parse the JSON response using the Pydantic model
        return TaskExtractionOutput.model_validate_json(response_json_text)
    except Exception as e:  # Pydantic validation errors or JSON parsing errors
        raise ValueError(f"Failed to parse or validate API response: {e}\nRaw response text: {response_json_text}") from e