Spaces:
Running
Running
Create task_extraction_model.py
Browse files
features/insight_and_tasks/agents/task_extraction_model.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import enum
|
2 |
+
import json
|
3 |
+
from typing import List, Optional, Literal
|
4 |
+
from pydantic import BaseModel, Field, field_validator, ValidationInfo
|
5 |
+
from datetime import datetime, date
|
6 |
+
|
7 |
+
try:
|
8 |
+
from google import genai
|
9 |
+
except ImportError:
|
10 |
+
print("Warning: 'google.generai' library not found. Please install it.")
|
11 |
+
print("If you are using the standard Gemini API, try: pip install google-generativeai")
|
12 |
+
print("If using Vertex AI, ensure the Google Cloud SDK is configured.")
|
13 |
+
genai = None # Placeholder to allow script to be parsed
|
14 |
+
|
15 |
+
from features.insight_and_tasks.data_models.tasks import (
|
16 |
+
TaskExtractionOutput,
|
17 |
+
OKR,
|
18 |
+
KeyResult,
|
19 |
+
Task,
|
20 |
+
EffortLevel,
|
21 |
+
TimelineCategory,
|
22 |
+
PriorityLevel,
|
23 |
+
TaskType,
|
24 |
+
DataSubject # Ensure all are imported
|
25 |
+
)
|
26 |
+
|
27 |
+
# --- Helper Function for Date Calculations ---
|
28 |
+
def get_quarter_info(today: Optional[date] = None):
    """Calculate the calendar quarter, year, and days remaining in that quarter.

    Args:
        today: Reference date to evaluate. Defaults to ``date.today()`` when
            omitted, which matches the previous zero-argument behaviour.
            Passing an explicit date makes the result deterministic.

    Returns:
        A tuple ``(quarter, year, days_remaining, today)`` where ``quarter``
        is 1-4, ``days_remaining`` is the number of days from ``today`` up to
        the last day of that quarter (0 on the last day itself), and
        ``today`` is the reference date that was used.
    """
    if today is None:
        today = date.today()

    current_year = today.year
    current_quarter = (today.month - 1) // 3 + 1

    # Quarters end in March, June, September and December; only the length of
    # that final month differs (31, 30, 30, 31).
    end_month = current_quarter * 3
    end_day = (31, 30, 30, 31)[current_quarter - 1]
    end_of_quarter_date = date(current_year, end_month, end_day)

    # The end date is derived from today's own quarter, so this is never
    # negative and needs no clamping.
    days_remaining = (end_of_quarter_date - today).days

    return current_quarter, current_year, days_remaining, today
|
48 |
+
|
49 |
+
# --- Main Task Extraction Function ---
|
50 |
+
def _extract_response_text(response):
    """Return the JSON text carried by a Gemini response, or None if absent.

    Prefers ``response.text``; otherwise falls back to the first part of the
    first candidate. Any structural mismatch is treated as "no text" so the
    caller can raise a single, descriptive error.
    """
    text = getattr(response, 'text', None)
    if text:
        return text

    candidates = getattr(response, 'candidates', None)
    if not candidates:
        return None

    try:
        part = candidates[0].content.parts[0]
    except (IndexError, AttributeError, TypeError):
        # TypeError covers a candidate whose content/parts is None.
        return None

    return getattr(part, 'text', None) or None


def extract_tasks_from_text(user_text_input: str, api_key: str) -> TaskExtractionOutput:
    """
    Extracts tasks from input text using Gemini API and structures them as TaskExtractionOutput.

    Args:
        user_text_input: The text to analyze.
        api_key: The Gemini API key.

    Returns:
        A TaskExtractionOutput Pydantic model instance.

    Raises:
        ValueError: If the API call fails, the response contains no text, or
            the response text cannot be parsed/validated against the schema.
        ImportError: If 'google.genai' is not available or the client cannot
            be initialized.
    """
    if not genai:
        raise ImportError("The 'google.genai' library is not available. Please install and configure it.")

    # Initialize the Gemini client. genai.Client is the interface this
    # function is written against; an SDK without it is not supported.
    try:
        client = genai.Client(api_key=api_key)
    except AttributeError:
        # Fallback probe for an SDK that exposes configure() instead of Client.
        # The call structure below would still be wrong for such an SDK, so
        # this path always ends in an ImportError after warning the user.
        try:
            genai.configure(api_key=api_key)
            print("Warning: genai.Client not found. The API call structure might be incorrect for your 'google.genai' version.")
            print("Assuming a client object with 'models.generate_content' method is expected.")
            raise NotImplementedError("genai.Client not found. Please adapt API call to your SDK version.")
        except Exception as e:
            raise ImportError(f"Failed to initialize Gemini client or configure API key: {e}") from e

    quarter, year, days_remaining, current_date_obj = get_quarter_info()
    current_date_iso = current_date_obj.isoformat()

    # Construct the detailed prompt for the LLM
    prompt = f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.

Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' schema provided.

CURRENT CONTEXTUAL INFORMATION (CRITICAL - Use these exact values in your output where specified):
- Current Quarter: Q{quarter}
- Current Year: {year}
- Days remaining in current quarter: {days_remaining}
- Today's Date (for your context only, not for direct output unless specified by a schema field): {current_date_iso}

When populating the 'current_quarter_info' field in the TaskExtractionOutput, use the format: 'Q{quarter} {year}, {days_remaining} days remaining'.

GUIDELINES FOR TASK GENERATION:
1. For EACH 'OKR' object, you MUST generate a 'key_results' array containing 1 to 3 'KeyResult' objects.
2. For EACH 'KeyResult' object, you MUST generate a 'tasks' array containing 1 to 3 'Task' objects.
3. It is CRITICAL that you populate the 'key_results' list for every OKR, and the 'tasks' list for every KeyResult.
4. Ensure all fields in the schema are appropriately filled based on the input text. If information for an optional field is not present, omit it or use its default.

Now, analyze the following text and generate the structured output:
---
TEXT TO ANALYZE:
{user_text_input}
---
"""

    try:
        # Must go through the client instance; a bare `models` name does not
        # exist in this module and raised NameError on every call.
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=prompt,
            config={
                'response_mime_type': 'application/json',
                'response_schema': TaskExtractionOutput,  # Pass the Pydantic model class
            },
        )
    except Exception as e:
        raise ValueError(f"Gemini API call failed: {e}") from e

    response_json_text = _extract_response_text(response)

    if not response_json_text:
        # Empty response: surface any prompt feedback to help diagnose blocks.
        feedback_message = ""
        if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
            feedback_message = f"Prompt feedback: {response.prompt_feedback}. "
        raise ValueError(f"Failed to generate content or response text is empty. {feedback_message}Full response: {response}")

    try:
        # Validate and parse the JSON response using the Pydantic model
        return TaskExtractionOutput.model_validate_json(response_json_text)
    except Exception as e:  # Pydantic validation errors or JSON parsing errors
        raise ValueError(f"Failed to parse or validate API response: {e}\nRaw response text: {response_json_text}") from e
|