Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

GuglielmoTor committed 25 days ago

Commit

e24f98c

verified ·

1 Parent(s): cc22c24

Create task_extraction_model.py

Browse files

Files changed (1) hide show

features/insight_and_tasks/agents/task_extraction_model.py +155 -0

features/insight_and_tasks/agents/task_extraction_model.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import enum
+import json
+from typing import List, Optional, Literal
+from pydantic import BaseModel, Field, field_validator, ValidationInfo
+from datetime import datetime, date
# Optional dependency: the Google Gen AI SDK, which exposes `genai.Client`.
# When it is missing, `genai` is bound to None so this module can still be
# imported; code that needs the SDK must check for None before using it.
try:
    from google import genai
except ImportError:
    print("Warning: 'google.genai' library not found. Please install it.")
    print("If you are using the standard Gemini API, try: pip install google-genai")
    print("If using Vertex AI, ensure the Google Cloud SDK is configured.")
    genai = None  # Placeholder to allow script to be parsed
+from features.insight_and_tasks.data_models.tasks import (
+    TaskExtractionOutput,
+    OKR,
+    KeyResult,
+    Task,
+    EffortLevel,
+    TimelineCategory,
+    PriorityLevel,
+    TaskType,
+    DataSubject # Ensure all are imported
+)
+# --- Helper Function for Date Calculations ---
def get_quarter_info(today: Optional[date] = None):
    """Return the calendar quarter, year, and days left in that quarter.

    Args:
        today: Reference date. When omitted, ``date.today()`` is used, so
            existing zero-argument callers behave as before. Passing an
            explicit date makes the result deterministic.

    Returns:
        A 4-tuple ``(quarter, year, days_remaining, today)``:
        ``quarter`` is 1-4, ``year`` is the reference date's year,
        ``days_remaining`` is the number of days from the reference date to
        the last day of its quarter (0 on that last day), and ``today`` is
        the reference date itself.
    """
    if today is None:
        today = date.today()

    quarter = (today.month - 1) // 3 + 1

    # Quarters close at the end of March, June, September and December.
    closing_month = quarter * 3
    # March and December have 31 days; June and September have 30.
    closing_day = 31 if closing_month in (3, 12) else 30
    quarter_end = date(today.year, closing_month, closing_day)

    # Clamp kept from the original as a safeguard against a negative count.
    days_remaining = max(0, (quarter_end - today).days)
    return quarter, today.year, days_remaining, today
+# --- Main Task Extraction Function ---
def _build_task_extraction_prompt(user_text_input: str) -> str:
    """Build the LLM prompt: quarter context from get_quarter_info() plus the text to analyze."""
    quarter, year, days_remaining, current_date_obj = get_quarter_info()
    current_date_iso = current_date_obj.isoformat()
    return f"""You are a Time-Aware Task Extraction Specialist, an AI expert in meticulously analyzing strategic insights (e.g., from LinkedIn analytics) and transforming them into a structured set of actionable tasks, organized within an Objectives and KeyResults (OKRs) framework.
Your output MUST be a valid JSON object that strictly conforms to the 'TaskExtractionOutput' schema provided.
CURRENT CONTEXTUAL INFORMATION (CRITICAL - Use these exact values in your output where specified):
- Current Quarter: Q{quarter}
- Current Year: {year}
- Days remaining in current quarter: {days_remaining}
- Today's Date (for your context only, not for direct output unless specified by a schema field): {current_date_iso}
When populating the 'current_quarter_info' field in the TaskExtractionOutput, use the format: 'Q{quarter} {year}, {days_remaining} days remaining'.
GUIDELINES FOR TASK GENERATION:
1.  For EACH 'OKR' object, you MUST generate a 'key_results' array containing 1 to 3 'KeyResult' objects.
2.  For EACH 'KeyResult' object, you MUST generate a 'tasks' array containing 1 to 3 'Task' objects.
3.  It is CRITICAL that you populate the 'key_results' list for every OKR, and the 'tasks' list for every KeyResult.
4.  Ensure all fields in the schema are appropriately filled based on the input text. If information for an optional field is not present, omit it or use its default.
Now, analyze the following text and generate the structured output:
---
TEXT TO ANALYZE:
{user_text_input}
---
"""


def _extract_response_text(response):
    """Return the text payload of a Gemini response, or None when none can be found."""
    # Preferred location: the convenience `.text` attribute.
    text = getattr(response, 'text', None)
    if text:
        return text

    # Fallback: first part of the first candidate.
    candidates = getattr(response, 'candidates', None)
    if not candidates:
        return None
    try:
        part_text = getattr(candidates[0].content.parts[0], 'text', None)
    except (IndexError, AttributeError, TypeError):
        # TypeError covers `parts` being None (not subscriptable); all three
        # mean "no usable text here".
        return None
    return part_text or None


def extract_tasks_from_text(user_text_input: str, api_key: str) -> TaskExtractionOutput:
    """
    Extracts tasks from input text using Gemini API and structures them as TaskExtractionOutput.

    The prompt embeds the current quarter, year and days remaining (from
    get_quarter_info()) and asks the model for JSON matching the
    TaskExtractionOutput schema; the returned JSON is then validated with
    that Pydantic model.

    Args:
        user_text_input: The text to analyze.
        api_key: The Gemini API key.
    Returns:
        A TaskExtractionOutput Pydantic model instance.
    Raises:
        ValueError: If the API call fails, the response carries no text, or
            the text cannot be parsed/validated as TaskExtractionOutput.
        ImportError: If the 'google.genai' library is not available or does
            not expose a usable ``Client``.
    """
    if genai is None:
        raise ImportError("The 'google.genai' library is not available. Please install and configure it.")

    # An AttributeError here means the installed `genai` module has no usable
    # `Client`. Callers are documented to expect ImportError for SDK problems,
    # so the error is translated rather than leaked.
    try:
        client = genai.Client(api_key=api_key)
    except AttributeError as e:
        raise ImportError(
            f"Failed to initialize Gemini client or configure API key: {e}. "
            "genai.Client not found. Please adapt API call to your SDK version."
        ) from e

    prompt = _build_task_extraction_prompt(user_text_input)

    # Broad catch is deliberate: every failure of the remote call is surfaced
    # to callers as ValueError, with the original exception chained.
    try:
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=prompt,
            config={
                'response_mime_type': 'application/json',
                'response_schema': TaskExtractionOutput,  # Pass the Pydantic model class
            },
        )
    except Exception as e:
        raise ValueError(f"Gemini API call failed: {e}") from e

    response_json_text = _extract_response_text(response)
    if not response_json_text:
        # Empty response: include prompt feedback when the response has any.
        feedback_message = ""
        if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
            feedback_message = f"Prompt feedback: {response.prompt_feedback}. "
        raise ValueError(f"Failed to generate content or response text is empty. {feedback_message}Full response: {response}")

    try:
        # Validate and parse the JSON response using the Pydantic model
        return TaskExtractionOutput.model_validate_json(response_json_text)
    except Exception as e:  # Pydantic validation errors or JSON parsing errors
        raise ValueError(f"Failed to parse or validate API response: {e}\nRaw response text: {response_json_text}") from e