Spaces:

GuglielmoTor
/

LinkedinMonitor

Sleeping

App Files Files Community

GuglielmoTor committed on May 2

Commit

16353a0

verified ·

1 Parent(s): 4d49280

Create posts_categorization.py

Browse files

Files changed (1) hide show

posts_categorization.py +105 -0

posts_categorization.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import pandas as pd
+from groq import Groq
+import instructor
+from pydantic import BaseModel
+import os
# Groq credential taken from the environment; the lookup yields None when the
# variable is not set.
api_key = os.environ.get('GROQ_API_KEY')

# Single module-wide Groq client, patched by instructor in JSON mode so that
# completions can be requested with a `response_model`.
client = instructor.from_groq(
    Groq(api_key=api_key),
    mode=instructor.Mode.JSON,
)
# Structured-output schema used as the `response_model` of the summarization
# request: a single string field.
# NOTE(review): documented with comments on purpose -- pydantic copies a class
# docstring into the model's JSON schema description, which would alter what
# is sent alongside the request. Confirm before converting to a docstring.
class SummaryOutput(BaseModel):
    summary: str  # summary text produced by the model
# Pydantic schema used as the `response_model` of the classification request.
# `category` is a plain string: nothing in this schema restricts it to the
# label list that is written into the prompt.
class ClassificationOutput(BaseModel):
    category: str  # category name chosen by the model
# Summarize post text
def summarize_post(text):
    """Summarize a LinkedIn post in 5 to 10 words using the module's client.

    Args:
        text: Post text. Non-string values are converted with ``str``.

    Returns:
        The ``summary`` string from the structured response, or ``None`` when
        ``text`` is missing (``None``/NaN), is blank after truncation, or the
        request raises.
    """
    # pd.isna(None) is already True, so None needs no separate check.
    if pd.isna(text):
        return None
    text = str(text)[:2000]  # truncate to avoid token overflow
    if not text.strip():
        # Nothing to summarize: skip the API call instead of prompting the
        # model with an empty post.
        return None
    prompt = f"""
    Summarize the following LinkedIn post in 5 to 10 words.
    Only return the summary inside a JSON field called 'summary'.
    Post Text:
    \"\"\"{text}\"\"\"
    """
    try:
        response = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            response_model=SummaryOutput,
            messages=[
                {"role": "system", "content": "You are a precise summarizer. Only return a JSON object with a 'summary' string."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3
        )
        return response.summary
    except Exception as e:
        # Best-effort: a failed request is reported and yields no summary
        # rather than aborting the caller.
        print(f"Summarization error: {e}")
        return None
# Classify post summary into structured categories
def classify_post(summary, labels):
    """Ask the model to pick the single best-matching category for a summary.

    Args:
        summary: Short summary of a post.
        labels: Iterable of category name strings offered to the model.

    Returns:
        The ``category`` string from the structured response, returned as-is
        (it is not checked against ``labels``), or ``None`` when ``summary``
        is missing (``None``/NaN) or the request raises.
    """
    # pd.isna(None) is already True, so None needs no separate check.
    if pd.isna(summary):
        return None
    # One category per line: label names may themselves contain commas, so a
    # comma-separated list would blur where one category ends and the next
    # begins.
    label_lines = "\n".join(f"    - {label}" for label in labels)
    prompt = f"""
    Post Summary: "{summary}"
    Available Categories:
{label_lines}
    Task: Choose the single most relevant category from the list above that applies to this summary. Return only one category in a structured JSON format under the field 'category'.
    If no category applies, return 'None'.
    """
    try:
        result = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            response_model=ClassificationOutput,
            messages=[
                {"role": "system", "content": "You are a strict classifier. Return only one matching category name under the field 'category'."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=60
        )
        return result.category
    except Exception as e:
        # Best-effort: a failed request is reported and yields no category
        # rather than aborting the caller.
        print(f"Classification error: {e}")
        return None
def summarize_and_classify_post(text, labels):
    """Summarize one post, then classify that summary.

    Args:
        text: Raw post text, passed to ``summarize_post``.
        labels: Category names, passed to ``classify_post``.

    Returns:
        A dict with keys ``"summary"`` and ``"category"``. ``"category"`` is
        ``None`` whenever the summary is falsy, in which case no
        classification is attempted.
    """
    post_summary = summarize_post(text)
    if post_summary:
        post_category = classify_post(post_summary, labels)
    else:
        # No usable summary, so there is nothing to classify.
        post_category = None
    return {"summary": post_summary, "category": post_category}
def batch_summarize_and_classify(posts, labels=None):
    """Summarize and classify every post in ``posts``.

    Args:
        posts: Iterable of mapping-like objects; each post's text is read
            with ``post.get("text")``.
        labels: Category names to classify against. When ``None`` or empty,
            the built-in list of categories below is used. A caller-supplied
            list is honored rather than being overwritten.

    Returns:
        A list with one ``summarize_and_classify_post`` result per post, in
        input order.
    """
    if not labels:
        # Fallback category set, used only when the caller provides none.
        labels = [
            "Company Culture and Values",
            "Employee Stories and Spotlights",
            "Work-Life Balance, Flexibility, and Well-being",
            "Diversity, Equity, and Inclusion (DEI)",
            "Professional Development and Growth Opportunities",
            "Mission, Vision, and Social Responsibility",
            "None",
        ]
    return [
        summarize_and_classify_post(post.get("text"), labels)
        for post in posts
    ]