Spaces:
Running
Running
Create Linkedin_Data_API_Calls.py
Browse files- Linkedin_Data_API_Calls.py +146 -0
Linkedin_Data_API_Calls.py
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import requests
|
3 |
+
import html
|
4 |
+
from datetime import datetime
|
5 |
+
from collections import defaultdict
|
6 |
+
from transformers import pipeline
|
7 |
+
from sessions import create_session
|
8 |
+
from error_handling import display_error
|
9 |
+
from posts_categorization import batch_summarize_and_classify
|
10 |
+
import logging
|
11 |
+
|
12 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
13 |
+
|
14 |
+
API_V2_BASE = 'https://api.linkedin.com/v2'
|
15 |
+
API_REST_BASE = "https://api.linkedin.com/rest"
|
16 |
+
|
17 |
+
sentiment_pipeline = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
|
18 |
+
|
19 |
+
|
20 |
+
def fetch_comments(comm_client_id, token_dict, post_urns, stats_map):
    """Fetch comment texts for each post URN via the LinkedIn REST socialActions API.

    Parameters:
        comm_client_id: OAuth2 client id for the Community Management app.
        token_dict: OAuth2 token dict used to authorize the session.
        post_urns: iterable of post URNs to fetch comments for.
        stats_map: dict mapping post URN -> share-statistics dict; posts whose
            'commentCount' is 0 are skipped to avoid pointless API calls.

    Returns:
        dict mapping each processed post URN to a list of comment text strings
        (empty list when the request fails or returns a non-200 status).
    """
    from requests_oauthlib import OAuth2Session

    linkedin = OAuth2Session(comm_client_id, token=token_dict)
    # LinkedIn's versioned REST APIs require an explicit LinkedIn-Version header.
    linkedin.headers.update({'LinkedIn-Version': "202502"})

    all_comments = {}
    for post_urn in post_urns:
        # Skip posts that are known to have no comments.
        if stats_map.get(post_urn, {}).get('commentCount', 0) == 0:
            continue
        try:
            url = f"{API_REST_BASE}/socialActions/{post_urn}/comments"
            response = linkedin.get(url)
            if response.status_code == 200:
                elements = response.json().get('elements', [])
                all_comments[post_urn] = [
                    c.get('message', {}).get('text')
                    for c in elements
                    if c.get('message')
                ]
            else:
                # Previously dropped silently; log so API problems are visible.
                logging.warning("Comments request for %s returned status %s", post_urn, response.status_code)
                all_comments[post_urn] = []
        except Exception as e:
            # Best-effort contract: log the failure and fall back to an empty
            # list rather than aborting the whole batch (was a silent swallow).
            logging.error("Failed to fetch comments for %s: %s", post_urn, e)
            all_comments[post_urn] = []
    return all_comments
|
39 |
+
|
40 |
+
def analyze_sentiment(comments_data):
    """Classify comment sentiment per post and report the dominant label.

    Parameters:
        comments_data: dict mapping post URN -> list of comment text strings.
            Falsy entries (None, "") are skipped.

    Returns:
        dict mapping post URN -> {"sentiment": <dominant label>,
        "percentage": <share of classified comments carrying the dominant
        label, rounded to 1 decimal; 0.0 when nothing was classified>}.
        Posts with no classifiable comments default to 'Neutral π' / 0.0.
    """
    results = {}
    for post_urn, comments in comments_data.items():
        sentiment_counts = defaultdict(int)
        total = 0  # number of comments the pipeline classified successfully
        for comment in comments:
            if not comment:
                continue
            try:
                result = sentiment_pipeline(comment)
                label = result[0]['label'].upper()
                if label in ['POSITIVE', 'VERY POSITIVE']:
                    sentiment_counts['Positive π'] += 1
                elif label in ['NEGATIVE', 'VERY NEGATIVE']:
                    sentiment_counts['Negative π'] += 1
                elif label == 'NEUTRAL':
                    sentiment_counts['Neutral π'] += 1
                else:
                    sentiment_counts['Unknown'] += 1
                total += 1
            except Exception as e:
                # Was a bare `except:` (also caught SystemExit/KeyboardInterrupt);
                # narrowed to Exception and logged so failures are visible.
                logging.error("Sentiment classification failed for a comment on %s: %s", post_urn, e)
                sentiment_counts['Error'] += 1
        # Empty counter -> default label; `default=` handles the no-comments case.
        dominant = max(sentiment_counts, key=sentiment_counts.get, default='Neutral π')
        percentage = round((sentiment_counts[dominant] / total) * 100, 1) if total else 0.0
        results[post_urn] = {"sentiment": dominant, "percentage": percentage}
    return results
|
66 |
+
|
67 |
+
def fetch_posts_and_stats(comm_client_id, community_token, org_urn, count=10):
    """Fetch recent organization posts with statistics, comment sentiment, and
    LLM-derived summaries/categories.

    Parameters:
        comm_client_id: OAuth2 client id for the Community Management app.
        community_token: a ready token dict, or a bare access-token string.
        org_urn: organization URN used as the posts author filter.
        count: maximum number of posts to request (default 10).

    Returns:
        (posts, org_name, sentiments) — posts is a list of display-ready dicts,
        org_name the organization display name, sentiments maps post URN ->
        {"sentiment", "percentage"}.

    Raises:
        ValueError: when the initial posts request fails.
    """
    # Normalize a bare access-token string into the dict shape OAuth sessions expect.
    token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}
    session = create_session(comm_client_id, token=token_dict)
    # Name is currently hard-coded; previously resolved via fetch_org_urn.
    org_name = "GRLS"

    posts_url = f"{API_REST_BASE}/posts?author={org_urn}&q=author&count={count}&sortBy=LAST_MODIFIED"
    try:
        resp = session.get(posts_url)
        resp.raise_for_status()
        raw_posts = resp.json().get("elements", [])
    except requests.exceptions.RequestException as e:
        status = getattr(e.response, 'status_code', 'N/A')
        raise ValueError(f"Failed to fetch posts (Status: {status})") from e

    if not raw_posts:
        return [], org_name, {}

    # Only shares and ugcPosts carry share statistics.
    post_urns = [p["id"] for p in raw_posts if ":share:" in p["id"] or ":ugcPost:" in p["id"]]
    stats_map = {}
    post_texts = [{"text": p.get("commentary") or p.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text", "")} for p in raw_posts]
    structured_results = batch_summarize_and_classify(post_texts)

    # Query share statistics in batches of 20 URNs per request.
    for i in range(0, len(post_urns), 20):
        batch = post_urns[i:i+20]
        params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
        for idx, urn in enumerate(batch):
            key = f"shares[{idx}]" if ":share:" in urn else f"ugcPosts[{idx}]"
            params[key] = urn
        try:
            stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
            stat_resp.raise_for_status()
            for stat in stat_resp.json().get("elements", []):
                urn = stat.get("share") or stat.get("ugcPost")
                if urn:
                    stats_map[urn] = stat.get("totalShareStatistics", {})
        except Exception as e:
            # Was a bare `except:`; keep the best-effort batching but log the failure.
            logging.warning("Share statistics request failed for batch starting at %d: %s", i, e)
            continue

    comments = fetch_comments(comm_client_id, token_dict, post_urns, stats_map)
    sentiments = analyze_sentiment(comments)
    posts = []

    for post in raw_posts:
        post_id = post.get("id")
        stats = stats_map.get(post_id, {})
        timestamp = post.get("publishedAt") or post.get("createdAt")
        # Epoch milliseconds -> local time string (naive; TODO confirm tz expectations).
        when = datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M") if timestamp else "Unknown"
        text = post.get("commentary") or post.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text") or "[No text]"
        # Escape for HTML display, clamp to 250 chars, keep visual line breaks.
        text = html.escape(text[:250]).replace("\n", "<br>") + ("..." if len(text) > 250 else "")

        likes = stats.get("likeCount", 0)
        comments_count = stats.get("commentCount", 0)
        clicks = stats.get("clickCount", 0)
        shares = stats.get("shareCount", 0)
        impressions = stats.get("impressionCount", 0)
        # BUG FIX: the API's 'engagement' field is already the ratio
        # interactions/impressions, so it must not be divided by impressions
        # again (the old code did, yielding near-zero values whenever the API
        # supplied it). Only the manual fallback needs the division.
        if "engagement" in stats:
            engagement = stats["engagement"] * 100
        elif impressions:
            engagement = (likes + comments_count + clicks + shares) / impressions * 100
        else:
            engagement = 0.0

        sentiment_info = sentiments.get(post_id, {"sentiment": "Neutral π", "percentage": 0.0})

        posts.append({
            "id": post_id,
            "when": when,
            "text": text,
            "likes": likes,
            "comments": comments_count,
            "clicks": clicks,
            "shares": shares,
            "impressions": impressions,
            "engagement": f"{engagement:.2f}%",
            "sentiment": sentiment_info["sentiment"],
            "sentiment_percent": sentiment_info["percentage"]
        })
        logging.info(f"Appended post data for {post_id}: Likes={likes}, Comments={comments_count}, Shares={shares}, Clicks={clicks}")

    # structured_results was built from raw_posts in order, so it aligns with posts.
    for post, structured in zip(posts, structured_results):
        post["summary"] = structured["summary"]
        post["category"] = structured["category"]

    return posts, org_name, sentiments
|