Spaces:
Running
Running
Update analytics_plot_generator.py
Browse files- analytics_plot_generator.py +211 -159
analytics_plot_generator.py
CHANGED
@@ -184,14 +184,8 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
185 |
|
186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
187 |
-
# Define a broader range of colors or a colormap for more sentiment types
|
188 |
colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
|
189 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
190 |
-
# Or keep your specific colors if sentiment labels are fixed:
|
191 |
-
# colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
|
192 |
-
# pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]
|
193 |
-
|
194 |
-
|
195 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
196 |
ax.set_title('Mention Sentiment Distribution')
|
197 |
ax.axis('equal')
|
@@ -204,73 +198,54 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
204 |
finally:
|
205 |
plt.close('all')
|
206 |
|
207 |
-
# ---
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
df_copy = df.copy()
|
219 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
220 |
-
df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
|
221 |
-
df_copy = df_copy.dropna(subset=[date_column, count_column]).sort_values(by=date_column)
|
222 |
-
if df_copy.empty:
|
223 |
-
return create_placeholder_plot(title="Total Follower Growth", message="No valid data after cleaning.")
|
224 |
-
|
225 |
-
fig, ax = plt.subplots(figsize=(10,5))
|
226 |
-
ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
|
227 |
-
ax.set_title('Total Follower Growth Over Time')
|
228 |
-
ax.set_xlabel('Date')
|
229 |
-
ax.set_ylabel('Total Followers')
|
230 |
-
ax.grid(True, linestyle='--', alpha=0.7)
|
231 |
-
plt.xticks(rotation=45)
|
232 |
-
plt.tight_layout()
|
233 |
-
return fig
|
234 |
-
except Exception as e:
|
235 |
-
logging.error(f"Error in generate_total_follower_growth_plot: {e}", exc_info=True)
|
236 |
-
return create_placeholder_plot(title="Total Follower Growth Error", message=str(e))
|
237 |
-
finally:
|
238 |
-
plt.close('all')
|
239 |
-
|
240 |
-
# --- New Plot Functions ---
|
241 |
-
|
242 |
-
def generate_followers_count_over_time_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
|
243 |
-
"""Generates a plot for specific follower counts over time (e.g., monthly gains)."""
|
244 |
title = f"Followers Count Over Time ({type_value})"
|
245 |
-
logging.info(f"Generating {title}. Date: '{
|
246 |
|
247 |
if df is None or df.empty:
|
248 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
249 |
|
250 |
-
required_cols = [
|
251 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
252 |
if missing_cols:
|
253 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")
|
254 |
|
255 |
try:
|
256 |
df_copy = df.copy()
|
257 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value]
|
258 |
|
259 |
if df_filtered.empty:
|
260 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
261 |
|
262 |
-
|
263 |
-
df_filtered[
|
264 |
-
|
|
|
|
|
|
|
|
|
265 |
|
266 |
if df_filtered.empty:
|
267 |
return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")
|
268 |
|
269 |
fig, ax = plt.subplots(figsize=(10, 5))
|
270 |
-
ax.plot(df_filtered[
|
|
|
|
|
271 |
ax.set_title(title)
|
272 |
ax.set_xlabel('Date')
|
273 |
ax.set_ylabel('Follower Count')
|
|
|
274 |
ax.grid(True, linestyle='--', alpha=0.7)
|
275 |
plt.xticks(rotation=45)
|
276 |
plt.tight_layout()
|
@@ -281,50 +256,69 @@ def generate_followers_count_over_time_plot(df, date_column='date', count_column
|
|
281 |
finally:
|
282 |
plt.close('all')
|
283 |
|
284 |
-
def generate_followers_growth_rate_plot(df,
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
title = f"Follower Growth Rate ({type_value})"
|
287 |
-
logging.info(f"Generating {title}. Date: '{
|
288 |
|
289 |
if df is None or df.empty:
|
290 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
291 |
|
292 |
-
required_cols = [
|
293 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
294 |
if missing_cols:
|
295 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")
|
296 |
|
297 |
try:
|
298 |
df_copy = df.copy()
|
299 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value]
|
300 |
|
301 |
if df_filtered.empty:
|
302 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
303 |
|
304 |
-
df_filtered[
|
305 |
-
df_filtered[
|
306 |
-
df_filtered =
|
|
|
|
|
307 |
|
308 |
-
if df_filtered.empty or len(df_filtered) < 2:
|
309 |
return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
|
310 |
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
# Replace inf with NaN
|
315 |
df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
|
316 |
-
df_filtered.dropna(subset=['
|
317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
-
if
|
320 |
-
return create_placeholder_plot(title=title, message="No valid growth rate data after calculation.")
|
321 |
|
322 |
-
fig, ax = plt.subplots(figsize=(10, 5))
|
323 |
-
ax.plot(df_filtered.index, df_filtered['growth_rate'], marker='o', linestyle='-', color='lightcoral')
|
324 |
ax.set_title(title)
|
325 |
ax.set_xlabel('Date')
|
326 |
ax.set_ylabel('Growth Rate (%)')
|
327 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
|
|
328 |
ax.grid(True, linestyle='--', alpha=0.7)
|
329 |
plt.xticks(rotation=45)
|
330 |
plt.tight_layout()
|
@@ -335,50 +329,78 @@ def generate_followers_growth_rate_plot(df, date_column='date', count_column='fo
|
|
335 |
finally:
|
336 |
plt.close('all')
|
337 |
|
338 |
-
def generate_followers_by_demographics_plot(df, category_col='category_name',
|
339 |
-
|
340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
if df is None or df.empty:
|
343 |
return create_placeholder_plot(title=plot_title, message="No follower data available.")
|
344 |
|
345 |
-
required_cols = [category_col,
|
346 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
347 |
if missing_cols:
|
348 |
-
return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}.")
|
349 |
|
350 |
-
if type_value is None:
|
351 |
return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
|
352 |
|
353 |
try:
|
354 |
df_copy = df.copy()
|
355 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value]
|
356 |
|
357 |
if df_filtered.empty:
|
358 |
return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")
|
359 |
|
360 |
-
df_filtered[
|
|
|
361 |
|
362 |
-
|
363 |
-
|
|
|
|
|
|
|
364 |
|
365 |
if demographics_data.empty:
|
366 |
return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
|
367 |
|
368 |
-
# Limit to top N for readability if too many categories
|
369 |
top_n = 10
|
370 |
if len(demographics_data) > top_n:
|
371 |
demographics_data = demographics_data.head(top_n)
|
372 |
-
|
|
|
|
|
373 |
|
|
|
|
|
|
|
|
|
374 |
|
375 |
-
|
376 |
-
|
377 |
-
|
|
|
378 |
ax.set_xlabel(category_col.replace('_', ' ').title())
|
379 |
ax.set_ylabel('Number of Followers')
|
|
|
|
|
|
|
380 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
381 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
plt.tight_layout()
|
383 |
return fig
|
384 |
except Exception as e:
|
@@ -404,16 +426,14 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
|
|
404 |
try:
|
405 |
df_copy = df.copy()
|
406 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
407 |
-
# Assuming 'engagement' is already a rate (e.g., 0.05 for 5%). If it's an absolute count, this logic needs change.
|
408 |
df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce')
|
409 |
df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column)
|
410 |
|
411 |
if df_copy.empty:
|
412 |
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
413 |
|
414 |
-
# Resample daily and calculate mean engagement rate
|
415 |
engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
|
416 |
-
engagement_over_time = engagement_over_time.dropna()
|
417 |
|
418 |
if engagement_over_time.empty:
|
419 |
return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
|
@@ -423,7 +443,12 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
|
|
423 |
ax.set_title(title)
|
424 |
ax.set_xlabel('Date')
|
425 |
ax.set_ylabel('Engagement Rate')
|
426 |
-
|
|
|
|
|
|
|
|
|
|
|
427 |
ax.grid(True, linestyle='--', alpha=0.7)
|
428 |
plt.xticks(rotation=45)
|
429 |
plt.tight_layout()
|
@@ -434,7 +459,7 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
|
|
434 |
finally:
|
435 |
plt.close('all')
|
436 |
|
437 |
-
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
|
438 |
"""Generates a plot for reach (clicks) over time."""
|
439 |
title = "Reach Over Time (Clicks)"
|
440 |
logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
@@ -453,16 +478,12 @@ def generate_reach_over_time_plot(df, date_column='published_at', reach_col='cli
|
|
453 |
df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
|
454 |
df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
|
455 |
|
456 |
-
if df_copy.empty:
|
457 |
-
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
458 |
|
459 |
reach_over_time = df_copy.resample('D')[reach_col].sum()
|
460 |
-
|
461 |
-
|
462 |
-
pass # allow plot of zeros if that's the case
|
463 |
-
elif reach_over_time.sum() == 0 and not df_copy.empty : # if all values are zero
|
464 |
-
pass
|
465 |
-
|
466 |
|
467 |
fig, ax = plt.subplots(figsize=(10, 5))
|
468 |
ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
|
@@ -498,8 +519,8 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
498 |
df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
|
499 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
500 |
|
501 |
-
if df_copy.empty:
|
502 |
-
|
503 |
|
504 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
505 |
|
@@ -521,73 +542,111 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
521 |
|
522 |
if __name__ == '__main__':
|
523 |
# Create dummy data for testing
|
524 |
-
# Posts Data (merged with stats)
|
525 |
posts_data = {
|
526 |
'id': [f'post{i}' for i in range(1, 7)],
|
527 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
528 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
529 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
530 |
-
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
531 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
532 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
533 |
-
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
|
534 |
}
|
535 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
536 |
|
537 |
-
# Follower Stats Data
|
538 |
follower_data = {
|
539 |
-
'
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
|
|
|
|
566 |
}
|
567 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
568 |
-
# Ensure 'total_followers' for generate_total_follower_growth_plot is correctly populated for its specific rows
|
569 |
-
sample_follower_stats_df.loc[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot', 'total_followers'] = sample_follower_stats_df['follower_count_o']
|
570 |
-
|
571 |
-
|
572 |
-
logging.info("--- Testing New Plot Generations ---")
|
573 |
-
|
574 |
-
fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
|
575 |
-
if fig_followers_count: logging.info("Followers Count Over Time (monthly) plot generated.")
|
576 |
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
|
583 |
-
fig_role = generate_followers_by_demographics_plot(
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
588 |
|
589 |
-
|
590 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
591 |
|
592 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
593 |
if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
|
@@ -597,12 +656,5 @@ if __name__ == '__main__':
|
|
597 |
|
598 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
599 |
if fig_impressions: logging.info("Impressions Over Time plot generated.")
|
600 |
-
|
601 |
-
# Test existing total follower growth plot with appropriate data
|
602 |
-
total_followers_df = sample_follower_stats_df[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot'].copy()
|
603 |
-
total_followers_df['date'] = pd.to_datetime(total_followers_df['date']) # Ensure date is datetime
|
604 |
-
fig_total_growth = generate_total_follower_growth_plot(total_followers_df, date_column='date', count_column='total_followers')
|
605 |
-
if fig_total_growth: logging.info("Total Follower Growth plot (existing function) generated.")
|
606 |
-
|
607 |
|
608 |
logging.info("Test script finished. Review plots if displayed locally or saved.")
|
|
|
184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
185 |
|
186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
|
|
187 |
colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
|
188 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
|
|
|
|
|
|
|
|
|
|
189 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
190 |
ax.set_title('Mention Sentiment Distribution')
|
191 |
ax.axis('equal')
|
|
|
198 |
finally:
|
199 |
plt.close('all')
|
200 |
|
201 |
+
# --- Updated Follower Plot Functions ---
|
202 |
+
|
203 |
+
def generate_followers_count_over_time_plot(df, date_info_column='category_name',
|
204 |
+
organic_count_col='follower_count_organic',
|
205 |
+
paid_count_col='follower_count_paid',
|
206 |
+
type_filter_column='follower_count_type',
|
207 |
+
type_value='follower_gains_monthly'):
|
208 |
+
"""
|
209 |
+
Generates a plot for specific follower counts (organic and paid) over time.
|
210 |
+
Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
|
211 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
title = f"Followers Count Over Time ({type_value})"
|
213 |
+
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
214 |
|
215 |
if df is None or df.empty:
|
216 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
217 |
|
218 |
+
required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
|
219 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
220 |
if missing_cols:
|
221 |
+
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
222 |
|
223 |
try:
|
224 |
df_copy = df.copy()
|
225 |
+
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() # Use .copy() to avoid SettingWithCopyWarning
|
226 |
|
227 |
if df_filtered.empty:
|
228 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
229 |
|
230 |
+
# Convert date_info_column to datetime
|
231 |
+
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
|
232 |
+
|
233 |
+
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
|
234 |
+
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
|
235 |
+
|
236 |
+
df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj')
|
237 |
|
238 |
if df_filtered.empty:
|
239 |
return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")
|
240 |
|
241 |
fig, ax = plt.subplots(figsize=(10, 5))
|
242 |
+
ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
|
243 |
+
ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
|
244 |
+
|
245 |
ax.set_title(title)
|
246 |
ax.set_xlabel('Date')
|
247 |
ax.set_ylabel('Follower Count')
|
248 |
+
ax.legend()
|
249 |
ax.grid(True, linestyle='--', alpha=0.7)
|
250 |
plt.xticks(rotation=45)
|
251 |
plt.tight_layout()
|
|
|
256 |
finally:
|
257 |
plt.close('all')
|
258 |
|
259 |
+
def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
260 |
+
organic_count_col='follower_count_organic',
|
261 |
+
paid_count_col='follower_count_paid',
|
262 |
+
type_filter_column='follower_count_type',
|
263 |
+
type_value='follower_gains_monthly'):
|
264 |
+
"""
|
265 |
+
Calculates and plots follower growth rate (organic and paid) over time.
|
266 |
+
Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
|
267 |
+
"""
|
268 |
title = f"Follower Growth Rate ({type_value})"
|
269 |
+
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
270 |
|
271 |
if df is None or df.empty:
|
272 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
273 |
|
274 |
+
required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
|
275 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
276 |
if missing_cols:
|
277 |
+
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
278 |
|
279 |
try:
|
280 |
df_copy = df.copy()
|
281 |
+
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
282 |
|
283 |
if df_filtered.empty:
|
284 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
285 |
|
286 |
+
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
|
287 |
+
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce')
|
288 |
+
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
|
289 |
+
|
290 |
+
df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
|
291 |
|
292 |
+
if df_filtered.empty or len(df_filtered) < 2: # Need at least 2 points for pct_change
|
293 |
return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
|
294 |
|
295 |
+
df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
|
296 |
+
df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
|
297 |
+
|
298 |
+
# Replace inf with NaN then drop NaNs for growth rates
|
299 |
df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
|
300 |
+
# df_filtered.dropna(subset=['organic_growth_rate', 'paid_growth_rate'], how='all', inplace=True) # Keep row if at least one rate is valid
|
301 |
|
302 |
+
fig, ax = plt.subplots(figsize=(10, 5))
|
303 |
+
|
304 |
+
plotted_organic = False
|
305 |
+
if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty:
|
306 |
+
ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate')
|
307 |
+
plotted_organic = True
|
308 |
+
|
309 |
+
plotted_paid = False
|
310 |
+
if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
|
311 |
+
ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate')
|
312 |
+
plotted_paid = True
|
313 |
|
314 |
+
if not plotted_organic and not plotted_paid:
|
315 |
+
return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")
|
316 |
|
|
|
|
|
317 |
ax.set_title(title)
|
318 |
ax.set_xlabel('Date')
|
319 |
ax.set_ylabel('Growth Rate (%)')
|
320 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
321 |
+
ax.legend()
|
322 |
ax.grid(True, linestyle='--', alpha=0.7)
|
323 |
plt.xticks(rotation=45)
|
324 |
plt.tight_layout()
|
|
|
329 |
finally:
|
330 |
plt.close('all')
|
331 |
|
332 |
+
def generate_followers_by_demographics_plot(df, category_col='category_name',
|
333 |
+
organic_count_col='follower_count_organic',
|
334 |
+
paid_count_col='follower_count_paid',
|
335 |
+
type_filter_column='follower_count_type',
|
336 |
+
type_value=None, plot_title="Followers by Demographics"):
|
337 |
+
"""
|
338 |
+
Generates a grouped bar chart for follower demographics (organic and paid).
|
339 |
+
'category_col' here is the demographic attribute (e.g., Location, Industry).
|
340 |
+
"""
|
341 |
+
logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
342 |
|
343 |
if df is None or df.empty:
|
344 |
return create_placeholder_plot(title=plot_title, message="No follower data available.")
|
345 |
|
346 |
+
required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
|
347 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
348 |
if missing_cols:
|
349 |
+
return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
350 |
|
351 |
+
if type_value is None:
|
352 |
return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
|
353 |
|
354 |
try:
|
355 |
df_copy = df.copy()
|
356 |
+
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
357 |
|
358 |
if df_filtered.empty:
|
359 |
return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")
|
360 |
|
361 |
+
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
|
362 |
+
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
|
363 |
|
364 |
+
demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
|
365 |
+
# Sort by total followers (organic + paid) for better visualization
|
366 |
+
demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col]
|
367 |
+
demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
|
368 |
+
|
369 |
|
370 |
if demographics_data.empty:
|
371 |
return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
|
372 |
|
|
|
373 |
top_n = 10
|
374 |
if len(demographics_data) > top_n:
|
375 |
demographics_data = demographics_data.head(top_n)
|
376 |
+
plot_title_updated = f"{plot_title} (Top {top_n})"
|
377 |
+
else:
|
378 |
+
plot_title_updated = plot_title
|
379 |
|
380 |
+
fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
|
381 |
+
|
382 |
+
bar_width = 0.35
|
383 |
+
index = np.arange(len(demographics_data.index))
|
384 |
|
385 |
+
bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue')
|
386 |
+
bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral')
|
387 |
+
|
388 |
+
ax.set_title(plot_title_updated)
|
389 |
ax.set_xlabel(category_col.replace('_', ' ').title())
|
390 |
ax.set_ylabel('Number of Followers')
|
391 |
+
ax.set_xticks(index)
|
392 |
+
ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
|
393 |
+
ax.legend()
|
394 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
395 |
+
|
396 |
+
# Add labels on top of bars
|
397 |
+
for bar_group in [bars1, bars2]:
|
398 |
+
for bar in bar_group:
|
399 |
+
yval = bar.get_height()
|
400 |
+
if yval > 0: # Only add label if value is not zero
|
401 |
+
ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]),
|
402 |
+
str(int(yval)), ha='center', va='bottom', fontsize=8)
|
403 |
+
|
404 |
plt.tight_layout()
|
405 |
return fig
|
406 |
except Exception as e:
|
|
|
426 |
try:
|
427 |
df_copy = df.copy()
|
428 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
|
|
429 |
df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce')
|
430 |
df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column)
|
431 |
|
432 |
if df_copy.empty:
|
433 |
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
434 |
|
|
|
435 |
engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
|
436 |
+
engagement_over_time = engagement_over_time.dropna()
|
437 |
|
438 |
if engagement_over_time.empty:
|
439 |
return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
|
|
|
443 |
ax.set_title(title)
|
444 |
ax.set_xlabel('Date')
|
445 |
ax.set_ylabel('Engagement Rate')
|
446 |
+
# Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
|
447 |
+
max_rate_val = engagement_over_time.max()
|
448 |
+
formatter_xmax = 1.0 if max_rate_val <= 1.5 else 100.0 # Heuristic: if max is small, assume 0-1 scale
|
449 |
+
if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
|
450 |
+
formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
|
451 |
+
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
452 |
ax.grid(True, linestyle='--', alpha=0.7)
|
453 |
plt.xticks(rotation=45)
|
454 |
plt.tight_layout()
|
|
|
459 |
finally:
|
460 |
plt.close('all')
|
461 |
|
462 |
+
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
|
463 |
"""Generates a plot for reach (clicks) over time."""
|
464 |
title = "Reach Over Time (Clicks)"
|
465 |
logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
|
|
478 |
df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
|
479 |
df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
|
480 |
|
481 |
+
if df_copy.empty: # After dropping NaNs for essential columns
|
482 |
+
return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
|
483 |
|
484 |
reach_over_time = df_copy.resample('D')[reach_col].sum()
|
485 |
+
# No need to check if reach_over_time is empty if df_copy wasn't, sum of NaNs is 0.
|
486 |
+
# Plot will show 0 if all sums are 0.
|
|
|
|
|
|
|
|
|
487 |
|
488 |
fig, ax = plt.subplots(figsize=(10, 5))
|
489 |
ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
|
|
|
519 |
df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
|
520 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
521 |
|
522 |
+
if df_copy.empty: # After dropping NaNs for essential columns
|
523 |
+
return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
|
524 |
|
525 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
526 |
|
|
|
542 |
|
543 |
if __name__ == '__main__':
|
544 |
# Create dummy data for testing
|
|
|
545 |
posts_data = {
|
546 |
'id': [f'post{i}' for i in range(1, 7)],
|
547 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
548 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
549 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
550 |
+
'shareCount': [1, 0, 1, 1, 2, 0, 1], # Assuming this is the correct column name from your data
|
551 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
552 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
553 |
+
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
|
554 |
}
|
555 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
556 |
|
557 |
+
# Updated Follower Stats Data
|
558 |
follower_data = {
|
559 |
+
'follower_count_type': [
|
560 |
+
'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
|
561 |
+
'follower_geo', 'follower_geo', 'follower_geo',
|
562 |
+
'follower_function', 'follower_function',
|
563 |
+
'follower_industry', 'follower_industry',
|
564 |
+
'follower_seniority', 'follower_seniority'
|
565 |
+
],
|
566 |
+
# 'category_name' now holds dates for time-series, and actual categories for demographics
|
567 |
+
'category_name': [
|
568 |
+
'2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
|
569 |
+
'USA', 'Canada', 'UK', # Geo
|
570 |
+
'Engineering', 'Sales', # Function/Role
|
571 |
+
'Tech', 'Finance', # Industry
|
572 |
+
'Senior', 'Junior' # Seniority
|
573 |
+
],
|
574 |
+
'follower_count_organic': [
|
575 |
+
100, 110, 125, # Organic monthly gains
|
576 |
+
500, 300, 150, # Organic Geo counts
|
577 |
+
400, 200, # Organic Role counts
|
578 |
+
250, 180, # Organic Industry counts
|
579 |
+
300, 220 # Organic Seniority counts
|
580 |
+
],
|
581 |
+
'follower_count_paid': [
|
582 |
+
20, 30, 25, # Paid monthly gains
|
583 |
+
50, 40, 60, # Paid Geo counts
|
584 |
+
30, 20, # Paid Role counts
|
585 |
+
45, 35, # Paid Industry counts
|
586 |
+
60, 40 # Paid Seniority counts
|
587 |
+
]
|
588 |
}
|
589 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
|
591 |
+
logging.info("--- Testing Updated Follower Plot Generations ---")
|
592 |
+
|
593 |
+
fig_followers_count = generate_followers_count_over_time_plot(
|
594 |
+
sample_follower_stats_df.copy(),
|
595 |
+
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
596 |
+
)
|
597 |
+
if fig_followers_count: logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
|
598 |
+
|
599 |
+
fig_followers_rate = generate_followers_growth_rate_plot(
|
600 |
+
sample_follower_stats_df.copy(),
|
601 |
+
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
602 |
+
)
|
603 |
+
if fig_followers_rate: logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
|
604 |
+
|
605 |
+
fig_geo = generate_followers_by_demographics_plot(
|
606 |
+
sample_follower_stats_df.copy(),
|
607 |
+
type_value='follower_geo', # category_col defaults to 'category_name'
|
608 |
+
plot_title="Followers by Location (Organic/Paid)"
|
609 |
+
)
|
610 |
+
if fig_geo: logging.info("Followers by Location (grouped organic/paid) plot generated.")
|
611 |
|
612 |
+
fig_role = generate_followers_by_demographics_plot(
|
613 |
+
sample_follower_stats_df.copy(),
|
614 |
+
type_value='follower_function',
|
615 |
+
plot_title="Followers by Role (Organic/Paid)"
|
616 |
+
)
|
617 |
+
if fig_role: logging.info("Followers by Role (grouped organic/paid) plot generated.")
|
618 |
+
|
619 |
+
fig_industry = generate_followers_by_demographics_plot(
|
620 |
+
sample_follower_stats_df.copy(),
|
621 |
+
type_value='follower_industry',
|
622 |
+
plot_title="Followers by Industry (Organic/Paid)"
|
623 |
+
)
|
624 |
+
if fig_industry: logging.info("Followers by Industry (grouped organic/paid) plot generated.")
|
625 |
+
|
626 |
+
fig_seniority = generate_followers_by_demographics_plot(
|
627 |
+
sample_follower_stats_df.copy(),
|
628 |
+
type_value='follower_seniority',
|
629 |
+
plot_title="Followers by Seniority (Organic/Paid)"
|
630 |
+
)
|
631 |
+
if fig_seniority: logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
|
632 |
+
|
633 |
+
logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
|
634 |
+
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
635 |
+
if fig_posts_activity: logging.info("Posts activity plot generated.")
|
636 |
+
|
637 |
+
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
638 |
+
if fig_engagement_type: logging.info("Engagement type plot generated.")
|
639 |
|
640 |
+
# Dummy mentions for testing
|
641 |
+
mentions_data = {
|
642 |
+
'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
|
643 |
+
'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
|
644 |
+
}
|
645 |
+
sample_mentions_df = pd.DataFrame(mentions_data)
|
646 |
+
fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
|
647 |
+
if fig_mentions_activity: logging.info("Mentions activity plot generated.")
|
648 |
+
fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
|
649 |
+
if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
|
650 |
|
651 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
652 |
if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
|
|
|
656 |
|
657 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
658 |
if fig_impressions: logging.info("Impressions Over Time plot generated.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
659 |
|
660 |
logging.info("Test script finished. Review plots if displayed locally or saved.")
|