GuglielmoTor committed on
Commit
b7a0e8c
·
verified ·
1 Parent(s): 5ea0f55

Update analytics_plot_generator.py

Browse files
Files changed (1) hide show
  1. analytics_plot_generator.py +211 -159
analytics_plot_generator.py CHANGED
@@ -184,14 +184,8 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
184
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
185
 
186
  fig, ax = plt.subplots(figsize=(8, 5))
187
- # Define a broader range of colors or a colormap for more sentiment types
188
  colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
189
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
190
- # Or keep your specific colors if sentiment labels are fixed:
191
- # colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
192
- # pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]
193
-
194
-
195
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
196
  ax.set_title('Mention Sentiment Distribution')
197
  ax.axis('equal')
@@ -204,73 +198,54 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
204
  finally:
205
  plt.close('all')
206
 
207
- # --- Existing Follower Growth Plot (can be reused or adapted) ---
208
- def generate_total_follower_growth_plot(df, date_column='date', count_column='total_followers'):
209
- """ Generates a plot for TOTAL follower growth over time. """
210
- # This is your existing function, ensure it's called with the correct data for overall growth.
211
- # For 'Follower Count Over Time (follower_gains_monthly)', we'll make a new specific one if structure differs.
212
- logging.info(f"Generating total follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. DF rows: {len(df) if df is not None else 'None'}")
213
- if df is None or df.empty:
214
- return create_placeholder_plot(title="Total Follower Growth", message="No follower data.")
215
- if date_column not in df.columns or count_column not in df.columns:
216
- return create_placeholder_plot(title="Total Follower Growth", message=f"Missing columns: {date_column} or {count_column}.")
217
- try:
218
- df_copy = df.copy()
219
- df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
220
- df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
221
- df_copy = df_copy.dropna(subset=[date_column, count_column]).sort_values(by=date_column)
222
- if df_copy.empty:
223
- return create_placeholder_plot(title="Total Follower Growth", message="No valid data after cleaning.")
224
-
225
- fig, ax = plt.subplots(figsize=(10,5))
226
- ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
227
- ax.set_title('Total Follower Growth Over Time')
228
- ax.set_xlabel('Date')
229
- ax.set_ylabel('Total Followers')
230
- ax.grid(True, linestyle='--', alpha=0.7)
231
- plt.xticks(rotation=45)
232
- plt.tight_layout()
233
- return fig
234
- except Exception as e:
235
- logging.error(f"Error in generate_total_follower_growth_plot: {e}", exc_info=True)
236
- return create_placeholder_plot(title="Total Follower Growth Error", message=str(e))
237
- finally:
238
- plt.close('all')
239
-
240
- # --- New Plot Functions ---
241
-
242
- def generate_followers_count_over_time_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
243
- """Generates a plot for specific follower counts over time (e.g., monthly gains)."""
244
  title = f"Followers Count Over Time ({type_value})"
245
- logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
246
 
247
  if df is None or df.empty:
248
  return create_placeholder_plot(title=title, message="No follower data available.")
249
 
250
- required_cols = [date_column, count_column, type_filter_column]
251
  missing_cols = [col for col in required_cols if col not in df.columns]
252
  if missing_cols:
253
- return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")
254
 
255
  try:
256
  df_copy = df.copy()
257
- df_filtered = df_copy[df_copy[type_filter_column] == type_value]
258
 
259
  if df_filtered.empty:
260
  return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
261
 
262
- df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
263
- df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
264
- df_filtered = df_filtered.dropna(subset=[date_column, count_column]).sort_values(by=date_column)
 
 
 
 
265
 
266
  if df_filtered.empty:
267
  return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")
268
 
269
  fig, ax = plt.subplots(figsize=(10, 5))
270
- ax.plot(df_filtered[date_column], df_filtered[count_column], marker='o', linestyle='-', color='dodgerblue')
 
 
271
  ax.set_title(title)
272
  ax.set_xlabel('Date')
273
  ax.set_ylabel('Follower Count')
 
274
  ax.grid(True, linestyle='--', alpha=0.7)
275
  plt.xticks(rotation=45)
276
  plt.tight_layout()
@@ -281,50 +256,69 @@ def generate_followers_count_over_time_plot(df, date_column='date', count_column
281
  finally:
282
  plt.close('all')
283
 
284
- def generate_followers_growth_rate_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
285
- """Calculates and plots follower growth rate over time."""
 
 
 
 
 
 
 
286
  title = f"Follower Growth Rate ({type_value})"
287
- logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
288
 
289
  if df is None or df.empty:
290
  return create_placeholder_plot(title=title, message="No follower data available.")
291
 
292
- required_cols = [date_column, count_column, type_filter_column]
293
  missing_cols = [col for col in required_cols if col not in df.columns]
294
  if missing_cols:
295
- return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")
296
 
297
  try:
298
  df_copy = df.copy()
299
- df_filtered = df_copy[df_copy[type_filter_column] == type_value]
300
 
301
  if df_filtered.empty:
302
  return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
303
 
304
- df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
305
- df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
306
- df_filtered = df_filtered.dropna(subset=[date_column, count_column]).sort_values(by=date_column).set_index(date_column)
 
 
307
 
308
- if df_filtered.empty or len(df_filtered) < 2:
309
  return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
310
 
311
- # Calculate growth rate: (current - previous) / previous * 100
312
- # Ensure previous is not zero to avoid division by zero
313
- df_filtered['growth_rate'] = df_filtered[count_column].pct_change() * 100
314
- # Replace inf with NaN (e.g. if previous was 0 and current is non-zero) then drop NaNs
315
  df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
316
- df_filtered.dropna(subset=['growth_rate'], inplace=True)
317
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
- if df_filtered.empty:
320
- return create_placeholder_plot(title=title, message="No valid growth rate data after calculation.")
321
 
322
- fig, ax = plt.subplots(figsize=(10, 5))
323
- ax.plot(df_filtered.index, df_filtered['growth_rate'], marker='o', linestyle='-', color='lightcoral')
324
  ax.set_title(title)
325
  ax.set_xlabel('Date')
326
  ax.set_ylabel('Growth Rate (%)')
327
  ax.yaxis.set_major_formatter(mticker.PercentFormatter())
 
328
  ax.grid(True, linestyle='--', alpha=0.7)
329
  plt.xticks(rotation=45)
330
  plt.tight_layout()
@@ -335,50 +329,78 @@ def generate_followers_growth_rate_plot(df, date_column='date', count_column='fo
335
  finally:
336
  plt.close('all')
337
 
338
- def generate_followers_by_demographics_plot(df, category_col='category_name', count_column='follower_count_o', type_filter_column='follower_count_type', type_value=None, plot_title="Followers by Demographics"):
339
- """Generates a bar chart for follower demographics (e.g., by location, industry)."""
340
- logging.info(f"Generating {plot_title}. Category: '{category_col}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
 
 
 
 
 
 
 
341
 
342
  if df is None or df.empty:
343
  return create_placeholder_plot(title=plot_title, message="No follower data available.")
344
 
345
- required_cols = [category_col, count_column, type_filter_column]
346
  missing_cols = [col for col in required_cols if col not in df.columns]
347
  if missing_cols:
348
- return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}.")
349
 
350
- if type_value is None: # Should be specified
351
  return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
352
 
353
  try:
354
  df_copy = df.copy()
355
- df_filtered = df_copy[df_copy[type_filter_column] == type_value]
356
 
357
  if df_filtered.empty:
358
  return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")
359
 
360
- df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce').fillna(0)
 
361
 
362
- # Group by the category column and sum the count column
363
- demographics_data = df_filtered.groupby(category_col)[count_column].sum().sort_values(ascending=False)
 
 
 
364
 
365
  if demographics_data.empty:
366
  return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
367
 
368
- # Limit to top N for readability if too many categories
369
  top_n = 10
370
  if len(demographics_data) > top_n:
371
  demographics_data = demographics_data.head(top_n)
372
- plot_title += f" (Top {top_n})"
 
 
373
 
 
 
 
 
374
 
375
- fig, ax = plt.subplots(figsize=(10, 6) if len(demographics_data) > 5 else (8,5) )
376
- demographics_data.plot(kind='bar', ax=ax, color='teal')
377
- ax.set_title(plot_title)
 
378
  ax.set_xlabel(category_col.replace('_', ' ').title())
379
  ax.set_ylabel('Number of Followers')
 
 
 
380
  ax.grid(axis='y', linestyle='--', alpha=0.7)
381
- plt.xticks(rotation=45, ha="right")
 
 
 
 
 
 
 
 
382
  plt.tight_layout()
383
  return fig
384
  except Exception as e:
@@ -404,16 +426,14 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
404
  try:
405
  df_copy = df.copy()
406
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
407
- # Assuming 'engagement' is already a rate (e.g., 0.05 for 5%). If it's an absolute count, this logic needs change.
408
  df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce')
409
  df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column)
410
 
411
  if df_copy.empty:
412
  return create_placeholder_plot(title=title, message="No valid data after cleaning.")
413
 
414
- # Resample daily and calculate mean engagement rate
415
  engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
416
- engagement_over_time = engagement_over_time.dropna() # Remove days with no data after resampling
417
 
418
  if engagement_over_time.empty:
419
  return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
@@ -423,7 +443,12 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
423
  ax.set_title(title)
424
  ax.set_xlabel('Date')
425
  ax.set_ylabel('Engagement Rate')
426
- ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0 if engagement_over_time.max() <=1 else 100.0)) # Adjust based on rate scale
 
 
 
 
 
427
  ax.grid(True, linestyle='--', alpha=0.7)
428
  plt.xticks(rotation=45)
429
  plt.tight_layout()
@@ -434,7 +459,7 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
434
  finally:
435
  plt.close('all')
436
 
437
- def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'): # Using clickCount as proxy for Reach
438
  """Generates a plot for reach (clicks) over time."""
439
  title = "Reach Over Time (Clicks)"
440
  logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
@@ -453,16 +478,12 @@ def generate_reach_over_time_plot(df, date_column='published_at', reach_col='cli
453
  df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
454
  df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
455
 
456
- if df_copy.empty:
457
- return create_placeholder_plot(title=title, message="No valid data after cleaning.")
458
 
459
  reach_over_time = df_copy.resample('D')[reach_col].sum()
460
-
461
- if reach_over_time.empty and not df_copy.empty : # if original had data but resampling resulted in empty (e.g. all NaNs for sum)
462
- pass # allow plot of zeros if that's the case
463
- elif reach_over_time.sum() == 0 and not df_copy.empty : # if all values are zero
464
- pass
465
-
466
 
467
  fig, ax = plt.subplots(figsize=(10, 5))
468
  ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
@@ -498,8 +519,8 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
498
  df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
499
  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
500
 
501
- if df_copy.empty:
502
- return create_placeholder_plot(title=title, message="No valid data after cleaning.")
503
 
504
  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
505
 
@@ -521,73 +542,111 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
521
 
522
  if __name__ == '__main__':
523
  # Create dummy data for testing
524
- # Posts Data (merged with stats)
525
  posts_data = {
526
  'id': [f'post{i}' for i in range(1, 7)],
527
  'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
528
  'likeCount': [10, 5, 12, 8, 15, 3, 20],
529
  'commentCount': [2, 1, 3, 1, 4, 0, 5],
530
- 'shareCount': [1, 0, 1, 1, 2, 0, 1],
531
  'clickCount': [20, 15, 30, 22, 40, 10, 50],
532
  'impressionCount': [200, 150, 300, 220, 400, 100, 500],
533
- 'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08] # Engagement Rate
534
  }
535
  sample_merged_posts_df = pd.DataFrame(posts_data)
536
 
537
- # Follower Stats Data
538
  follower_data = {
539
- 'date': pd.to_datetime(['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15', '2023-03-01', # For time series
540
- '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', # For demographics (snapshot)
541
- '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
542
- '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01'
543
- ]),
544
- 'follower_count_type': ['follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
545
- 'follower_geo', 'follower_geo', 'follower_geo', # Location
546
- 'follower_function', 'follower_function', 'follower_function', # Role
547
- 'follower_industry', 'follower_industry', 'follower_industry', # Industry
548
- 'follower_seniority', 'follower_seniority', 'follower_seniority', # Seniority
549
- 'total_followers_snapshot', 'total_followers_snapshot', 'total_followers_snapshot' # For existing total growth
550
- ],
551
- 'category_name': ['Jan', 'Jan-Mid', 'Feb', 'Feb-Mid', 'Mar', # Corresponds to follower_gains_monthly
552
- 'USA', 'Canada', 'UK', # Geo
553
- 'Engineering', 'Sales', 'Marketing', # Function/Role
554
- 'Tech', 'Finance', 'Healthcare', # Industry
555
- 'Senior', 'Junior', 'Manager', # Seniority
556
- 'Overall1', 'Overall2', 'Overall3' # For total_followers_snapshot
557
- ],
558
- 'follower_count_o': [100, 105, 115, 120, 130, # Counts for monthly gains
559
- 500, 300, 200, # Geo counts
560
- 400, 350, 250, # Role counts
561
- 600, 200, 200, # Industry counts
562
- 300, 400, 300, # Seniority counts
563
- 1000, 1010, 1025 # For total_followers_snapshot
564
- ],
565
- 'total_followers': [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,100,115,130] # For existing total growth plot
 
 
566
  }
567
  sample_follower_stats_df = pd.DataFrame(follower_data)
568
- # Ensure 'total_followers' for generate_total_follower_growth_plot is correctly populated for its specific rows
569
- sample_follower_stats_df.loc[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot', 'total_followers'] = sample_follower_stats_df['follower_count_o']
570
-
571
-
572
- logging.info("--- Testing New Plot Generations ---")
573
-
574
- fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
575
- if fig_followers_count: logging.info("Followers Count Over Time (monthly) plot generated.")
576
 
577
- fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
578
- if fig_followers_rate: logging.info("Followers Growth Rate (monthly) plot generated.")
579
-
580
- fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
581
- if fig_geo: logging.info("Followers by Location plot generated.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
- fig_role = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_function', plot_title="Followers by Role")
584
- if fig_role: logging.info("Followers by Role plot generated.")
585
-
586
- fig_industry = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_industry', plot_title="Followers by Industry")
587
- if fig_industry: logging.info("Followers by Industry plot generated.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
 
589
- fig_seniority = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_seniority', plot_title="Followers by Seniority")
590
- if fig_seniority: logging.info("Followers by Seniority plot generated.")
 
 
 
 
 
 
 
 
591
 
592
  fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
593
  if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
@@ -597,12 +656,5 @@ if __name__ == '__main__':
597
 
598
  fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
599
  if fig_impressions: logging.info("Impressions Over Time plot generated.")
600
-
601
- # Test existing total follower growth plot with appropriate data
602
- total_followers_df = sample_follower_stats_df[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot'].copy()
603
- total_followers_df['date'] = pd.to_datetime(total_followers_df['date']) # Ensure date is datetime
604
- fig_total_growth = generate_total_follower_growth_plot(total_followers_df, date_column='date', count_column='total_followers')
605
- if fig_total_growth: logging.info("Total Follower Growth plot (existing function) generated.")
606
-
607
 
608
  logging.info("Test script finished. Review plots if displayed locally or saved.")
 
184
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
185
 
186
  fig, ax = plt.subplots(figsize=(8, 5))
 
187
  colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
188
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
 
 
 
 
 
189
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
190
  ax.set_title('Mention Sentiment Distribution')
191
  ax.axis('equal')
 
198
  finally:
199
  plt.close('all')
200
 
201
+ # --- Updated Follower Plot Functions ---
202
+
203
+ def generate_followers_count_over_time_plot(df, date_info_column='category_name',
204
+ organic_count_col='follower_count_organic',
205
+ paid_count_col='follower_count_paid',
206
+ type_filter_column='follower_count_type',
207
+ type_value='follower_gains_monthly'):
208
+ """
209
+ Generates a plot for specific follower counts (organic and paid) over time.
210
+ Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
211
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  title = f"Followers Count Over Time ({type_value})"
213
+ logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
214
 
215
  if df is None or df.empty:
216
  return create_placeholder_plot(title=title, message="No follower data available.")
217
 
218
+ required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
219
  missing_cols = [col for col in required_cols if col not in df.columns]
220
  if missing_cols:
221
+ return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
222
 
223
  try:
224
  df_copy = df.copy()
225
+ df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() # Use .copy() to avoid SettingWithCopyWarning
226
 
227
  if df_filtered.empty:
228
  return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
229
 
230
+ # Convert date_info_column to datetime
231
+ df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
232
+
233
+ df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
234
+ df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
235
+
236
+ df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj')
237
 
238
  if df_filtered.empty:
239
  return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")
240
 
241
  fig, ax = plt.subplots(figsize=(10, 5))
242
+ ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
243
+ ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
244
+
245
  ax.set_title(title)
246
  ax.set_xlabel('Date')
247
  ax.set_ylabel('Follower Count')
248
+ ax.legend()
249
  ax.grid(True, linestyle='--', alpha=0.7)
250
  plt.xticks(rotation=45)
251
  plt.tight_layout()
 
256
  finally:
257
  plt.close('all')
258
 
259
+ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
260
+ organic_count_col='follower_count_organic',
261
+ paid_count_col='follower_count_paid',
262
+ type_filter_column='follower_count_type',
263
+ type_value='follower_gains_monthly'):
264
+ """
265
+ Calculates and plots follower growth rate (organic and paid) over time.
266
+ Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
267
+ """
268
  title = f"Follower Growth Rate ({type_value})"
269
+ logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
270
 
271
  if df is None or df.empty:
272
  return create_placeholder_plot(title=title, message="No follower data available.")
273
 
274
+ required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
275
  missing_cols = [col for col in required_cols if col not in df.columns]
276
  if missing_cols:
277
+ return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
278
 
279
  try:
280
  df_copy = df.copy()
281
+ df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
282
 
283
  if df_filtered.empty:
284
  return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
285
 
286
+ df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
287
+ df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce')
288
+ df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
289
+
290
+ df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
291
 
292
+ if df_filtered.empty or len(df_filtered) < 2: # Need at least 2 points for pct_change
293
  return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
294
 
295
+ df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
296
+ df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
297
+
298
+ # Replace inf with NaN then drop NaNs for growth rates
299
  df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
300
+ # df_filtered.dropna(subset=['organic_growth_rate', 'paid_growth_rate'], how='all', inplace=True) # Keep row if at least one rate is valid
301
 
302
+ fig, ax = plt.subplots(figsize=(10, 5))
303
+
304
+ plotted_organic = False
305
+ if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty:
306
+ ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate')
307
+ plotted_organic = True
308
+
309
+ plotted_paid = False
310
+ if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
311
+ ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate')
312
+ plotted_paid = True
313
 
314
+ if not plotted_organic and not plotted_paid:
315
+ return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")
316
 
 
 
317
  ax.set_title(title)
318
  ax.set_xlabel('Date')
319
  ax.set_ylabel('Growth Rate (%)')
320
  ax.yaxis.set_major_formatter(mticker.PercentFormatter())
321
+ ax.legend()
322
  ax.grid(True, linestyle='--', alpha=0.7)
323
  plt.xticks(rotation=45)
324
  plt.tight_layout()
 
329
  finally:
330
  plt.close('all')
331
 
332
+ def generate_followers_by_demographics_plot(df, category_col='category_name',
333
+ organic_count_col='follower_count_organic',
334
+ paid_count_col='follower_count_paid',
335
+ type_filter_column='follower_count_type',
336
+ type_value=None, plot_title="Followers by Demographics"):
337
+ """
338
+ Generates a grouped bar chart for follower demographics (organic and paid).
339
+ 'category_col' here is the demographic attribute (e.g., Location, Industry).
340
+ """
341
+ logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
342
 
343
  if df is None or df.empty:
344
  return create_placeholder_plot(title=plot_title, message="No follower data available.")
345
 
346
+ required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
347
  missing_cols = [col for col in required_cols if col not in df.columns]
348
  if missing_cols:
349
+ return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
350
 
351
+ if type_value is None:
352
  return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
353
 
354
  try:
355
  df_copy = df.copy()
356
+ df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
357
 
358
  if df_filtered.empty:
359
  return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")
360
 
361
+ df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
362
+ df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
363
 
364
+ demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
365
+ # Sort by total followers (organic + paid) for better visualization
366
+ demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col]
367
+ demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
368
+
369
 
370
  if demographics_data.empty:
371
  return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
372
 
 
373
  top_n = 10
374
  if len(demographics_data) > top_n:
375
  demographics_data = demographics_data.head(top_n)
376
+ plot_title_updated = f"{plot_title} (Top {top_n})"
377
+ else:
378
+ plot_title_updated = plot_title
379
 
380
+ fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
381
+
382
+ bar_width = 0.35
383
+ index = np.arange(len(demographics_data.index))
384
 
385
+ bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue')
386
+ bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral')
387
+
388
+ ax.set_title(plot_title_updated)
389
  ax.set_xlabel(category_col.replace('_', ' ').title())
390
  ax.set_ylabel('Number of Followers')
391
+ ax.set_xticks(index)
392
+ ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
393
+ ax.legend()
394
  ax.grid(axis='y', linestyle='--', alpha=0.7)
395
+
396
+ # Add labels on top of bars
397
+ for bar_group in [bars1, bars2]:
398
+ for bar in bar_group:
399
+ yval = bar.get_height()
400
+ if yval > 0: # Only add label if value is not zero
401
+ ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]),
402
+ str(int(yval)), ha='center', va='bottom', fontsize=8)
403
+
404
  plt.tight_layout()
405
  return fig
406
  except Exception as e:
 
426
  try:
427
  df_copy = df.copy()
428
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
429
  df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce')
430
  df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column)
431
 
432
  if df_copy.empty:
433
  return create_placeholder_plot(title=title, message="No valid data after cleaning.")
434
 
 
435
  engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
436
+ engagement_over_time = engagement_over_time.dropna()
437
 
438
  if engagement_over_time.empty:
439
  return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
 
443
  ax.set_title(title)
444
  ax.set_xlabel('Date')
445
  ax.set_ylabel('Engagement Rate')
446
+ # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
447
+ max_rate_val = engagement_over_time.max()
448
+ formatter_xmax = 1.0 if max_rate_val <= 1.5 else 100.0 # Heuristic: if max is small, assume 0-1 scale
449
+ if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
450
+ formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
451
+ ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
452
  ax.grid(True, linestyle='--', alpha=0.7)
453
  plt.xticks(rotation=45)
454
  plt.tight_layout()
 
459
  finally:
460
  plt.close('all')
461
 
462
+ def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
463
  """Generates a plot for reach (clicks) over time."""
464
  title = "Reach Over Time (Clicks)"
465
  logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
 
478
  df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
479
  df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
480
 
481
+ if df_copy.empty: # After dropping NaNs for essential columns
482
+ return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
483
 
484
  reach_over_time = df_copy.resample('D')[reach_col].sum()
485
+ # df_copy is non-empty at this point, so the daily resample always has at least one bin.
486
+ # Days without any posts sum to 0, so the plot simply shows 0 for them (or a flat line if every sum is 0).
 
 
 
 
487
 
488
  fig, ax = plt.subplots(figsize=(10, 5))
489
  ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
 
519
  df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
520
  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
521
 
522
+ if df_copy.empty: # After dropping NaNs for essential columns
523
+ return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
524
 
525
  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
526
 
 
542
 
543
  if __name__ == '__main__':
544
  # Create dummy data for testing
 
545
  posts_data = {
546
  'id': [f'post{i}' for i in range(1, 7)],
547
  'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
548
  'likeCount': [10, 5, 12, 8, 15, 3, 20],
549
  'commentCount': [2, 1, 3, 1, 4, 0, 5],
550
+ 'shareCount': [1, 0, 1, 1, 2, 0, 1], # Assuming this is the correct column name from your data
551
  'clickCount': [20, 15, 30, 22, 40, 10, 50],
552
  'impressionCount': [200, 150, 300, 220, 400, 100, 500],
553
+ 'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
554
  }
555
  sample_merged_posts_df = pd.DataFrame(posts_data)
556
 
557
+ # Updated Follower Stats Data
558
  follower_data = {
559
+ 'follower_count_type': [
560
+ 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
561
+ 'follower_geo', 'follower_geo', 'follower_geo',
562
+ 'follower_function', 'follower_function',
563
+ 'follower_industry', 'follower_industry',
564
+ 'follower_seniority', 'follower_seniority'
565
+ ],
566
+ # 'category_name' now holds dates for time-series, and actual categories for demographics
567
+ 'category_name': [
568
+ '2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
569
+ 'USA', 'Canada', 'UK', # Geo
570
+ 'Engineering', 'Sales', # Function/Role
571
+ 'Tech', 'Finance', # Industry
572
+ 'Senior', 'Junior' # Seniority
573
+ ],
574
+ 'follower_count_organic': [
575
+ 100, 110, 125, # Organic monthly gains
576
+ 500, 300, 150, # Organic Geo counts
577
+ 400, 200, # Organic Role counts
578
+ 250, 180, # Organic Industry counts
579
+ 300, 220 # Organic Seniority counts
580
+ ],
581
+ 'follower_count_paid': [
582
+ 20, 30, 25, # Paid monthly gains
583
+ 50, 40, 60, # Paid Geo counts
584
+ 30, 20, # Paid Role counts
585
+ 45, 35, # Paid Industry counts
586
+ 60, 40 # Paid Seniority counts
587
+ ]
588
  }
589
  sample_follower_stats_df = pd.DataFrame(follower_data)
 
 
 
 
 
 
 
 
590
 
591
+ logging.info("--- Testing Updated Follower Plot Generations ---")
592
+
593
+ fig_followers_count = generate_followers_count_over_time_plot(
594
+ sample_follower_stats_df.copy(),
595
+ type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
596
+ )
597
+ if fig_followers_count: logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
598
+
599
+ fig_followers_rate = generate_followers_growth_rate_plot(
600
+ sample_follower_stats_df.copy(),
601
+ type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
602
+ )
603
+ if fig_followers_rate: logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
604
+
605
+ fig_geo = generate_followers_by_demographics_plot(
606
+ sample_follower_stats_df.copy(),
607
+ type_value='follower_geo', # category_col defaults to 'category_name'
608
+ plot_title="Followers by Location (Organic/Paid)"
609
+ )
610
+ if fig_geo: logging.info("Followers by Location (grouped organic/paid) plot generated.")
611
 
612
+ fig_role = generate_followers_by_demographics_plot(
613
+ sample_follower_stats_df.copy(),
614
+ type_value='follower_function',
615
+ plot_title="Followers by Role (Organic/Paid)"
616
+ )
617
+ if fig_role: logging.info("Followers by Role (grouped organic/paid) plot generated.")
618
+
619
+ fig_industry = generate_followers_by_demographics_plot(
620
+ sample_follower_stats_df.copy(),
621
+ type_value='follower_industry',
622
+ plot_title="Followers by Industry (Organic/Paid)"
623
+ )
624
+ if fig_industry: logging.info("Followers by Industry (grouped organic/paid) plot generated.")
625
+
626
+ fig_seniority = generate_followers_by_demographics_plot(
627
+ sample_follower_stats_df.copy(),
628
+ type_value='follower_seniority',
629
+ plot_title="Followers by Seniority (Organic/Paid)"
630
+ )
631
+ if fig_seniority: logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
632
+
633
+ logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
634
+ fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
635
+ if fig_posts_activity: logging.info("Posts activity plot generated.")
636
+
637
+ fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
638
+ if fig_engagement_type: logging.info("Engagement type plot generated.")
639
 
640
+ # Dummy mentions for testing
641
+ mentions_data = {
642
+ 'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
643
+ 'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
644
+ }
645
+ sample_mentions_df = pd.DataFrame(mentions_data)
646
+ fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
647
+ if fig_mentions_activity: logging.info("Mentions activity plot generated.")
648
+ fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
649
+ if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
650
 
651
  fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
652
  if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
 
656
 
657
  fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
658
  if fig_impressions: logging.info("Impressions Over Time plot generated.")
 
 
 
 
 
 
 
659
 
660
  logging.info("Test script finished. Review plots if displayed locally or saved.")