Spaces:
Running
Running
import pandas as pd | |
import matplotlib.pyplot as plt | |
import logging | |
from io import BytesIO | |
import base64 | |
import numpy as np | |
import matplotlib.ticker as mticker | |
import matplotlib.patches as patches # Added for rounded corners | |
import ast # For safely evaluating string representations of lists | |
from data_processing.analytics_data_processing import ( | |
generate_chatbot_data_summaries, | |
prepare_filtered_analytics_data | |
) | |
# Root logging setup for this module: INFO level, each record prefixed with
# timestamp, level name and originating module.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(module)s - %(message)s',
    level=logging.INFO,
)
def _apply_theme_aware_styling(fig, ax):
    """
    Restyle a figure/axes pair using the colors currently held in Matplotlib's rcParams.

    The figure and axes backgrounds are made fully transparent, the four spines
    are hidden, and a rounded rectangle covering the axes area is added as the
    new background. Axis labels, title, ticks and a dashed grid are then colored
    from rcParams, so the plot follows whatever theme populated those values
    (falling back to light-theme defaults when a key is absent).
    """
    rc = plt.rcParams
    text_color = rc.get('text.color', 'black')
    grid_color = rc.get('grid.color', 'lightgray')
    face_color = rc.get('axes.facecolor', 'whitesmoke')
    edge_color = rc.get('axes.edgecolor', 'lightgray')

    # Clear both stock backgrounds; the rounded patch added below replaces them.
    for background in (fig.patch, ax.patch):
        background.set_alpha(0.0)

    # The rectangular frame would clash with the rounded corners, so hide it.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    # Unit square in axes coordinates == the full axes area; negative zorder
    # keeps it underneath the grid and the data.
    ax.add_patch(
        patches.FancyBboxPatch(
            (0, 0), 1, 1,
            boxstyle="round,pad=0,rounding_size=0.015",
            transform=ax.transAxes,
            facecolor=face_color,
            edgecolor=edge_color,
            linewidth=0.5,
            zorder=-1
        )
    )

    # Axis labels and title.
    for text_element in (ax.xaxis.label, ax.yaxis.label, ax.title):
        text_element.set_color(text_color)

    # Tick marks and tick labels on both axes.
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors=text_color)

    # Dashed grid at zorder 0 so plotted data (zorder >= 1) sits on top of it.
    ax.grid(True, linestyle='--', alpha=0.6, zorder=0, color=grid_color)
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Create a theme-aware placeholder figure that displays a short notice.

    Args:
        title: First line of the centered notice.
        message: Text shown on the line below the title.

    Returns:
        A Matplotlib figure with the axis hidden and the notice centered. If
        building the themed figure fails, a minimal transparent figure carrying
        a red "Fatal: Plot generation error" notice is returned instead.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        _apply_theme_aware_styling(fig, ax)
        # Use the theme's text color for the message
        text_color = plt.rcParams.get('text.color', 'black')
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True, zorder=1, color=text_color)
        ax.axis('off')
        fig.subplots_adjust(top=0.90, bottom=0.10, left=0.10, right=0.90)
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}", exc_info=True)
        # Release the half-built figure so it does not stay registered with pyplot.
        if fig is not None:
            plt.close(fig)
        # Last-resort figure: no themed styling, only a fixed error notice.
        fig_err, ax_err = plt.subplots(figsize=(8, 4))
        fig_err.patch.set_alpha(0.0)
        ax_err.patch.set_alpha(0.0)
        ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center', zorder=1, color='red')
        ax_err.axis('off')
        return fig_err
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot how many posts were published per day as a theme-aware line chart.

    Values in ``date_column`` are parsed as datetimes (unparseable entries are
    discarded) and rows are counted per calendar day. A placeholder figure is
    returned when there is no usable data or when plotting raises.
    """
    no_input = df is None or df.empty or date_column not in df.columns
    if no_input:
        return create_placeholder_plot(title="Posts Activity Over Time", message="No data available.")

    fig = None
    try:
        dated_posts = df.copy()
        dated_posts[date_column] = pd.to_datetime(dated_posts[date_column], errors='coerce')
        dated_posts = dated_posts.dropna(subset=[date_column])
        if dated_posts.empty:
            return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")

        # One bucket per calendar day; size() counts the rows in each bucket.
        daily_counts = dated_posts.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        daily_counts.plot(ax=ax, kind='line', marker='o', linestyle='-', zorder=1)
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        plt.xticks(rotation=45)
        # tight_layout first, then explicit margins that leave room for the rotated tick labels.
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot how many mentions occurred per day as a theme-aware purple line chart.

    Values in ``date_column`` are parsed as datetimes (unparseable entries are
    discarded) and rows are counted per calendar day. A placeholder figure is
    returned when there is no usable data or when plotting raises.
    """
    no_input = df is None or df.empty or date_column not in df.columns
    if no_input:
        return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available.")

    fig = None
    try:
        dated_mentions = df.copy()
        dated_mentions[date_column] = pd.to_datetime(dated_mentions[date_column], errors='coerce')
        dated_mentions = dated_mentions.dropna(subset=[date_column])
        if dated_mentions.empty:
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")

        # One bucket per calendar day; size() counts the rows in each bucket.
        daily_counts = dated_mentions.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        daily_counts.plot(ax=ax, kind='line', marker='o', linestyle='-', color='purple', zorder=1)
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        plt.xticks(rotation=45)
        # tight_layout first, then explicit margins that leave room for the rotated tick labels.
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Draw a theme-aware pie chart of how mentions are distributed across sentiment labels.

    Args:
        df: DataFrame holding one row per mention.
        sentiment_column: Name of the column containing the sentiment label.

    Returns:
        A Matplotlib figure. A placeholder figure is returned when the input is
        missing/empty, the column is absent, no labels are present, or plotting raises.
    """
    if df is None or df.empty or sentiment_column not in df.columns:
        return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available.")
    fig = None
    try:
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_theme_aware_styling(fig, ax)
        text_color = plt.rcParams.get('text.color', 'black')

        # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was removed in
        # Matplotlib 3.9; the second argument resamples the map to one color per slice.
        slice_count = len(sentiment_counts)
        palette = plt.get_cmap('Pastel2', slice_count)
        slice_colors = [palette(i) for i in range(slice_count)]

        wedges, texts, autotexts = ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            startangle=90,
            colors=slice_colors,
        )
        # Labels and percentages follow the theme and are drawn above the wedges.
        for text_item in texts + autotexts:
            text_item.set_color(text_color)
            text_item.set_zorder(2)
        for wedge in wedges:
            wedge.set_zorder(1)

        ax.axis('equal')  # keep the pie circular
        fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
def generate_followers_count_over_time_plot(df, **kwargs):
    """Plot organic and paid follower counts over time as two theme-aware lines.

    Only rows whose ``follower_count_type`` equals the ``type_value`` keyword
    (default ``'follower_gains_monthly'``) are used; ``category_name`` is parsed
    as the date of each row. A placeholder figure is returned when nothing can
    be plotted or when plotting raises.
    """
    type_value = kwargs.get('type_value', 'follower_gains_monthly')
    title = f"Followers Count Over Time ({type_value})"
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    fig = None
    try:
        followers = df[df['follower_count_type'] == type_value].copy()
        if followers.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        followers['datetime_obj'] = pd.to_datetime(followers['category_name'], errors='coerce')
        # Non-numeric counts become 0 so the lines stay continuous.
        for count_col in ('follower_count_organic', 'follower_count_paid'):
            followers[count_col] = pd.to_numeric(followers[count_col], errors='coerce').fillna(0)
        followers = followers.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj')
        if followers.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        timeline = followers['datetime_obj']
        ax.plot(timeline, followers['follower_count_organic'], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers', zorder=1)
        ax.plot(timeline, followers['follower_count_paid'], marker='x', linestyle='--', color='seagreen', label='Paid Followers', zorder=1)
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')

        legend = ax.legend()
        if legend:
            legend_text_color = plt.rcParams.get('text.color', 'black')
            for entry in legend.get_texts():
                entry.set_color(legend_text_color)
            legend.set_zorder(2)

        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_followers_growth_rate_plot(df, **kwargs):
    """Plot the period-over-period growth rate (%) of organic and paid followers.

    Keyword Args:
        type_value: Value of ``follower_count_type`` to keep
            (default ``'follower_gains_monthly'``).

    Returns:
        A Matplotlib figure with up to two lines (organic / paid growth rate).
        A placeholder figure is returned when the input is missing, fewer than
        two dated rows remain, neither series has a finite growth value, or
        plotting raises.
    """
    type_value = kwargs.get('type_value', 'follower_gains_monthly')
    title = f"Follower Growth Rate ({type_value})"
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")
    fig = None
    try:
        df_filtered = df[df['follower_count_type'] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered['category_name'], errors='coerce')
        df_filtered['follower_count_organic'] = pd.to_numeric(df_filtered['follower_count_organic'], errors='coerce')
        df_filtered['follower_count_paid'] = pd.to_numeric(df_filtered['follower_count_paid'], errors='coerce')
        df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        df_filtered['organic_growth_rate'] = df_filtered['follower_count_organic'].pct_change() * 100
        df_filtered['paid_growth_rate'] = df_filtered['follower_count_paid'].pct_change() * 100
        # Division by a zero previous value yields +/-inf; treat those as missing.
        df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)

        series_specs = (
            ('organic_growth_rate', {'marker': 'o', 'linestyle': '-', 'color': 'lightcoral', 'label': 'Organic Growth Rate'}),
            ('paid_growth_rate', {'marker': 'x', 'linestyle': '--', 'color': 'mediumpurple', 'label': 'Paid Growth Rate'}),
        )
        drawable = [(column, style) for column, style in series_specs if df_filtered[column].notna().any()]
        # Decide before creating the figure so no figure is left open on this path.
        if not drawable:
            return create_placeholder_plot(title=title, message="No growth rate data to display.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        for column, style in drawable:
            ax.plot(df_filtered.index, df_filtered[column], zorder=1, **style)

        ax.set_xlabel('Date')
        ax.set_ylabel('Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        legend = ax.legend()
        if legend:
            legend_text_color = plt.rcParams.get('text.color', 'black')
            for text in legend.get_texts():
                text.set_color(legend_text_color)
            legend.set_zorder(2)
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_followers_by_demographics_plot(df, **kwargs):
    """Draw a theme-aware grouped bar chart of the ten largest demographic categories.

    Rows are restricted to ``follower_count_type == type_value`` (keyword,
    required), summed per ``category_name`` and ranked by organic + paid total.
    The ``plot_title`` keyword is used for placeholder and log messages. A
    placeholder figure is returned when nothing can be plotted or plotting raises.
    """
    plot_title = kwargs.get('plot_title', "Followers by Demographics")
    type_value = kwargs.get('type_value')
    category_col = 'category_name'
    count_cols = ['follower_count_organic', 'follower_count_paid']

    if df is None or df.empty or not type_value:
        return create_placeholder_plot(title=plot_title, message="No data or demographic type not specified.")

    fig = None
    try:
        subset = df[df['follower_count_type'] == type_value].copy()
        if subset.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for type '{type_value}'.")

        # Non-numeric counts are treated as zero.
        for count_col in count_cols:
            subset[count_col] = pd.to_numeric(subset[count_col], errors='coerce').fillna(0)

        per_category = subset.groupby(category_col)[count_cols].sum()
        # Temporary helper column used only to rank categories by combined followers.
        per_category['total_for_sort'] = per_category.sum(axis=1)
        ranked = per_category.sort_values(by='total_for_sort', ascending=False)
        demographics_data = ranked.head(10).drop(columns=['total_for_sort'])
        if demographics_data.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display.")

        fig, ax = plt.subplots(figsize=(12, 7))
        _apply_theme_aware_styling(fig, ax)
        demographics_data.plot(kind='bar', ax=ax, zorder=1, width=0.8, color=['dodgerblue', 'seagreen'])
        ax.set_xlabel(category_col.replace('_', ' ').title())
        ax.set_ylabel('Number of Followers')

        legend = ax.legend(['Organic', 'Paid'])
        if legend:
            legend_text_color = plt.rcParams.get('text.color', 'black')
            for entry in legend.get_texts():
                entry.set_color(legend_text_color)
            legend.set_zorder(2)

        plt.xticks(rotation=45, ha="right")
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.25, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
def generate_generic_time_series_plot(df, date_column, value_column, title, ylabel, color='blue'):
    """Plot the daily sum of ``value_column`` over time as a theme-aware line chart.

    ``date_column`` is parsed as datetimes and ``value_column`` as numbers; rows
    where either fails to parse are dropped before summing per calendar day.
    ``title`` and ``ylabel`` label the chart and ``color`` styles the line. A
    placeholder figure is returned when nothing can be plotted or plotting raises.
    """
    no_input = df is None or df.empty or date_column not in df.columns or value_column not in df.columns
    if no_input:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[value_column] = pd.to_numeric(cleaned[value_column], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, value_column])
        cleaned = cleaned.set_index(date_column)
        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data.")

        daily_totals = cleaned.resample('D')[value_column].sum()
        if daily_totals.empty:
            return create_placeholder_plot(title=title, message="No data in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color=color, zorder=1)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel(ylabel)
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """Plot the daily mean engagement rate as a theme-aware line with a percent y-axis.

    The percent scale is inferred from the data: when the largest daily mean is
    at most 1.5 the values are treated as fractions (1.0 shown as 100%),
    otherwise as values already expressed in percent. A placeholder figure is
    returned when nothing can be plotted or plotting raises.
    """
    title = "Engagement Rate Over Time"
    no_input = df is None or df.empty or date_column not in df.columns or engagement_rate_col not in df.columns
    if no_input:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[engagement_rate_col] = pd.to_numeric(cleaned[engagement_rate_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, engagement_rate_col])
        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data.")

        # Days without posts produce NaN means; drop them so the line skips those days.
        daily_mean = cleaned.set_index(date_column).resample('D')[engagement_rate_col].mean()
        engagement_over_time = daily_mean.dropna()
        if engagement_over_time.empty:
            return create_placeholder_plot(title=title, message="No data to display.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange', zorder=1)

        # Pick the value that the formatter should display as 100%.
        if engagement_over_time.max() <= 1.5:
            formatter_xmax = 1.0
        else:
            formatter_xmax = 100.0
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))

        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_reach_over_time_plot(df, **kwargs):
    """Plot daily total ``clickCount`` by ``published_at`` as the reach chart; extra keyword arguments are ignored."""
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='clickCount',
        title='Reach Over Time (Clicks)',
        ylabel='Total Clicks',
        color='mediumseagreen',
    )
def generate_impressions_over_time_plot(df, **kwargs):
    """Plot daily total ``impressionCount`` by ``published_at``; extra keyword arguments are ignored."""
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='impressionCount',
        title='Impressions Over Time',
        ylabel='Total Impressions',
        color='slateblue',
    )
def generate_likes_over_time_plot(df, **kwargs):
    """Plot daily total ``likeCount`` by ``published_at``; extra keyword arguments are ignored."""
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='likeCount',
        title='Reactions (Likes) Over Time',
        ylabel='Total Likes',
        color='crimson',
    )
def generate_clicks_over_time_plot(df, **kwargs):
    """Plot daily total ``clickCount`` by ``published_at``; extra keyword arguments are ignored."""
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='clickCount',
        title='Clicks Over Time',
        ylabel='Total Clicks',
        color='mediumseagreen',
    )
def generate_shares_over_time_plot(df, **kwargs):
    """Plot daily total ``shareCount`` by ``published_at``; extra keyword arguments are ignored."""
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='shareCount',
        title='Shares Over Time',
        ylabel='Total Shares',
        color='teal',
    )
def generate_comments_over_time_plot(df, **kwargs):
    """Plot daily total ``commentCount`` by ``published_at``; extra keyword arguments are ignored."""
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='commentCount',
        title='Comments Over Time',
        ylabel='Total Comments',
        color='gold',
    )
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', **kwargs):
    """Draw a theme-aware pie chart of how comments are distributed across sentiment labels.

    Args:
        df: DataFrame with one row per item carrying a sentiment label.
        sentiment_column: Name of the column containing the sentiment label.
        **kwargs: Accepted for call-site compatibility; not used.

    Returns:
        A Matplotlib figure. A placeholder figure is returned when the input is
        missing/empty, the column is absent, no labels are present, or plotting raises.
    """
    title = "Breakdown of Comments by Sentiment"
    if df is None or df.empty or sentiment_column not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")
    fig = None
    try:
        sentiment_counts = df[sentiment_column].value_counts().dropna()
        if sentiment_counts.empty:
            return create_placeholder_plot(title=title, message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_theme_aware_styling(fig, ax)
        text_color = plt.rcParams.get('text.color', 'black')

        # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was removed in
        # Matplotlib 3.9; the second argument resamples the map to one color per slice.
        slice_count = len(sentiment_counts)
        palette = plt.get_cmap('coolwarm', slice_count)
        slice_colors = [palette(i) for i in range(slice_count)]

        wedges, texts, autotexts = ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            startangle=90,
            colors=slice_colors,
        )
        for text_item in texts + autotexts:
            text_item.set_color(text_color)

        ax.set_title(title)
        ax.axis('equal')  # keep the pie circular
        fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_post_frequency_plot(df, date_column='published_at', **kwargs):
    """Plot the number of posts per day as a titled, theme-aware line chart.

    Values in ``date_column`` are parsed as datetimes (unparseable entries are
    dropped) and rows are counted per calendar day. Extra keyword arguments are
    ignored. A placeholder figure is returned when nothing can be plotted or
    plotting raises.
    """
    title = "Post Frequency Over Time"
    no_input = df is None or df.empty or date_column not in df.columns
    if no_input:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        dated_posts = df.copy()
        dated_posts[date_column] = pd.to_datetime(dated_posts[date_column], errors='coerce')
        dated_posts = dated_posts.dropna(subset=[date_column])
        dated_posts = dated_posts.set_index(date_column)
        if dated_posts.empty:
            return create_placeholder_plot(title=title, message="No valid data.")

        # size() counts rows per day regardless of which columns hold values.
        posts_per_day = dated_posts.resample('D').size()
        if posts_per_day.empty:
            return create_placeholder_plot(title=title, message="No data in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        ax.plot(posts_per_day.index, posts_per_day.values, marker='.', linestyle='-', zorder=1)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_content_format_breakdown_plot(df, format_col='media_type', **kwargs):
    """Draw a theme-aware bar chart counting posts per content format.

    Args:
        df: DataFrame with one row per post.
        format_col: Name of the column holding the format / media type.
        **kwargs: Accepted for call-site compatibility; not used.

    Returns:
        A Matplotlib figure with one bar per format, each annotated with its
        count. A placeholder figure is returned when the input is missing/empty,
        the column is absent, no values are present, or plotting raises.
    """
    title = "Breakdown of Content by Format"
    if df is None or df.empty or format_col not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")
    fig = None
    try:
        format_counts = df[format_col].value_counts().dropna()
        if format_counts.empty:
            return create_placeholder_plot(title=title, message="No format data.")

        fig, ax = plt.subplots(figsize=(8, 6))
        _apply_theme_aware_styling(fig, ax)

        # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was removed in
        # Matplotlib 3.9. Sampling at evenly spaced points gives one color per bar.
        bar_colors = plt.get_cmap('viridis')(np.linspace(0, 1, len(format_counts)))
        format_counts.plot(kind='bar', ax=ax, zorder=1, color=bar_colors)
        ax.set_title(title)
        ax.set_xlabel('Media Type')
        ax.set_ylabel('Number of Posts')
        plt.xticks(rotation=45, ha="right")

        # Count labels sit slightly above each bar, in the theme's text color.
        text_color = plt.rcParams.get('text.color', 'black')
        label_offset = 0.01 * format_counts.max()
        for i, v in enumerate(format_counts):
            ax.text(i, v + label_offset, str(v), ha='center', va='bottom', zorder=2, color=text_color)

        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.15, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def _parse_eb_label(label_data):
    """Normalize one topic-label cell into a list of labels.

    Handles the shapes a cell may take:
      * ``list`` -> returned as-is;
      * ``tuple`` / ``set`` / ``frozenset`` -> converted to a list;
      * NumPy array / pandas Series -> converted with ``tolist()``;
      * ``str`` -> parsed as a Python literal; a list or tuple literal yields
        its items, any other literal yields its string form, and text that is
        not a valid literal is kept as a single stripped label (empty or
        whitespace-only text yields ``[]``);
      * missing scalars (``None``, NaN, ``pd.NA``) -> ``[]``;
      * anything else -> a single-item list with its string form.
    """
    if isinstance(label_data, list):
        return label_data
    if isinstance(label_data, (tuple, set, frozenset)):
        return list(label_data)
    # pd.isna() returns an array for array-likes, which cannot be used in a
    # boolean test, so these are converted before the scalar check below.
    if isinstance(label_data, (np.ndarray, pd.Series)):
        return label_data.tolist()

    if isinstance(label_data, str):
        stripped = label_data.strip()
        if not stripped:
            return []
        try:
            parsed = ast.literal_eval(stripped)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Plain text such as "Marketing" is not a literal: keep it as one label.
            return [stripped]
        if isinstance(parsed, list):
            return parsed
        if isinstance(parsed, tuple):
            return list(parsed)
        return [str(parsed)]

    missing = pd.isna(label_data)
    if isinstance(missing, (bool, np.bool_)) and missing:
        return []
    return [str(label_data)]
def generate_content_topic_breakdown_plot(df, topics_col='li_eb_labels', **kwargs):
    """Draw a theme-aware horizontal bar chart of the 15 most frequent content topics.

    Args:
        df: DataFrame with one row per post.
        topics_col: Name of the column holding each post's topic labels; every
            cell is normalized to a list with ``_parse_eb_label``.
        **kwargs: Accepted for call-site compatibility; not used.

    Returns:
        A Matplotlib figure with the most frequent topic at the top and each
        bar annotated with its count. A placeholder figure is returned when the
        input is missing/empty, the column is absent, no topics are found, or
        plotting raises.
    """
    title = "Breakdown of Content by Topics (Top 15)"
    if df is None or df.empty or topics_col not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")
    fig = None
    try:
        # One row per (post, topic) pair, then count occurrences of each topic.
        topic_counts = df[topics_col].apply(_parse_eb_label).explode().dropna().value_counts()
        topic_counts = topic_counts[topic_counts.index != '']
        if topic_counts.empty:
            return create_placeholder_plot(title=title, message="No topic data found.")

        # Ascending order puts the largest bar at the top of a horizontal chart.
        top_topics = topic_counts.nlargest(15).sort_values(ascending=True)

        fig, ax = plt.subplots(figsize=(10, 8))
        _apply_theme_aware_styling(fig, ax)

        # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was removed in
        # Matplotlib 3.9. Starting at 0.3 skips the palest end of the colormap.
        bar_colors = plt.get_cmap('YlGnBu')(np.linspace(0.3, 1, len(top_topics)))
        top_topics.plot(kind='barh', ax=ax, zorder=1, color=bar_colors)
        ax.set_title(title)
        ax.set_xlabel('Number of Posts')
        ax.set_ylabel('Topic')

        # Count labels sit just past the end of each bar, in the theme's text color.
        text_color = plt.rcParams.get('text.color', 'black')
        label_offset = 0.01 * top_topics.max()
        for i, count in enumerate(top_topics):
            ax.text(count + label_offset, i, f' {count}', va='center', ha='left', zorder=2, color=text_color)

        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.1, left=0.3, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def update_analytics_plots_figures(token_state_value, date_filter_option, custom_start_date, custom_end_date, current_plot_configs):
    """
    Build every analytics figure requested in ``current_plot_configs``.

    Returns a flat list: a status message, then one figure per plot config (in
    config order), then a dict of data summaries for the chatbot. When the token
    is missing or data preparation fails, every figure is a placeholder and the
    summaries dict maps each plot id to an explanatory string. A plot whose id
    has no generator, or whose generator raises, is replaced by a placeholder
    without affecting the other plots.
    """
    logging.info(f"Updating analytics plot figures for theme-aware plotting. Filter: {date_filter_option}")
    num_expected_plots = len(current_plot_configs)

    # Guard: without a token there is nothing to analyse.
    has_token = bool(token_state_value) and bool(token_state_value.get("token"))
    if not has_token:
        message = "❌ Accesso negato. Nessun token. Impossibile generare le analisi."
        logging.warning(message)
        placeholder_figs = [create_placeholder_plot(title="Accesso Negato") for _ in range(num_expected_plots)]
        summaries = {p_cfg["id"]: "Accesso negato, nessun dato per il chatbot." for p_cfg in current_plot_configs}
        return [message, *placeholder_figs, summaries]

    try:
        prepared = prepare_filtered_analytics_data(
            token_state_value, date_filter_option, custom_start_date, custom_end_date
        )
        (posts_df, mentions_df, follower_stats_df,
         raw_follower_stats_df, start_dt_for_msg, end_dt_for_msg) = prepared
        plot_data_summaries_for_chatbot = generate_chatbot_data_summaries(
            current_plot_configs, posts_df, mentions_df,
            follower_stats_df, raw_follower_stats_df, token_state_value
        )
    except Exception as e:
        error_msg = f"❌ Errore durante la preparazione dei dati per le analisi: {e}"
        logging.error(error_msg, exc_info=True)
        placeholder_figs = [
            create_placeholder_plot(title="Errore Preparazione Dati", message=str(e))
            for _ in range(num_expected_plots)
        ]
        summaries = {p_cfg["id"]: f"Errore preparazione dati: {e}" for p_cfg in current_plot_configs}
        return [error_msg, *placeholder_figs, summaries]

    # Plot id -> zero-argument callable producing that plot's figure.
    plot_builders = {
        "followers_count": lambda: generate_followers_count_over_time_plot(
            follower_stats_df, type_value='follower_gains_monthly'),
        "followers_growth_rate": lambda: generate_followers_growth_rate_plot(
            follower_stats_df, type_value='follower_gains_monthly'),
        "followers_by_location": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_geo', plot_title="Follower per Località"),
        "followers_by_role": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_function', plot_title="Follower per Ruolo"),
        "followers_by_industry": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_industry', plot_title="Follower per Settore"),
        "followers_by_seniority": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_seniority', plot_title="Follower per Anzianità"),
        "engagement_rate": lambda: generate_engagement_rate_over_time_plot(posts_df),
        "reach_over_time": lambda: generate_reach_over_time_plot(posts_df),
        "impressions_over_time": lambda: generate_impressions_over_time_plot(posts_df),
        "likes_over_time": lambda: generate_likes_over_time_plot(posts_df),
        "clicks_over_time": lambda: generate_clicks_over_time_plot(posts_df),
        "shares_over_time": lambda: generate_shares_over_time_plot(posts_df),
        "comments_over_time": lambda: generate_comments_over_time_plot(posts_df),
        "comments_sentiment": lambda: generate_comments_sentiment_breakdown_plot(posts_df),
        "post_frequency_cs": lambda: generate_post_frequency_plot(posts_df),
        "content_format_breakdown_cs": lambda: generate_content_format_breakdown_plot(
            posts_df, format_col=token_state_value.get("config_media_type_col", "media_type")),
        "content_topic_breakdown_cs": lambda: generate_content_topic_breakdown_plot(
            posts_df, topics_col=token_state_value.get("config_eb_labels_col", "li_eb_labels")),
        "mention_analysis_volume": lambda: generate_mentions_activity_plot(
            mentions_df, date_column=token_state_value.get("config_date_col_mentions", "date")),
        "mention_analysis_sentiment": lambda: generate_mention_sentiment_plot(mentions_df),
    }

    plot_figs = []
    for config in current_plot_configs:
        plot_id = config["id"]
        builder = plot_builders.get(plot_id)
        if builder is None:
            logging.warning(f"No plot function found for ID: '{plot_id}'")
            plot_figs.append(create_placeholder_plot(title=f"Plot Not Implemented: {config.get('label', plot_id)}"))
            continue
        try:
            plot_figs.append(builder())
        except Exception as e:
            # One failing plot must not prevent the remaining ones from rendering.
            logging.error(f"Failed to generate plot for '{plot_id}': {e}", exc_info=True)
            plot_figs.append(create_placeholder_plot(title=f"Error: {config.get('label', plot_id)}", message=str(e)))

    message = f"📊 Analisi aggiornate per il periodo: {date_filter_option}"
    if date_filter_option == "Intervallo Personalizzato":
        # Custom range: append the resolved start/end dates to the status message.
        if start_dt_for_msg:
            s_display = start_dt_for_msg.strftime('%Y-%m-%d')
        else:
            s_display = "N/A"
        if end_dt_for_msg:
            e_display = end_dt_for_msg.strftime('%Y-%m-%d')
        else:
            e_display = "N/A"
        message += f" (Da: {s_display} A: {e_display})"

    return [message, *plot_figs, plot_data_summaries_for_chatbot]