Spaces:
Running
Running
import pandas as pd | |
import matplotlib.pyplot as plt | |
import logging | |
from io import BytesIO | |
import base64 | |
import numpy as np | |
import matplotlib.ticker as mticker | |
import matplotlib.patches as patches | |
import ast | |
from data_processing.analytics_data_processing import ( | |
generate_chatbot_data_summaries, | |
prepare_filtered_analytics_data | |
) | |
# Configure logging for this module.
# NOTE(review): basicConfig() acts on the *root* logger and runs at import time;
# per the logging docs it is a no-op when the root logger already has handlers.
# Confirm that a plotting module is the intended place to set the app-wide format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
def _apply_theme_aware_styling(fig, ax, is_pie=False): | |
""" | |
Applies a modern, theme-aware style to a Matplotlib plot. | |
It reads colors from rcParams, which Gradio sets based on the theme. | |
""" | |
try: | |
# Use a modern, clean style as a base | |
plt.style.use('seaborn-v0_8-whitegrid') | |
# Get theme-aware colors from Matplotlib's runtime configuration | |
TEXT_COLOR = plt.rcParams.get('text.color', '#E5E7EB') # Default to light gray for dark themes | |
GRID_COLOR = plt.rcParams.get('grid.color', '#4B5563') # Default to a darker grid | |
FACE_COLOR = plt.rcParams.get('axes.facecolor', '#1F2937') # Default to dark gray | |
EDGE_COLOR = plt.rcParams.get('axes.edgecolor', '#374151') # Default to a slightly lighter gray | |
FIG_FACE_COLOR = plt.rcParams.get('figure.facecolor', '#111827') # Default to very dark gray | |
fig.set_facecolor(FIG_FACE_COLOR) | |
ax.set_facecolor(FACE_COLOR) | |
# Apply the theme's text color to all major text elements. | |
ax.title.set_color(TEXT_COLOR) | |
ax.xaxis.label.set_color(TEXT_COLOR) | |
ax.yaxis.label.set_color(TEXT_COLOR) | |
# Apply the theme's text color to the tick labels and tick marks. | |
ax.tick_params(axis='x', colors=TEXT_COLOR) | |
ax.tick_params(axis='y', colors=TEXT_COLOR) | |
# Remove spines for a cleaner look | |
if not is_pie: | |
ax.spines['top'].set_visible(False) | |
ax.spines['right'].set_visible(False) | |
ax.spines['bottom'].set_color(EDGE_COLOR) | |
ax.spines['left'].set_color(EDGE_COLOR) | |
else: | |
ax.spines['top'].set_visible(False) | |
ax.spines['right'].set_visible(False) | |
ax.spines['bottom'].set_visible(False) | |
ax.spines['left'].set_visible(False) | |
# Set grid color and ensure it's drawn behind data | |
ax.grid(True, linestyle='--', alpha=0.6, zorder=0, color=GRID_COLOR) | |
except Exception as e: | |
logging.error(f"Error applying theme styling: {e}") | |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """
    Create a theme-aware placeholder Matplotlib figure showing a title and message.

    Args:
        title: First line of the centered text.
        message: Second line of the centered text.

    Returns:
        The placeholder figure. If building it fails, the error is logged and a
        minimal hard-coded dark figure with a red "Fatal" text is returned instead.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        _apply_theme_aware_styling(fig, ax)
        text_color = plt.rcParams.get('text.color', '#E5E7EB')
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=12, wrap=True, zorder=1, color=text_color, alpha=0.7)
        ax.axis('off')
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}", exc_info=True)
        # Release the half-built figure: pyplot keeps every figure it created
        # registered until it is explicitly closed.
        if fig is not None:
            plt.close(fig)
        fig_err, ax_err = plt.subplots(figsize=(8, 4))
        fig_err.patch.set_facecolor('#111827')
        ax_err.set_facecolor('#1F2937')
        ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center', zorder=1, color='red')
        ax_err.axis('off')
        return fig_err
# --- Generic and Reusable Plotting Functions --- | |
def generate_generic_time_series_plot(df, date_column, value_column, title, ylabel, color='cyan'):
    """
    Generic function to create a theme-aware daily time series plot.

    The date column is parsed with pandas (unparseable values become NaT), the
    value column is coerced to numeric, rows missing either are dropped, and
    the values are summed per calendar day before plotting.

    Args:
        df: DataFrame holding the data; may be None or empty.
        date_column: Name of the column with the timestamps.
        value_column: Name of the column with the values to sum per day.
        title: Plot title (also used in log and placeholder messages).
        ylabel: Label for the y axis.
        color: Matplotlib color for the line and the filled area.

    Returns:
        A Matplotlib figure; a placeholder figure when there is no usable data
        or when plotting fails.
    """
    if df is None or df.empty or date_column not in df.columns or value_column not in df.columns:
        # `df is not None` rather than truthiness: bool(DataFrame) raises ValueError.
        logging.info(f"len df {len(df) if df is not None else 0}, dat col {date_column}, value_column {value_column} ")
        return create_placeholder_plot(title=title, message="No data available.")
    fig = None
    try:
        df_copy = df.copy()
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[value_column] = pd.to_numeric(df_copy[value_column], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, value_column]).set_index(date_column)
        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data.")
        data_over_time = df_copy.resample('D')[value_column].sum()
        if data_over_time.empty:
            return create_placeholder_plot(title=title, message="No data in the selected period.")
        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        ax.plot(data_over_time.index, data_over_time.values, marker='o', linestyle='-', color=color, zorder=1, markersize=5, alpha=0.8)
        ax.fill_between(data_over_time.index, data_over_time.values, color=color, alpha=0.1, zorder=1)
        ax.set_title(title, fontsize=14, weight='bold')
        ax.set_xlabel('Date', fontsize=10)
        ax.set_ylabel(ylabel, fontsize=10)
        # Rotate the labels on this axes directly instead of going through
        # pyplot's global "current axes" (plt.xticks).
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(30)
            tick_label.set_ha("right")
        fig.tight_layout(pad=1.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_generic_bar_plot(data_series, title, xlabel, ylabel, color_map='viridis'):
    """
    Generic function to create a theme-aware vertical bar plot.

    Args:
        data_series: pandas Series; the index supplies the bar labels and the
            values the bar heights. May be None or empty.
        title: Plot title (also used in log and placeholder messages).
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        color_map: Name of the Matplotlib colormap the bar colors are sampled from.

    Returns:
        A Matplotlib figure; a placeholder figure when there is no data or
        when plotting fails.
    """
    if data_series is None or data_series.empty:
        return create_placeholder_plot(title=title, message="No data to display.")
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(10, 6))
        _apply_theme_aware_styling(fig, ax)
        # plt.get_cmap() replaces plt.cm.get_cmap(), which was removed in Matplotlib 3.9.
        colors = plt.get_cmap(color_map)(np.linspace(0.4, 0.9, len(data_series)))
        data_series.plot(kind='bar', ax=ax, zorder=2, color=colors, width=0.8)
        ax.set_title(title, fontsize=14, weight='bold')
        ax.set_xlabel(xlabel, fontsize=10)
        ax.set_ylabel(ylabel, fontsize=10)
        # Rotate the labels on this axes directly instead of via plt.xticks,
        # which targets pyplot's global "current axes".
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(45)
            tick_label.set_ha("right")
        text_color = plt.rcParams.get('text.color', '#E5E7EB')
        # Value labels sit 1% of the tallest bar above each bar.
        label_offset = 0.01 * data_series.max()
        for i, v in enumerate(data_series):
            if pd.isna(v):
                # int(NaN) would raise; a missing value simply gets no label.
                continue
            ax.text(i, v + label_offset, str(int(v)), ha='center', va='bottom', zorder=3, color=text_color, fontsize=9)
        fig.tight_layout(pad=1.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_generic_pie_chart(data_series, title, color_map='Pastel2'):
    """
    Generic function to create a theme-aware donut-style pie chart.

    Args:
        data_series: pandas Series; the values size the wedges and the index
            supplies the legend entries. May be None or empty.
        title: Plot title (also used in log and placeholder messages).
        color_map: Name of the Matplotlib colormap, resampled to one color per wedge.

    Returns:
        A Matplotlib figure; a placeholder figure when there is no data or
        when plotting fails.
    """
    if data_series is None or data_series.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 6))
        _apply_theme_aware_styling(fig, ax, is_pie=True)
        theme_text_color = plt.rcParams.get('text.color', '#E5E7EB')
        slice_count = len(data_series)
        # plt.get_cmap() replaces plt.cm.get_cmap(), which was removed in Matplotlib 3.9.
        pie_slice_colors = plt.get_cmap(color_map, slice_count)
        colors = [pie_slice_colors(i) for i in range(slice_count)]
        wedges, texts, autotexts = ax.pie(
            data_series,
            autopct='%1.1f%%',
            startangle=140,
            colors=colors,
            pctdistance=0.85,
            # width < 1 turns the pie into a ring; the wedge borders use the
            # figure background color so the slices appear separated.
            wedgeprops=dict(width=0.4, edgecolor=plt.rcParams.get('figure.facecolor', '#111827'), linewidth=2)
        )
        for text_item in texts + autotexts:
            text_item.set_color(theme_text_color)
            text_item.set_fontsize(10)
            text_item.set_zorder(2)
        for autotext in autotexts:
            autotext.set_weight('bold')
        ax.set_title(title, fontsize=14, weight='bold', pad=20)
        legend = ax.legend(wedges, data_series.index, title="Categories", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1),
                           labelcolor=theme_text_color,
                           frameon=False)
        # labelcolor only covers the entries; color the legend title to match.
        legend.get_title().set_color(theme_text_color)
        fig.tight_layout(pad=1.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
# --- Specific Plot Implementations --- | |
def generate_followers_count_over_time_plot(df, **kwargs):
    """
    Plot organic and paid follower counts over time as two lines.

    Rows are selected where 'follower_count_type' equals the requested type;
    'category_name' is parsed as the date of each row, and the
    'follower_count_organic' / 'follower_count_paid' columns are coerced to
    numbers (non-numeric values become 0).

    Args:
        df: Follower statistics DataFrame; may be None or empty.
        **kwargs: 'type_value' selects the follower_count_type to plot
            (default 'follower_gains_monthly').

    Returns:
        A Matplotlib figure; a placeholder figure when there is no usable data
        or when plotting fails.
    """
    type_value = kwargs.get('type_value', 'follower_gains_monthly')
    title = "Followers Count Over Time"
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")
    fig = None
    try:
        df_filtered = df[df['follower_count_type'] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered['category_name'], errors='coerce')
        df_filtered['follower_count_organic'] = pd.to_numeric(df_filtered['follower_count_organic'], errors='coerce').fillna(0)
        df_filtered['follower_count_paid'] = pd.to_numeric(df_filtered['follower_count_paid'], errors='coerce').fillna(0)
        df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj')
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")
        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)
        ax.plot(df_filtered['datetime_obj'], df_filtered['follower_count_organic'], marker='o', linestyle='-', color='#22D3EE', label='Organic Followers', zorder=1)
        ax.plot(df_filtered['datetime_obj'], df_filtered['follower_count_paid'], marker='x', linestyle='--', color='#A78BFA', label='Paid Followers', zorder=1)
        ax.set_title(title, fontsize=14, weight='bold')
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')
        legend = ax.legend()
        # Take both legend colors from the active theme so text and background
        # always contrast (a hard-coded dark frame clashes with a light theme).
        legend_text_color = plt.rcParams.get('text.color', '#E5E7EB')
        for text in legend.get_texts():
            text.set_color(legend_text_color)
        legend.set_zorder(2)
        legend.get_frame().set_alpha(0.5)
        legend.get_frame().set_facecolor(plt.rcParams.get('axes.facecolor', '#1F2937'))
        # Rotate the labels on this axes directly instead of via plt.xticks,
        # which targets pyplot's global "current axes".
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(30)
            tick_label.set_ha("right")
        fig.tight_layout(pad=1.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def generate_followers_by_demographics_plot(df, **kwargs):
    """
    Bar plot of the ten categories with the most organic followers for one demographic type.

    Rows are selected where 'follower_count_type' equals kwargs['type_value'],
    organic counts are coerced to numbers (non-numeric becomes 0), summed per
    'category_name', and the ten largest totals are passed to the generic bar plot.

    Args:
        df: Follower statistics DataFrame; may be None or empty.
        **kwargs: 'type_value' (required) is the demographic type to show;
            'plot_title' overrides the default title.

    Returns:
        A Matplotlib figure, or a placeholder figure when input is missing,
        nothing matches, or an error occurs.
    """
    plot_title = kwargs.get('plot_title', "Followers by Demographics")
    type_value = kwargs.get('type_value')

    has_rows = df is not None and not df.empty
    if not has_rows or not type_value:
        return create_placeholder_plot(title=plot_title, message="No data or demographic type not specified.")

    try:
        type_mask = df['follower_count_type'] == type_value
        subset = df[type_mask].copy()
        if subset.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for type '{type_value}'.")

        organic = pd.to_numeric(subset['follower_count_organic'], errors='coerce')
        subset['follower_count_organic'] = organic.fillna(0)

        totals_by_category = subset.groupby('category_name')['follower_count_organic'].sum()
        top_categories = totals_by_category.sort_values(ascending=False).head(10)
        if top_categories.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display.")

        return generate_generic_bar_plot(top_categories, plot_title, 'Category', 'Number of Followers', 'plasma')
    except Exception as e:
        logging.error(f"Error in {plot_title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """
    Plot the engagement column over time by delegating to the generic time series plot.

    Args:
        df: DataFrame with the posts data.
        date_column: Name of the timestamp column.
        engagement_rate_col: Name of the column holding the engagement values.

    Returns:
        Whatever generate_generic_time_series_plot returns for these arguments.
    """
    return generate_generic_time_series_plot(
        df,
        date_column,
        engagement_rate_col,
        "Engagement Rate Over Time",
        'Engagement Rate (%)',
        color='#F472B6',
    )
def generate_content_format_breakdown_plot(df, format_col='media_type', **kwargs):
    """
    Pie chart of how many rows fall into each value of the format column.

    Args:
        df: DataFrame with the posts data; may be None or empty.
        format_col: Name of the column holding the content format.
        **kwargs: Accepted but not used.

    Returns:
        A Matplotlib figure from the generic pie chart, or a placeholder
        figure when the frame is missing, empty, or lacks the column.
    """
    title = "Content by Format"

    usable = df is not None and not df.empty and format_col in df.columns
    if not usable:
        return create_placeholder_plot(title=title, message="No data available.")

    counts_by_format = df[format_col].value_counts()
    counts_by_format = counts_by_format.dropna()
    return generate_generic_pie_chart(counts_by_format, title, 'viridis')
def _parse_eb_label(label_data): | |
if isinstance(label_data, list): return label_data | |
if isinstance(label_data, str): | |
try: | |
parsed = ast.literal_eval(label_data) | |
return parsed if isinstance(parsed, list) else [str(parsed)] | |
except (ValueError, SyntaxError): | |
return [label_data.strip()] if label_data.strip() else [] | |
return [] if pd.isna(label_data) else [str(label_data)] | |
def generate_content_topic_breakdown_plot(df, topics_col='li_eb_labels', **kwargs):
    """
    Horizontal bar plot of the 15 most frequent topics.

    Each cell of the topics column is normalized to a list of labels with
    _parse_eb_label, the lists are exploded to one label per row, and the
    labels are counted. Empty-string labels are ignored.

    Args:
        df: DataFrame with the posts data; may be None or empty.
        topics_col: Name of the column holding the topic labels.
        **kwargs: Accepted but not used.

    Returns:
        A Matplotlib figure; a placeholder figure when there is no data, no
        topics are found, or plotting fails.
    """
    title = "Content by Topics"
    if df is None or df.empty or topics_col not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")
    fig = None
    try:
        topic_counts = df[topics_col].apply(_parse_eb_label).explode().dropna().value_counts()
        # Keep the 15 largest, then sort ascending so the biggest bar is drawn last.
        topic_counts = topic_counts[topic_counts.index != ''].nlargest(15).sort_values(ascending=True)
        if topic_counts.empty:
            return create_placeholder_plot(title=title, message="No topic data found.")
        fig, ax = plt.subplots(figsize=(10, 8))
        _apply_theme_aware_styling(fig, ax)
        # plt.get_cmap() replaces plt.cm.get_cmap(), which was removed in Matplotlib 3.9.
        colors = plt.get_cmap('YlGnBu')(np.linspace(0.3, 1, len(topic_counts)))
        topic_counts.plot(kind='barh', ax=ax, zorder=2, color=colors)
        ax.set_title(title, fontsize=14, weight='bold')
        ax.set_xlabel('Number of Posts')
        ax.set_ylabel('Topic')
        text_color = plt.rcParams.get('text.color', '#E5E7EB')
        # Count labels sit 1% of the longest bar to the right of each bar.
        label_offset = 0.01 * topic_counts.max()
        for i, count in enumerate(topic_counts):
            ax.text(count + label_offset, i, f' {count}', va='center', ha='left', zorder=3, color=text_color, fontsize=9)
        fig.tight_layout(pad=1.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        # Release the half-built figure, matching the other plot functions.
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def update_analytics_plots_figures(token_state_value, date_filter_option, custom_start_date, custom_end_date, current_plot_configs):
    """
    Rebuild every analytics figure for the selected date filter.

    Args:
        token_state_value: Dict-like application state; must contain a truthy
            "token". Optional "config_*" keys override the column names used
            for media type, topic labels and the mentions date.
        date_filter_option: Selected date filter, echoed in the status message.
        custom_start_date: Passed through to prepare_filtered_analytics_data.
        custom_end_date: Passed through to prepare_filtered_analytics_data.
        current_plot_configs: Sequence of dicts, each with an "id" (and
            optionally a "label"), listing the plots to produce in order.

    Returns:
        list: [status message, one figure per entry of current_plot_configs
        (same order), summaries]. On a missing token or a data-preparation
        error every figure is a placeholder and the summaries are a dict
        mapping each plot id to an explanatory text; otherwise the summaries
        are whatever generate_chatbot_data_summaries returned.
    """
    logging.info(f"Updating analytics plot figures with new styling. Filter: {date_filter_option}")
    num_expected_plots = len(current_plot_configs)

    if not token_state_value or not token_state_value.get("token"):
        message = "❌ Accesso negato. Nessun token. Impossibile generare le analisi."
        logging.warning(message)
        placeholder_figs = [create_placeholder_plot(title="Accesso Negato") for _ in range(num_expected_plots)]
        summaries = {p_cfg["id"]: "Accesso negato, nessun dato per il chatbot." for p_cfg in current_plot_configs}
        return [message] + placeholder_figs + [summaries]

    try:
        (posts_df, mentions_df, follower_stats_df,
         raw_follower_stats_df, start_dt_for_msg, end_dt_for_msg) = \
            prepare_filtered_analytics_data(token_state_value, date_filter_option, custom_start_date, custom_end_date)
        plot_data_summaries_for_chatbot = generate_chatbot_data_summaries(
            current_plot_configs, posts_df, mentions_df,
            follower_stats_df, raw_follower_stats_df, token_state_value
        )
    except Exception as e:
        error_msg = f"❌ Errore durante la preparazione dei dati per le analisi: {e}"
        logging.error(error_msg, exc_info=True)
        placeholder_figs = [create_placeholder_plot(title="Errore Preparazione Dati", message=str(e)) for _ in range(num_expected_plots)]
        summaries = {p_cfg["id"]: f"Errore preparazione dati: {e}" for p_cfg in current_plot_configs}
        return [error_msg] + placeholder_figs + [summaries]

    mentions_date_col = token_state_value.get("config_date_col_mentions", "date")

    # Map plot IDs to zero-argument callables. Evaluation is deferred so that a
    # failure in one plot (e.g. a missing column) is caught per plot below.
    plot_functions = {
        # Follower dynamics
        "followers_count": lambda: generate_followers_count_over_time_plot(follower_stats_df, type_value='follower_gains_monthly'),
        # Simplified for now: plots the organic follower counts as a time series.
        "followers_growth_rate": lambda: generate_generic_time_series_plot(follower_stats_df, 'category_name', 'follower_count_organic', 'Follower Growth Rate', 'Growth Rate (%)', color='#A78BFA'),
        "followers_by_location": lambda: generate_followers_by_demographics_plot(raw_follower_stats_df, type_value='follower_geo', plot_title="Follower per Località"),
        "followers_by_role": lambda: generate_followers_by_demographics_plot(raw_follower_stats_df, type_value='follower_function', plot_title="Follower per Ruolo"),
        "followers_by_industry": lambda: generate_followers_by_demographics_plot(raw_follower_stats_df, type_value='follower_industry', plot_title="Follower per Settore"),
        "followers_by_seniority": lambda: generate_followers_by_demographics_plot(raw_follower_stats_df, type_value='follower_seniority', plot_title="Follower per Anzianità"),
        # Post performance insights
        "engagement_rate": lambda: generate_engagement_rate_over_time_plot(posts_df),
        "reach_over_time": lambda: generate_generic_time_series_plot(posts_df, 'published_at', 'clickCount', 'Reach Over Time (Clicks)', 'Total Clicks', color='#6EE7B7'),
        "impressions_over_time": lambda: generate_generic_time_series_plot(posts_df, 'published_at', 'impressionCount', 'Impressions Over Time', 'Total Impressions', color='#38BDF8'),
        "likes_over_time": lambda: generate_generic_time_series_plot(posts_df, 'published_at', 'likeCount', 'Reactions (Likes) Over Time', 'Total Likes', color='#FB7185'),
        # Detailed post engagement over time
        "clicks_over_time": lambda: generate_generic_time_series_plot(posts_df, 'published_at', 'clickCount', 'Clicks Over Time', 'Total Clicks', color='#6EE7B7'),
        "shares_over_time": lambda: generate_generic_time_series_plot(posts_df, 'published_at', 'shareCount', 'Shares Over Time', 'Total Shares', color='#34D399'),
        "comments_over_time": lambda: generate_generic_time_series_plot(posts_df, 'published_at', 'commentCount', 'Comments Over Time', 'Total Comments', color='#FACC15'),
        "comments_sentiment": lambda: generate_generic_pie_chart(posts_df['sentiment'].value_counts().dropna(), "Breakdown of Comments by Sentiment", 'coolwarm'),
        # Content strategy analysis
        "post_frequency_cs": lambda: generate_generic_time_series_plot(posts_df.resample('D', on='published_at').size().reset_index(name='count'), 'published_at', 'count', 'Post Frequency', 'Number of Posts', color='#C084FC'),
        "content_format_breakdown_cs": lambda: generate_content_format_breakdown_plot(posts_df, format_col=token_state_value.get("config_media_type_col", "media_type")),
        "content_topic_breakdown_cs": lambda: generate_content_topic_breakdown_plot(posts_df, topics_col=token_state_value.get("config_eb_labels_col", "li_eb_labels")),
        # Mentions analysis (detail)
        "mention_analysis_volume": lambda: generate_generic_time_series_plot(
            mentions_df.resample('D', on=mentions_date_col).size().reset_index(name='count'),
            mentions_date_col,
            'count',
            'Mentions Volume',
            'Number of Mentions',
            color='#818CF8'
        ),
        "mention_analysis_sentiment": lambda: generate_generic_pie_chart(mentions_df['sentiment_label'].value_counts().dropna(), "Mention Sentiment Breakdown")
    }

    # Diagnostic only: getattr keeps a None frame from aborting the whole
    # refresh before any plot has been attempted.
    logging.info(f"colonne posts df {getattr(posts_df, 'columns', None)}")
    logging.info(f"colonne mentions df {getattr(mentions_df, 'columns', None)}")

    plot_figs = []
    for config in current_plot_configs:
        plot_id = config["id"]
        build_plot = plot_functions.get(plot_id)
        if build_plot is None:
            logging.warning(f"No plot function found for ID: '{plot_id}'")
            plot_figs.append(create_placeholder_plot(title=f"Plot Not Implemented: {config.get('label', plot_id)}"))
            continue
        try:
            plot_figs.append(build_plot())
        except Exception as e:
            # One broken plot must not prevent the others from rendering.
            logging.error(f"Failed to generate plot for '{plot_id}': {e}", exc_info=True)
            plot_figs.append(create_placeholder_plot(title=f"Error: {config.get('label', plot_id)}", message=str(e)))

    message = f"📊 Analisi aggiornate per il periodo: {date_filter_option}"
    if date_filter_option == "Intervallo Personalizzato":
        s_display = start_dt_for_msg.strftime('%Y-%m-%d') if start_dt_for_msg else "N/A"
        e_display = end_dt_for_msg.strftime('%Y-%m-%d') if end_dt_for_msg else "N/A"
        message += f" (Da: {s_display} A: {e_display})"

    return [message] + plot_figs + [plot_data_summaries_for_chatbot]