Spaces:
Running
Running
Update ui/analytics_plot_generator.py
Browse files- ui/analytics_plot_generator.py +340 -670
ui/analytics_plot_generator.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
#analytics_plot_generator.py
|
2 |
import pandas as pd
|
3 |
import matplotlib.pyplot as plt
|
4 |
import logging
|
@@ -16,145 +15,133 @@ from data_processing.analytics_data_processing import (
|
|
16 |
# Configure logging for this module
|
17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
"""
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
ax.spines['top'].set_visible(False)
|
30 |
ax.spines['right'].set_visible(False)
|
31 |
ax.spines['bottom'].set_visible(False)
|
32 |
ax.spines['left'].set_visible(False)
|
33 |
|
34 |
-
# Add a new rounded background for the axes
|
35 |
-
# Using FancyBboxPatch to create a rounded rectangle background for the plot area
|
36 |
-
# Coordinates are relative to axes (0,0 is bottom-left, 1,1 is top-right)
|
37 |
rounded_rect_bg = patches.FancyBboxPatch(
|
38 |
-
(0, 0),
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
linewidth=0.5, # Border line width
|
46 |
-
zorder=-1 # Put it behind other plot elements like gridlines and data
|
47 |
)
|
48 |
ax.add_patch(rounded_rect_bg)
|
49 |
-
|
50 |
-
#
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
|
55 |
-
"""Creates a placeholder Matplotlib plot
|
56 |
try:
|
57 |
fig, ax = plt.subplots(figsize=(8, 4))
|
58 |
-
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
64 |
return fig
|
65 |
except Exception as e:
|
66 |
logging.error(f"Error creating placeholder plot: {e}")
|
67 |
-
# Fallback placeholder if the above fails (less styling)
|
68 |
fig_err, ax_err = plt.subplots(figsize=(8,4))
|
69 |
fig_err.patch.set_alpha(0.0)
|
70 |
ax_err.patch.set_alpha(0.0)
|
71 |
-
ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center', zorder=1)
|
72 |
ax_err.axis('off')
|
73 |
return fig_err
|
74 |
|
75 |
def generate_posts_activity_plot(df, date_column='published_at'):
|
76 |
-
"""Generates a plot for posts activity over time."""
|
77 |
-
|
78 |
-
|
79 |
-
logging.warning(f"Posts activity: DataFrame is empty.")
|
80 |
-
return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
|
81 |
-
if date_column not in df.columns:
|
82 |
-
logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
83 |
-
return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
|
84 |
|
85 |
-
fig = None
|
86 |
try:
|
87 |
df_copy = df.copy()
|
88 |
-
|
89 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
90 |
-
|
91 |
df_copy = df_copy.dropna(subset=[date_column])
|
92 |
if df_copy.empty:
|
93 |
-
logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
|
94 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")
|
95 |
|
96 |
posts_over_time = df_copy.set_index(date_column).resample('D').size()
|
97 |
-
|
98 |
if posts_over_time.empty:
|
99 |
-
logging.info("Posts activity: No posts after resampling by day.")
|
100 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
|
101 |
|
102 |
fig, ax = plt.subplots(figsize=(10, 5))
|
103 |
-
|
104 |
|
105 |
posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', zorder=1)
|
106 |
ax.set_xlabel('Date')
|
107 |
ax.set_ylabel('Number of Posts')
|
108 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0) # Ensure grid is behind plot line
|
109 |
plt.xticks(rotation=45)
|
110 |
-
fig.tight_layout(pad=0.5)
|
111 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
112 |
-
logging.info("Successfully generated posts activity plot.")
|
113 |
return fig
|
114 |
except Exception as e:
|
115 |
logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
|
116 |
-
if fig: plt.close(fig)
|
117 |
return create_placeholder_plot(title="Posts Activity Error", message=str(e))
|
118 |
|
119 |
|
120 |
def generate_mentions_activity_plot(df, date_column='date'):
|
121 |
-
"""Generates a plot for mentions activity over time."""
|
122 |
-
|
123 |
-
|
124 |
-
logging.warning(f"Mentions activity: DataFrame is empty.")
|
125 |
-
return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
|
126 |
-
if date_column not in df.columns:
|
127 |
-
logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
128 |
-
return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
|
129 |
|
130 |
fig = None
|
131 |
try:
|
132 |
df_copy = df.copy()
|
133 |
-
|
134 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
135 |
-
|
136 |
df_copy = df_copy.dropna(subset=[date_column])
|
137 |
if df_copy.empty:
|
138 |
-
logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
|
139 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")
|
140 |
|
141 |
mentions_over_time = df_copy.set_index(date_column).resample('D').size()
|
142 |
-
|
143 |
if mentions_over_time.empty:
|
144 |
-
logging.info("Mentions activity: No mentions after resampling by day.")
|
145 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")
|
146 |
|
147 |
fig, ax = plt.subplots(figsize=(10, 5))
|
148 |
-
|
149 |
|
150 |
mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple', zorder=1)
|
151 |
ax.set_xlabel('Date')
|
152 |
ax.set_ylabel('Number of Mentions')
|
153 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
154 |
plt.xticks(rotation=45)
|
155 |
fig.tight_layout(pad=0.5)
|
156 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
157 |
-
logging.info("Successfully generated mentions activity plot.")
|
158 |
return fig
|
159 |
except Exception as e:
|
160 |
logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
|
@@ -162,524 +149,303 @@ def generate_mentions_activity_plot(df, date_column='date'):
|
|
162 |
return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
|
163 |
|
164 |
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
165 |
-
"""Generates a pie chart for mention sentiment distribution."""
|
166 |
-
|
167 |
-
|
168 |
-
if df is None or df.empty:
|
169 |
-
logging.warning("Mention sentiment: DataFrame is empty.")
|
170 |
-
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
|
171 |
-
if sentiment_column not in df.columns:
|
172 |
-
msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
|
173 |
-
logging.warning(msg)
|
174 |
-
return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)
|
175 |
|
176 |
fig = None
|
177 |
try:
|
178 |
-
|
179 |
-
sentiment_counts = df_copy[sentiment_column].value_counts()
|
180 |
if sentiment_counts.empty:
|
181 |
-
logging.info("Mention sentiment: No sentiment data after value_counts.")
|
182 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
183 |
|
184 |
fig, ax = plt.subplots(figsize=(8, 5))
|
185 |
-
|
186 |
|
187 |
-
|
188 |
pie_slice_colors = plt.cm.get_cmap('Pastel2', len(sentiment_counts))
|
189 |
|
190 |
-
|
191 |
-
wedges, texts, autotexts = ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90,
|
192 |
colors=[pie_slice_colors(i) for i in range(len(sentiment_counts))])
|
193 |
-
|
194 |
-
|
195 |
-
wedge.set_zorder(1)
|
196 |
for text_item in texts + autotexts:
|
|
|
197 |
text_item.set_zorder(2)
|
|
|
|
|
198 |
|
199 |
-
ax.axis('equal')
|
200 |
-
|
201 |
-
fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95) # Give pie chart space
|
202 |
-
logging.info("Successfully generated mention sentiment plot.")
|
203 |
return fig
|
204 |
except Exception as e:
|
205 |
logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
|
206 |
if fig: plt.close(fig)
|
207 |
return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
|
208 |
|
209 |
-
def generate_followers_count_over_time_plot(df,
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
type_value='follower_gains_monthly'):
|
214 |
-
title = f"Followers Count Over Time ({type_value})"
|
215 |
-
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
216 |
-
|
217 |
if df is None or df.empty:
|
218 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
219 |
|
220 |
-
required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
|
221 |
-
missing_cols = [col for col in required_cols if col not in df.columns]
|
222 |
-
if missing_cols:
|
223 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
224 |
-
|
225 |
fig = None
|
226 |
try:
|
227 |
-
|
228 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
229 |
-
|
230 |
if df_filtered.empty:
|
231 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
232 |
-
|
233 |
-
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[
|
234 |
-
df_filtered[
|
235 |
-
df_filtered[
|
236 |
-
df_filtered = df_filtered.dropna(subset=['datetime_obj'
|
237 |
-
|
238 |
if df_filtered.empty:
|
239 |
-
return create_placeholder_plot(title=title, message="No valid data after cleaning
|
240 |
|
241 |
fig, ax = plt.subplots(figsize=(10, 5))
|
242 |
-
|
243 |
|
244 |
-
ax.plot(df_filtered['datetime_obj'], df_filtered[
|
245 |
-
ax.plot(df_filtered['datetime_obj'], df_filtered[
|
246 |
ax.set_xlabel('Date')
|
247 |
ax.set_ylabel('Follower Count')
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
251 |
plt.xticks(rotation=45)
|
252 |
fig.tight_layout(pad=0.5)
|
253 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
254 |
return fig
|
255 |
except Exception as e:
|
256 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
257 |
if fig: plt.close(fig)
|
258 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
259 |
|
260 |
-
def generate_followers_growth_rate_plot(df,
|
261 |
-
|
262 |
-
|
263 |
-
type_filter_column='follower_count_type',
|
264 |
-
type_value='follower_gains_monthly'):
|
265 |
title = f"Follower Growth Rate ({type_value})"
|
266 |
-
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
267 |
-
|
268 |
if df is None or df.empty:
|
269 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
270 |
|
271 |
-
required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
|
272 |
-
missing_cols = [col for col in required_cols if col not in df.columns]
|
273 |
-
if missing_cols:
|
274 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
275 |
-
|
276 |
fig = None
|
277 |
try:
|
278 |
-
|
279 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
280 |
-
|
281 |
if df_filtered.empty:
|
282 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
283 |
-
|
284 |
-
df_filtered['
|
285 |
-
df_filtered[
|
286 |
-
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
|
287 |
df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
|
288 |
|
289 |
-
if
|
290 |
return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
|
291 |
-
|
292 |
-
df_filtered['organic_growth_rate'] = df_filtered[
|
293 |
-
df_filtered['paid_growth_rate'] = df_filtered[
|
294 |
df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
|
295 |
|
296 |
fig, ax = plt.subplots(figsize=(10, 5))
|
297 |
-
|
298 |
|
299 |
-
|
300 |
-
if
|
301 |
ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate', zorder=1)
|
302 |
-
|
303 |
-
|
304 |
-
if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
|
305 |
ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate', zorder=1)
|
306 |
-
|
307 |
-
|
308 |
-
if not
|
309 |
-
return create_placeholder_plot(title=title, message="No
|
310 |
|
311 |
ax.set_xlabel('Date')
|
312 |
ax.set_ylabel('Growth Rate (%)')
|
313 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
314 |
-
|
315 |
-
|
316 |
-
|
|
|
|
|
|
|
|
|
317 |
plt.xticks(rotation=45)
|
318 |
fig.tight_layout(pad=0.5)
|
319 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
320 |
return fig
|
321 |
except Exception as e:
|
322 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
323 |
if fig: plt.close(fig)
|
324 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
325 |
|
326 |
-
def generate_followers_by_demographics_plot(df,
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
332 |
-
|
333 |
-
if df is None or df.empty:
|
334 |
-
return create_placeholder_plot(title=plot_title, message="No follower data available.")
|
335 |
-
|
336 |
-
required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
|
337 |
-
missing_cols = [col for col in required_cols if col not in df.columns]
|
338 |
-
if missing_cols:
|
339 |
-
return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
340 |
|
341 |
-
if
|
342 |
-
return create_placeholder_plot(title=plot_title, message="
|
343 |
|
344 |
fig = None
|
345 |
try:
|
346 |
-
|
347 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
348 |
-
|
349 |
if df_filtered.empty:
|
350 |
-
return create_placeholder_plot(title=plot_title, message=f"No data for
|
351 |
-
|
352 |
-
df_filtered[
|
353 |
-
df_filtered[
|
354 |
-
demographics_data = df_filtered.groupby(category_col)[[
|
355 |
-
demographics_data['total_for_sort'] = demographics_data
|
356 |
-
demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
|
357 |
|
358 |
if demographics_data.empty:
|
359 |
-
return create_placeholder_plot(title=plot_title, message="No demographic data to display
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
demographics_data = demographics_data.head(top_n)
|
364 |
-
|
365 |
-
fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
|
366 |
-
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
367 |
-
|
368 |
-
bar_width = 0.35
|
369 |
-
index = np.arange(len(demographics_data.index))
|
370 |
|
371 |
-
|
372 |
-
color_paid = plt.cm.get_cmap('tab10')(1)
|
373 |
-
|
374 |
-
bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color=color_organic, zorder=1)
|
375 |
-
bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color=color_paid, zorder=1)
|
376 |
-
|
377 |
ax.set_xlabel(category_col.replace('_', ' ').title())
|
378 |
ax.set_ylabel('Number of Followers')
|
379 |
-
|
380 |
-
ax.
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
yval = bar_item.get_height()
|
388 |
-
if yval > 0:
|
389 |
-
ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]),
|
390 |
-
str(int(yval)), ha='center', va='bottom', fontsize=8, zorder=2)
|
391 |
-
|
392 |
fig.tight_layout(pad=0.5)
|
393 |
-
fig.subplots_adjust(top=0.92, bottom=0.25, left=0.1, right=0.95)
|
394 |
return fig
|
395 |
except Exception as e:
|
396 |
logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
|
397 |
if fig: plt.close(fig)
|
398 |
return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
|
399 |
|
400 |
-
def
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
return create_placeholder_plot(title=title, message="No post data for engagement rate.")
|
406 |
-
|
407 |
-
required_cols = [date_column, engagement_rate_col]
|
408 |
-
missing_cols = [col for col in required_cols if col not in df.columns]
|
409 |
-
if missing_cols:
|
410 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
411 |
-
|
412 |
fig = None
|
413 |
try:
|
414 |
df_copy = df.copy()
|
415 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
416 |
-
df_copy[
|
417 |
-
df_copy = df_copy.dropna(subset=[date_column,
|
418 |
-
|
419 |
if df_copy.empty:
|
420 |
-
return create_placeholder_plot(title=title, message="No valid data
|
421 |
-
|
422 |
-
engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
|
423 |
-
engagement_over_time = engagement_over_time.dropna()
|
424 |
|
425 |
-
|
426 |
-
|
|
|
427 |
|
428 |
fig, ax = plt.subplots(figsize=(10, 5))
|
429 |
-
|
430 |
-
|
431 |
-
ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange', zorder=1)
|
432 |
-
ax.set_xlabel('Date')
|
433 |
-
ax.set_ylabel('Engagement Rate')
|
434 |
-
max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0
|
435 |
-
formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0
|
436 |
-
if max_rate_val > 1.5 and formatter_xmax == 1.0:
|
437 |
-
formatter_xmax = 100.0
|
438 |
-
elif max_rate_val > 100 and formatter_xmax == 1.0:
|
439 |
-
formatter_xmax = max_rate_val
|
440 |
-
|
441 |
-
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
442 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
443 |
-
plt.xticks(rotation=45)
|
444 |
-
fig.tight_layout(pad=0.5)
|
445 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
446 |
-
return fig
|
447 |
-
except Exception as e:
|
448 |
-
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
449 |
-
if fig: plt.close(fig)
|
450 |
-
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
451 |
-
|
452 |
-
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
|
453 |
-
title = "Reach Over Time (Clicks)"
|
454 |
-
logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
455 |
-
|
456 |
-
if df is None or df.empty:
|
457 |
-
return create_placeholder_plot(title=title, message="No post data for reach.")
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
if missing_cols:
|
462 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
463 |
-
|
464 |
-
fig = None
|
465 |
-
try:
|
466 |
-
df_copy = df.copy()
|
467 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
468 |
-
df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
|
469 |
-
df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
|
470 |
-
|
471 |
-
if df_copy.empty:
|
472 |
-
return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
|
473 |
-
|
474 |
-
reach_over_time = df_copy.resample('D')[reach_col].sum()
|
475 |
-
|
476 |
-
fig, ax = plt.subplots(figsize=(10, 5))
|
477 |
-
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
478 |
-
|
479 |
-
ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen', zorder=1)
|
480 |
ax.set_xlabel('Date')
|
481 |
-
ax.set_ylabel(
|
482 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
483 |
plt.xticks(rotation=45)
|
484 |
fig.tight_layout(pad=0.5)
|
485 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
486 |
return fig
|
487 |
except Exception as e:
|
488 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
489 |
if fig: plt.close(fig)
|
490 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
491 |
|
492 |
-
def
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
return create_placeholder_plot(title=title, message="No post data for impressions.")
|
498 |
-
|
499 |
-
required_cols = [date_column, impressions_col]
|
500 |
-
missing_cols = [col for col in required_cols if col not in df.columns]
|
501 |
-
if missing_cols:
|
502 |
-
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
503 |
-
|
504 |
fig = None
|
505 |
try:
|
506 |
df_copy = df.copy()
|
507 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
508 |
-
df_copy[
|
509 |
-
df_copy = df_copy.dropna(subset=[date_column,
|
510 |
|
511 |
if df_copy.empty:
|
512 |
-
return create_placeholder_plot(title=title, message="No valid data
|
513 |
|
514 |
-
|
515 |
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue', zorder=1)
|
520 |
-
ax.set_xlabel('Date')
|
521 |
-
ax.set_ylabel('Total Impressions')
|
522 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
523 |
-
plt.xticks(rotation=45)
|
524 |
-
fig.tight_layout(pad=0.5)
|
525 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
526 |
-
return fig
|
527 |
-
except Exception as e:
|
528 |
-
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
529 |
-
if fig: plt.close(fig)
|
530 |
-
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
531 |
-
|
532 |
-
def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
|
533 |
-
title = "Reactions (Likes) Over Time"
|
534 |
-
logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
535 |
-
if df is None or df.empty:
|
536 |
-
return create_placeholder_plot(title=title, message="No post data for likes.")
|
537 |
-
required_cols = [date_column, likes_col]
|
538 |
-
if any(col not in df.columns for col in required_cols):
|
539 |
-
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
540 |
-
|
541 |
-
fig = None
|
542 |
-
try:
|
543 |
-
df_copy = df.copy()
|
544 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
545 |
-
df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
|
546 |
-
df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
|
547 |
-
if df_copy.empty:
|
548 |
-
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
549 |
|
550 |
-
|
551 |
-
fig,
|
552 |
-
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
553 |
|
554 |
-
ax.plot(
|
555 |
-
ax.set_xlabel('Date')
|
556 |
-
ax.set_ylabel('Total Likes')
|
557 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
558 |
-
plt.xticks(rotation=45)
|
559 |
-
fig.tight_layout(pad=0.5)
|
560 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
561 |
-
return fig
|
562 |
-
except Exception as e:
|
563 |
-
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
564 |
-
if fig: plt.close(fig)
|
565 |
-
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
566 |
-
|
567 |
-
def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
|
568 |
-
# This function reuses generate_reach_over_time_plot logic
|
569 |
-
return generate_reach_over_time_plot(df, date_column, clicks_col)
|
570 |
-
|
571 |
-
def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
|
572 |
-
title = "Shares Over Time"
|
573 |
-
logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
574 |
-
if df is None or df.empty:
|
575 |
-
return create_placeholder_plot(title=title, message="No post data for shares.")
|
576 |
-
required_cols = [date_column, shares_col]
|
577 |
-
if any(col not in df.columns for col in required_cols):
|
578 |
-
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
579 |
-
|
580 |
-
fig = None
|
581 |
-
try:
|
582 |
-
df_copy = df.copy()
|
583 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
584 |
-
df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
|
585 |
-
df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
|
586 |
-
if df_copy.empty:
|
587 |
-
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
588 |
-
|
589 |
-
data_over_time = df_copy.resample('D')[shares_col].sum()
|
590 |
-
fig, ax = plt.subplots(figsize=(10, 5))
|
591 |
-
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
592 |
|
593 |
-
|
|
|
|
|
|
|
|
|
|
|
594 |
ax.set_xlabel('Date')
|
595 |
-
ax.set_ylabel('
|
596 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
597 |
plt.xticks(rotation=45)
|
598 |
fig.tight_layout(pad=0.5)
|
599 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
600 |
return fig
|
601 |
except Exception as e:
|
602 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
603 |
if fig: plt.close(fig)
|
604 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
605 |
|
606 |
-
def
|
607 |
-
|
608 |
-
logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
609 |
-
if df is None or df.empty:
|
610 |
-
return create_placeholder_plot(title=title, message="No post data for comments.")
|
611 |
-
required_cols = [date_column, comments_col]
|
612 |
-
if any(col not in df.columns for col in required_cols):
|
613 |
-
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
614 |
|
615 |
-
|
616 |
-
|
617 |
-
df_copy = df.copy()
|
618 |
-
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
619 |
-
df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
|
620 |
-
df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
|
621 |
-
if df_copy.empty:
|
622 |
-
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
623 |
-
|
624 |
-
data_over_time = df_copy.resample('D')[comments_col].sum()
|
625 |
-
fig, ax = plt.subplots(figsize=(10, 5))
|
626 |
-
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
627 |
-
|
628 |
-
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold', zorder=1)
|
629 |
-
ax.set_xlabel('Date')
|
630 |
-
ax.set_ylabel('Total Comments')
|
631 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
632 |
-
plt.xticks(rotation=45)
|
633 |
-
fig.tight_layout(pad=0.5)
|
634 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
635 |
-
return fig
|
636 |
-
except Exception as e:
|
637 |
-
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
638 |
-
if fig: plt.close(fig)
|
639 |
-
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
640 |
|
641 |
-
def
|
642 |
-
|
643 |
-
logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
|
644 |
|
645 |
-
|
646 |
-
|
647 |
-
if sentiment_column not in df.columns:
|
648 |
-
if 'sentiment' in df.columns and sentiment_column != 'sentiment': # Check for a common alternative name
|
649 |
-
logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
|
650 |
-
sentiment_column = 'sentiment'
|
651 |
-
if sentiment_column not in df.columns: # If fallback also not found
|
652 |
-
return create_placeholder_plot(title=title, message=f"Fallback sentiment column 'sentiment' also not found. Available: {df.columns.tolist()}")
|
653 |
-
else: # If original and 'sentiment' fallback are not found
|
654 |
-
return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' not found. Available: {df.columns.tolist()}")
|
655 |
|
|
|
|
|
656 |
|
657 |
-
|
658 |
-
|
659 |
|
|
|
|
|
|
|
|
|
|
|
|
|
660 |
fig = None
|
661 |
try:
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
666 |
-
if sentiment_counts.empty or sentiment_counts.sum() == 0:
|
667 |
-
return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
|
668 |
|
669 |
fig, ax = plt.subplots(figsize=(8, 5))
|
670 |
-
|
671 |
|
|
|
672 |
pie_slice_colors = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
|
673 |
-
|
674 |
-
|
675 |
-
colors=[pie_slice_colors(i) for i in range(len(sentiment_counts))])
|
676 |
-
for wedge in wedges:
|
677 |
-
wedge.set_zorder(1)
|
678 |
for text_item in texts + autotexts:
|
679 |
-
text_item.
|
680 |
-
|
681 |
-
ax.
|
682 |
-
|
683 |
fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
|
684 |
return fig
|
685 |
except Exception as e:
|
@@ -687,90 +453,68 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
|
|
687 |
if fig: plt.close(fig)
|
688 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
689 |
|
690 |
-
def generate_post_frequency_plot(df, date_column='published_at',
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
if df is None or df.empty:
|
695 |
return create_placeholder_plot(title=title, message="No data available.")
|
696 |
-
|
697 |
-
return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
|
698 |
-
|
699 |
fig = None
|
700 |
try:
|
701 |
df_copy = df.copy()
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
df_copy = df_copy.dropna(subset=[date_column])
|
706 |
if df_copy.empty:
|
707 |
-
return create_placeholder_plot(title=title, message="No valid
|
708 |
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
|
713 |
|
714 |
fig, ax = plt.subplots(figsize=(10, 5))
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
post_frequency.plot(kind='bar', ax=ax, color=[bar_colors(i) for i in range(num_bars)], zorder=1)
|
721 |
-
for i, v in enumerate(post_frequency):
|
722 |
-
ax.text(i, v + (0.01 * post_frequency.max()), str(v), ha='center', va='bottom', zorder=2)
|
723 |
-
else:
|
724 |
-
post_frequency.plot(kind='line', ax=ax, marker='o', zorder=1)
|
725 |
-
|
726 |
-
ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
|
727 |
ax.set_ylabel('Number of Posts')
|
728 |
-
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
729 |
plt.xticks(rotation=45)
|
730 |
fig.tight_layout(pad=0.5)
|
731 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
|
732 |
-
logging.info(f"Successfully generated {title} plot.")
|
733 |
return fig
|
734 |
except Exception as e:
|
735 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
736 |
if fig: plt.close(fig)
|
737 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
738 |
|
739 |
-
def generate_content_format_breakdown_plot(df, format_col='media_type'):
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
if df is None or df.empty:
|
744 |
return create_placeholder_plot(title=title, message="No data available.")
|
745 |
-
|
746 |
-
return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
|
747 |
-
|
748 |
fig = None
|
749 |
try:
|
750 |
-
|
751 |
-
format_counts = df_copy[format_col].value_counts().dropna()
|
752 |
-
|
753 |
if format_counts.empty:
|
754 |
-
return create_placeholder_plot(title=title, message="No
|
755 |
|
756 |
-
fig, ax = plt.subplots(figsize=(8,
|
757 |
-
|
758 |
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
format_counts.plot(kind='bar', ax=ax, color=[bar_colors(i) for i in range(num_bars)], zorder=1)
|
763 |
ax.set_xlabel('Media Type')
|
764 |
ax.set_ylabel('Number of Posts')
|
765 |
-
ax.grid(axis='y', linestyle='--', alpha=0.6, zorder=0)
|
766 |
plt.xticks(rotation=45, ha="right")
|
767 |
-
|
|
|
|
|
768 |
for i, v in enumerate(format_counts):
|
769 |
-
ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom', zorder=2)
|
770 |
-
|
771 |
fig.tight_layout(pad=0.5)
|
772 |
-
fig.subplots_adjust(top=0.92, bottom=0.20, left=0.15, right=0.95)
|
773 |
-
logging.info(f"Successfully generated {title} plot.")
|
774 |
return fig
|
775 |
except Exception as e:
|
776 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
@@ -778,199 +522,125 @@ def generate_content_format_breakdown_plot(df, format_col='media_type'):
|
|
778 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
779 |
|
780 |
def _parse_eb_label(label_data):
|
781 |
-
if isinstance(label_data, list):
|
782 |
-
return label_data
|
783 |
if isinstance(label_data, str):
|
784 |
try:
|
785 |
parsed = ast.literal_eval(label_data)
|
786 |
-
if isinstance(parsed, list)
|
787 |
-
return parsed
|
788 |
-
return [str(parsed)] # Ensure it's a list even if ast.literal_eval returns a single string
|
789 |
except (ValueError, SyntaxError):
|
790 |
-
|
791 |
-
|
792 |
-
if pd.isna(label_data):
|
793 |
-
return []
|
794 |
-
return [str(label_data)] # Fallback for other types, ensuring it's a list
|
795 |
|
796 |
-
def generate_content_topic_breakdown_plot(df, topics_col='li_eb_labels',
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
if df is None or df.empty:
|
801 |
return create_placeholder_plot(title=title, message="No data available.")
|
802 |
-
|
803 |
-
return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
|
804 |
-
|
805 |
fig = None
|
806 |
try:
|
807 |
-
|
808 |
-
|
809 |
-
parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
|
810 |
-
exploded_labels = parsed_labels.explode().dropna() # Explode lists into separate rows
|
811 |
-
|
812 |
-
# Filter out any empty strings that might result from parsing
|
813 |
-
exploded_labels = exploded_labels[exploded_labels != '']
|
814 |
-
|
815 |
-
|
816 |
-
if exploded_labels.empty:
|
817 |
-
return create_placeholder_plot(title=title, message="No topic data found after processing labels.")
|
818 |
-
|
819 |
-
topic_counts = exploded_labels.value_counts()
|
820 |
-
|
821 |
if topic_counts.empty:
|
822 |
-
return create_placeholder_plot(title=title, message="No
|
823 |
-
|
824 |
-
top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
|
825 |
-
|
826 |
-
fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
|
827 |
-
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
828 |
|
829 |
-
|
830 |
-
bar_colors = plt.cm.get_cmap('YlGnBu', num_bars + 3) # Using a sequential colormap for horizontal bars
|
831 |
|
832 |
-
|
|
|
|
|
|
|
|
|
833 |
ax.set_xlabel('Number of Posts')
|
834 |
ax.set_ylabel('Topic')
|
835 |
|
836 |
-
|
837 |
-
|
838 |
-
|
|
|
|
|
839 |
fig.tight_layout(pad=0.5)
|
840 |
-
fig.subplots_adjust(top=0.92, bottom=0.1, left=0.3, right=0.95)
|
841 |
-
logging.info(f"Successfully generated {title} plot.")
|
842 |
return fig
|
843 |
except Exception as e:
|
844 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
845 |
if fig: plt.close(fig)
|
846 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
847 |
|
848 |
-
|
849 |
-
# --- Analytics Tab: Plot Figure Generation Function ---
|
850 |
def update_analytics_plots_figures(token_state_value, date_filter_option, custom_start_date, custom_end_date, current_plot_configs):
|
851 |
-
|
852 |
-
|
|
|
|
|
|
|
|
|
853 |
|
854 |
-
plot_data_summaries_for_chatbot = {}
|
855 |
|
856 |
if not token_state_value or not token_state_value.get("token"):
|
857 |
message = "❌ Accesso negato. Nessun token. Impossibile generare le analisi."
|
858 |
logging.warning(message)
|
859 |
-
placeholder_figs = [create_placeholder_plot(title="Accesso Negato"
|
860 |
-
|
861 |
-
|
862 |
-
|
863 |
-
return [message] + placeholder_figs + [plot_data_summaries_for_chatbot]
|
864 |
try:
|
865 |
-
(filtered_merged_posts_df,
|
866 |
-
|
867 |
-
|
868 |
-
raw_follower_stats_df, # For demographic follower plots
|
869 |
-
start_dt_for_msg, end_dt_for_msg) = \
|
870 |
-
prepare_filtered_analytics_data(
|
871 |
-
token_state_value, date_filter_option, custom_start_date, custom_end_date
|
872 |
-
)
|
873 |
|
874 |
-
# Generate data summaries for chatbot AFTER data preparation
|
875 |
plot_data_summaries_for_chatbot = generate_chatbot_data_summaries(
|
876 |
-
current_plot_configs,
|
877 |
-
|
878 |
-
filtered_mentions_df,
|
879 |
-
date_filtered_follower_stats_df,
|
880 |
-
raw_follower_stats_df,
|
881 |
-
token_state_value
|
882 |
)
|
883 |
-
|
884 |
except Exception as e:
|
885 |
error_msg = f"❌ Errore durante la preparazione dei dati per le analisi: {e}"
|
886 |
logging.error(error_msg, exc_info=True)
|
887 |
placeholder_figs = [create_placeholder_plot(title="Errore Preparazione Dati", message=str(e)) for _ in range(num_expected_plots)]
|
888 |
-
for p_cfg in current_plot_configs
|
889 |
-
|
890 |
-
|
891 |
-
|
892 |
-
|
893 |
-
|
894 |
-
|
895 |
-
|
896 |
-
|
897 |
-
|
898 |
-
|
899 |
-
|
900 |
-
|
901 |
-
|
902 |
-
|
903 |
-
|
904 |
-
|
905 |
-
|
906 |
-
|
907 |
-
|
908 |
-
|
909 |
-
|
910 |
-
|
911 |
-
|
912 |
-
|
913 |
-
|
914 |
-
|
915 |
-
|
916 |
-
|
917 |
-
|
918 |
-
|
919 |
-
|
920 |
-
|
921 |
-
|
922 |
-
|
923 |
-
|
924 |
-
|
925 |
-
|
926 |
-
|
927 |
-
|
928 |
-
|
929 |
-
|
930 |
-
|
931 |
-
|
932 |
-
|
933 |
-
if len(plot_figs) != num_expected_plots:
|
934 |
-
logging.warning(f"Mismatch in generated plots. Expected {num_expected_plots}, got {len(plot_figs)}. This will cause UI update issues.")
|
935 |
-
while len(plot_figs) < num_expected_plots:
|
936 |
-
plot_figs.append(create_placeholder_plot(title="Grafico Non Generato", message="Logica di generazione incompleta."))
|
937 |
-
|
938 |
-
message = f"📊 Analisi aggiornate per il periodo: {date_filter_option}"
|
939 |
-
if date_filter_option == "Intervallo Personalizzato":
|
940 |
-
s_display = start_dt_for_msg.strftime('%Y-%m-%d') if start_dt_for_msg else "Qualsiasi"
|
941 |
-
e_display = end_dt_for_msg.strftime('%Y-%m-%d') if end_dt_for_msg else "Qualsiasi"
|
942 |
-
message += f" (Da: {s_display} A: {e_display})"
|
943 |
-
|
944 |
-
final_plot_figs = []
|
945 |
-
for i, p_fig_candidate in enumerate(plot_figs):
|
946 |
-
if p_fig_candidate is not None and not isinstance(p_fig_candidate, str): # Basic check for a plot object
|
947 |
-
final_plot_figs.append(p_fig_candidate)
|
948 |
-
else:
|
949 |
-
err_title = plot_titles_for_errors[i] if i < len(plot_titles_for_errors) else f"Grafico {i+1}"
|
950 |
-
logging.warning(f"Plot {err_title} (index {i}) non è una figura valida: {p_fig_candidate}. Uso placeholder.")
|
951 |
-
final_plot_figs.append(create_placeholder_plot(title=f"Errore: {err_title}", message="Impossibile generare figura."))
|
952 |
-
|
953 |
-
return [message] + final_plot_figs[:num_expected_plots] + [plot_data_summaries_for_chatbot]
|
954 |
-
|
955 |
-
except (KeyError, ValueError) as e_plot_data:
|
956 |
-
logging.error(f"Errore dati durante la generazione di un grafico specifico: {e_plot_data}", exc_info=True)
|
957 |
-
error_msg_display = f"Errore dati in un grafico: {str(e_plot_data)[:100]}"
|
958 |
-
|
959 |
-
num_already_generated = len(plot_figs)
|
960 |
-
for i in range(num_already_generated, num_expected_plots):
|
961 |
-
err_title_fill = plot_titles_for_errors[i] if i < len(plot_titles_for_errors) else f"Grafico {i+1}"
|
962 |
-
plot_figs.append(create_placeholder_plot(title=f"Errore Dati: {err_title_fill}", message=f"Precedente errore: {str(e_plot_data)[:50]}"))
|
963 |
|
964 |
-
|
965 |
-
if p_cfg["id"] not in plot_data_summaries_for_chatbot:
|
966 |
-
plot_data_summaries_for_chatbot[p_cfg["id"]] = f"Errore dati grafico: {e_plot_data}"
|
967 |
-
return [error_msg_display] + plot_figs[:num_expected_plots] + [plot_data_summaries_for_chatbot]
|
968 |
-
|
969 |
-
except Exception as e_general:
|
970 |
-
error_msg = f"❌ Errore generale durante la generazione dei grafici: {e_general}"
|
971 |
-
logging.error(error_msg, exc_info=True)
|
972 |
-
placeholder_figs_general = [create_placeholder_plot(title=plot_titles_for_errors[i] if i < len(plot_titles_for_errors) else f"Grafico {i+1}", message=str(e_general)) for i in range(num_expected_plots)]
|
973 |
-
for p_cfg in current_plot_configs: # Ensure summaries dict is populated on error
|
974 |
-
if p_cfg["id"] not in plot_data_summaries_for_chatbot:
|
975 |
-
plot_data_summaries_for_chatbot[p_cfg["id"]] = f"Errore generale grafici: {e_general}"
|
976 |
-
return [error_msg] + placeholder_figs_general + [plot_data_summaries_for_chatbot]
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import matplotlib.pyplot as plt
|
3 |
import logging
|
|
|
15 |
# Configure logging for this module
|
16 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
17 |
|
18 |
+
def _apply_theme_aware_styling(fig, ax):
    """
    Style a figure/axes pair using colours taken from Matplotlib's rcParams.

    The default backgrounds and spines are hidden and replaced by a rounded
    rectangle that fills the axes area; labels, title, ticks and grid are
    recoloured from ``plt.rcParams`` (with hard-coded fallbacks).
    NOTE(review): this presumes the host UI has populated rcParams for the
    active light/dark theme -- not verifiable from this module.
    """
    rc = plt.rcParams
    text_color = rc.get('text.color', 'black')
    grid_color = rc.get('grid.color', 'lightgray')
    face_color = rc.get('axes.facecolor', 'whitesmoke')
    edge_color = rc.get('axes.edgecolor', 'lightgray')

    # Hide the stock backgrounds so only the custom rounded panel shows.
    for artist in (fig.patch, ax.patch):
        artist.set_alpha(0.0)

    # The square spines would clash with the rounded panel outline.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    # Rounded panel spanning the whole axes (axes coordinates), drawn behind
    # everything else thanks to the negative zorder.
    panel = patches.FancyBboxPatch(
        (0, 0), 1, 1,
        boxstyle="round,pad=0,rounding_size=0.015",
        transform=ax.transAxes,
        facecolor=face_color,
        edgecolor=edge_color,
        linewidth=0.5,
        zorder=-1
    )
    ax.add_patch(panel)

    # Axis labels and title follow the theme text colour.
    for text_artist in (ax.xaxis.label, ax.yaxis.label, ax.title):
        text_artist.set_color(text_color)

    # Tick marks and tick labels on both axes.
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors=text_color)

    # Dashed grid kept underneath the plotted data.
    ax.grid(True, linestyle='--', alpha=0.6, zorder=0, color=grid_color)
|
63 |
+
|
64 |
|
65 |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """
    Create a placeholder figure that shows ``title`` and ``message`` as centred text.

    Args:
        title: First line of the text shown in the figure.
        message: Second line of the text shown in the figure.

    Returns:
        A Matplotlib figure. If building the styled placeholder fails, a
        minimal transparent figure with a fixed red error text is returned
        instead, so this function always yields a figure.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        _apply_theme_aware_styling(fig, ax)

        # Use the theme's text colour for the message.
        text_color = plt.rcParams.get('text.color', 'black')
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True, zorder=1, color=text_color)
        ax.axis('off')
        fig.subplots_adjust(top=0.90, bottom=0.10, left=0.10, right=0.90)
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}")
        # Release the half-built figure; pyplot keeps every figure alive
        # until it is explicitly closed.
        if fig is not None:
            plt.close(fig)
        fig_err, ax_err = plt.subplots(figsize=(8,4))
        fig_err.patch.set_alpha(0.0)
        ax_err.patch.set_alpha(0.0)
        ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center', zorder=1, color='red')
        ax_err.axis('off')
        return fig_err
|
85 |
|
86 |
def generate_posts_activity_plot(df, date_column='published_at'):
    """
    Plot the number of posts per calendar day as a line chart.

    Args:
        df: DataFrame holding one row per post.
        date_column: Name of the column parsed with ``pd.to_datetime``.

    Returns:
        A Matplotlib figure, or a placeholder figure when there is no usable
        data or an exception is raised while plotting.
    """
    if df is None or df.empty:
        return create_placeholder_plot(title="Posts Activity Over Time", message="No data available.")
    if date_column not in df.columns:
        return create_placeholder_plot(title="Posts Activity Over Time", message="No data available.")

    fig = None
    try:
        frame = df.copy()
        # Unparseable dates become NaT and are dropped on the next line.
        frame[date_column] = pd.to_datetime(frame[date_column], errors='coerce')
        frame = frame.dropna(subset=[date_column])
        if frame.empty:
            return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")

        # Row count per day; days without posts appear with a count of 0.
        daily_counts = frame.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)

        daily_counts.plot(kind='line', ax=ax, marker='o', linestyle='-', zorder=1)
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
|
117 |
|
118 |
|
119 |
def generate_mentions_activity_plot(df, date_column='date'):
    """
    Plot the number of mentions per calendar day as a line chart.

    Args:
        df: DataFrame holding one row per mention.
        date_column: Name of the column parsed with ``pd.to_datetime``.

    Returns:
        A Matplotlib figure, or a placeholder figure when there is no usable
        data or an exception is raised while plotting.
    """
    if df is None or df.empty or date_column not in df.columns:
        return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available.")

    fig = None
    try:
        df_copy = df.copy()
        # Unparseable dates become NaT and are dropped on the next line.
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")

        # Row count per day.
        mentions_over_time = df_copy.set_index(date_column).resample('D').size()
        if mentions_over_time.empty:
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)

        mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple', zorder=1)
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        # Close the partially built figure so it does not stay registered
        # with pyplot; mirrors the error path of the sibling generators.
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
|
150 |
|
151 |
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """
    Draw a pie chart of how often each sentiment label occurs among mentions.

    Args:
        df: DataFrame holding one row per mention.
        sentiment_column: Column whose distinct values become pie slices.

    Returns:
        A Matplotlib figure, or a placeholder figure when there is no usable
        data or an exception is raised while plotting.
    """
    if df is None or df.empty or sentiment_column not in df.columns:
        return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available.")

    fig = None
    try:
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_theme_aware_styling(fig, ax)

        text_color = plt.rcParams.get('text.color', 'black')
        slice_count = len(sentiment_counts)
        # plt.get_cmap replaces plt.cm.get_cmap, which was deprecated in
        # Matplotlib 3.7 and removed in 3.9; it takes the same (name, lut) pair.
        pie_slice_colors = plt.get_cmap('Pastel2', slice_count)

        wedges, texts, autotexts = ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90,
                                          colors=[pie_slice_colors(i) for i in range(slice_count)])

        # Labels and percentages follow the theme colour and sit above the wedges.
        for text_item in texts + autotexts:
            text_item.set_color(text_color)
            text_item.set_zorder(2)
        for wedge in wedges:
            wedge.set_zorder(1)

        # Equal aspect ratio keeps the pie circular.
        ax.axis('equal')
        fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        if fig: plt.close(fig)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
|
185 |
|
186 |
+
def generate_followers_count_over_time_plot(df, **kwargs):
    """
    Plot organic and paid follower counts over time for one follower-count type.

    Args:
        df: Follower statistics DataFrame. The code reads the columns
            'follower_count_type', 'category_name', 'follower_count_organic'
            and 'follower_count_paid'.
        **kwargs: ``type_value`` selects the rows to plot
            (default 'follower_gains_monthly').

    Returns:
        A Matplotlib figure, or a placeholder figure when no rows remain or
        an exception (including a missing column) occurs.
    """
    type_value = kwargs.get('type_value', 'follower_gains_monthly')
    title = f"Followers Count Over Time ({type_value})"
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    fig = None
    try:
        selected = df[df['follower_count_type'] == type_value].copy()
        if selected.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        # 'category_name' is parsed as the date for these rows; values that
        # fail to parse become NaT and are dropped below.
        selected['datetime_obj'] = pd.to_datetime(selected['category_name'], errors='coerce')
        # Non-numeric counts are treated as 0.
        for count_col in ('follower_count_organic', 'follower_count_paid'):
            selected[count_col] = pd.to_numeric(selected[count_col], errors='coerce').fillna(0)
        selected = selected.dropna(subset=['datetime_obj'])
        selected = selected.sort_values(by='datetime_obj')
        if selected.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)

        dates = selected['datetime_obj']
        ax.plot(dates, selected['follower_count_organic'], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers', zorder=1)
        ax.plot(dates, selected['follower_count_paid'], marker='x', linestyle='--', color='seagreen', label='Paid Followers', zorder=1)
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')

        # Recolour the legend entries to match the theme text colour.
        legend = ax.legend()
        if legend:
            for entry in legend.get_texts():
                entry.set_color(plt.rcParams.get('text.color', 'black'))
            legend.set_zorder(2)

        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
228 |
|
229 |
+
def generate_followers_growth_rate_plot(df, **kwargs):
    """
    Plot the period-over-period percentage change of organic and paid followers.

    Args:
        df: Follower statistics DataFrame. The code reads the columns
            'follower_count_type', 'category_name', 'follower_count_organic'
            and 'follower_count_paid'.
        **kwargs: ``type_value`` selects the rows to plot
            (default 'follower_gains_monthly').

    Returns:
        A Matplotlib figure, or a placeholder figure when fewer than two data
        points exist, no growth rate can be computed, or an exception occurs.
    """
    type_value = kwargs.get('type_value', 'follower_gains_monthly')
    title = f"Follower Growth Rate ({type_value})"
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    fig = None
    try:
        df_filtered = df[df['follower_count_type'] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered['category_name'], errors='coerce')
        df_filtered['follower_count_organic'] = pd.to_numeric(df_filtered['follower_count_organic'], errors='coerce')
        df_filtered['follower_count_paid'] = pd.to_numeric(df_filtered['follower_count_paid'], errors='coerce')
        df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')

        # pct_change needs a previous row to compare against.
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        df_filtered['organic_growth_rate'] = df_filtered['follower_count_organic'].pct_change() * 100
        df_filtered['paid_growth_rate'] = df_filtered['follower_count_paid'].pct_change() * 100
        # Division by a zero baseline yields +/-inf; treat those as missing.
        df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)

        # Decide what can be drawn BEFORE creating the figure, so the
        # "nothing to plot" exit does not leave an orphaned open figure.
        has_organic = not df_filtered['organic_growth_rate'].dropna().empty
        has_paid = not df_filtered['paid_growth_rate'].dropna().empty
        if not (has_organic or has_paid):
            return create_placeholder_plot(title=title, message="No growth rate data to display.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)

        if has_organic:
            ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate', zorder=1)
        if has_paid:
            ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate', zorder=1)

        ax.set_xlabel('Date')
        ax.set_ylabel('Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())

        # Recolour the legend entries to match the theme text colour.
        legend = ax.legend()
        if legend:
            for text in legend.get_texts():
                text.set_color(plt.rcParams.get('text.color', 'black'))
            legend.set_zorder(2)

        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig: plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
285 |
|
286 |
+
def generate_followers_by_demographics_plot(df, **kwargs):
    """
    Draw a grouped bar chart of organic vs. paid followers for the ten
    largest categories of one demographic type.

    Args:
        df: Follower statistics DataFrame. The code reads the columns
            'follower_count_type', 'category_name', 'follower_count_organic'
            and 'follower_count_paid'.
        **kwargs: ``type_value`` (required) selects the demographic rows;
            ``plot_title`` is used for placeholder and log messages
            (default "Followers by Demographics").

    Returns:
        A Matplotlib figure, or a placeholder figure when no data is
        available or an exception occurs.
    """
    plot_title = kwargs.get('plot_title', "Followers by Demographics")
    type_value = kwargs.get('type_value')
    category_col = 'category_name'

    if df is None or df.empty or not type_value:
        return create_placeholder_plot(title=plot_title, message="No data or demographic type not specified.")

    fig = None
    try:
        selected = df[df['follower_count_type'] == type_value].copy()
        if selected.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for type '{type_value}'.")

        count_cols = ['follower_count_organic', 'follower_count_paid']
        # Non-numeric counts are treated as 0.
        for col in count_cols:
            selected[col] = pd.to_numeric(selected[col], errors='coerce').fillna(0)

        # Sum per category, then keep the 10 categories with the largest
        # combined (organic + paid) total; the helper column is dropped again.
        demographics_data = selected.groupby(category_col)[count_cols].sum()
        demographics_data['total_for_sort'] = demographics_data.sum(axis=1)
        ranked = demographics_data.sort_values(by='total_for_sort', ascending=False)
        demographics_data = ranked.head(10).drop(columns=['total_for_sort'])

        if demographics_data.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display.")

        fig, ax = plt.subplots(figsize=(12, 7))
        _apply_theme_aware_styling(fig, ax)

        demographics_data.plot(kind='bar', ax=ax, zorder=1, width=0.8, color=['dodgerblue', 'seagreen'])
        ax.set_xlabel(category_col.replace('_', ' ').title())
        ax.set_ylabel('Number of Followers')

        # Recolour the legend entries to match the theme text colour.
        legend = ax.legend(['Organic', 'Paid'])
        if legend:
            for entry in legend.get_texts():
                entry.set_color(plt.rcParams.get('text.color', 'black'))
            legend.set_zorder(2)

        plt.xticks(rotation=45, ha="right")
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.25, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
|
331 |
|
332 |
+
def generate_generic_time_series_plot(df, date_column, value_column, title, ylabel, color='blue'):
    """
    Plot the daily sum of one numeric column as a line chart.

    Args:
        df: Source DataFrame.
        date_column: Column parsed with ``pd.to_datetime`` and used as index.
        value_column: Column coerced to numeric and summed per day.
        title: Axes title; also used in placeholder and log messages.
        ylabel: Label for the y axis.
        color: Line colour passed to ``ax.plot``.

    Returns:
        A Matplotlib figure, or a placeholder figure when there is no usable
        data or an exception is raised while plotting.
    """
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if date_column not in df.columns or value_column not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        frame = df.copy()
        # Rows whose date or value cannot be parsed are discarded.
        frame[date_column] = pd.to_datetime(frame[date_column], errors='coerce')
        frame[value_column] = pd.to_numeric(frame[value_column], errors='coerce')
        frame = frame.dropna(subset=[date_column, value_column])
        frame = frame.set_index(date_column)
        if frame.empty:
            return create_placeholder_plot(title=title, message="No valid data.")

        daily_totals = frame.resample('D')[value_column].sum()
        if daily_totals.empty:
            return create_placeholder_plot(title=title, message="No data in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)

        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color=color, zorder=1)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel(ylabel)
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
365 |
|
366 |
+
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """
    Plot the daily mean engagement rate with a percent-formatted y axis.

    Args:
        df: Source DataFrame.
        date_column: Column parsed with ``pd.to_datetime`` and used as index.
        engagement_rate_col: Column coerced to numeric and averaged per day.

    Returns:
        A Matplotlib figure, or a placeholder figure when there is no usable
        data or an exception is raised while plotting.
    """
    title = "Engagement Rate Over Time"
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if date_column not in df.columns or engagement_rate_col not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        frame = df.copy()
        # Rows whose date or rate cannot be parsed are discarded.
        frame[date_column] = pd.to_datetime(frame[date_column], errors='coerce')
        frame[engagement_rate_col] = pd.to_numeric(frame[engagement_rate_col], errors='coerce')
        frame = frame.dropna(subset=[date_column, engagement_rate_col])

        if frame.empty:
            return create_placeholder_plot(title=title, message="No valid data.")

        # Daily mean; days without any post yield NaN and are removed.
        daily_mean = frame.set_index(date_column).resample('D')[engagement_rate_col].mean()
        daily_mean = daily_mean.dropna()

        if daily_mean.empty:
            return create_placeholder_plot(title=title, message="No data to display.")

        fig, ax = plt.subplots(figsize=(10,5))
        _apply_theme_aware_styling(fig,ax)

        ax.plot(daily_mean.index, daily_mean.values, marker='.', linestyle='-', color='darkorange', zorder=1)

        # Scale heuristic: a maximum of at most 1.5 is treated as a fraction
        # (1.0 == 100%), anything larger as already expressed in percent.
        if daily_mean.max() <= 1.5:
            percent_scale = 1.0
        else:
            percent_scale = 100.0
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=percent_scale))

        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')
        plt.xticks(rotation=45)
        fig.tight_layout(pad=0.5)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
406 |
|
407 |
+
def generate_reach_over_time_plot(df, **kwargs):
    """
    Plot daily summed 'clickCount' under the title 'Reach Over Time (Clicks)'.

    Extra keyword arguments are accepted but ignored.
    NOTE(review): the series plotted here is the click count, the same column
    used by the clicks plot -- confirm that is the intended reach metric.
    """
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='clickCount',
        title='Reach Over Time (Clicks)',
        ylabel='Total Clicks',
        color='mediumseagreen',
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
409 |
|
410 |
+
def generate_impressions_over_time_plot(df, **kwargs):
    """
    Plot daily summed 'impressionCount' by 'published_at'.

    Extra keyword arguments are accepted but ignored.
    """
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='impressionCount',
        title='Impressions Over Time',
        ylabel='Total Impressions',
        color='slateblue',
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
|
413 |
+
def generate_likes_over_time_plot(df, **kwargs):
    """
    Plot daily summed 'likeCount' by 'published_at'.

    Extra keyword arguments are accepted but ignored.
    """
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='likeCount',
        title='Reactions (Likes) Over Time',
        ylabel='Total Likes',
        color='crimson',
    )
|
|
|
415 |
|
416 |
+
def generate_clicks_over_time_plot(df, **kwargs):
    """
    Plot daily summed 'clickCount' by 'published_at'.

    Extra keyword arguments are accepted but ignored.
    """
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='clickCount',
        title='Clicks Over Time',
        ylabel='Total Clicks',
        color='mediumseagreen',
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
|
419 |
+
def generate_shares_over_time_plot(df, **kwargs):
    """
    Plot daily summed 'shareCount' by 'published_at'.

    Extra keyword arguments are accepted but ignored.
    """
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='shareCount',
        title='Shares Over Time',
        ylabel='Total Shares',
        color='teal',
    )
|
421 |
|
422 |
+
def generate_comments_over_time_plot(df, **kwargs):
    """
    Plot daily summed 'commentCount' by 'published_at'.

    Extra keyword arguments are accepted but ignored.
    """
    return generate_generic_time_series_plot(
        df,
        date_column='published_at',
        value_column='commentCount',
        title='Comments Over Time',
        ylabel='Total Comments',
        color='gold',
    )
|
424 |
|
425 |
+
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', **kwargs):
    """Generate a theme-aware pie chart of comment sentiment.

    Args:
        df: DataFrame expected to contain ``sentiment_column``.
        sentiment_column: Name of the column whose distinct values are counted
            and drawn as pie slices.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        A Matplotlib figure. A placeholder figure is returned when ``df`` is
        None/empty, the column is missing, there is nothing to count, or
        plotting raises.
    """
    title = "Breakdown of Comments by Sentiment"
    if df is None or df.empty or sentiment_column not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        sentiment_counts = df[sentiment_column].value_counts().dropna()
        if sentiment_counts.empty:
            return create_placeholder_plot(title=title, message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_theme_aware_styling(fig, ax)

        theme_text_color = plt.rcParams.get('text.color', 'black')
        slice_count = len(sentiment_counts)
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap (reached via
        # plt.cm.get_cmap) was deprecated in Matplotlib 3.7 and removed in 3.9.
        slice_cmap = plt.get_cmap('coolwarm', slice_count)
        _wedges, texts, autotexts = ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            startangle=90,
            colors=[slice_cmap(i) for i in range(slice_count)],
        )

        # Slice labels and percentage labels follow the active theme's text color.
        for text_item in texts + autotexts:
            text_item.set_color(theme_text_color)

        ax.set_title(title)
        ax.axis('equal')  # keep the pie circular
        fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
        return fig
    except Exception as e:
        # Log like the sibling generators so failures are not silent.
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
455 |
|
456 |
+
def generate_post_frequency_plot(df, date_column='published_at', **kwargs):
    """Draw a theme-aware line chart of how many posts fall on each day.

    Rows are counted per calendar day with ``resample('D').size()`` after
    ``date_column`` has been coerced to datetimes; rows whose date cannot be
    parsed are discarded. A placeholder figure is returned when there is no
    usable data or when plotting fails. ``**kwargs`` is accepted but unused.
    """
    title = "Post Frequency Over Time"
    has_date_data = df is not None and not df.empty and date_column in df.columns
    if not has_date_data:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        dated_posts = df.copy()
        dated_posts[date_column] = pd.to_datetime(dated_posts[date_column], errors='coerce')
        dated_posts = dated_posts.dropna(subset=[date_column])
        dated_posts = dated_posts.set_index(date_column)
        if dated_posts.empty:
            return create_placeholder_plot(title=title, message="No valid data.")

        # size() counts rows per day regardless of the other columns' contents.
        data_over_time = dated_posts.resample('D').size()
        if data_over_time.empty:
            return create_placeholder_plot(title=title, message="No data in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_theme_aware_styling(fig, ax)

        ax.plot(
            data_over_time.index,
            data_over_time.values,
            marker='.',
            linestyle='-',
            zorder=1,
        )
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        plt.xticks(rotation=45)

        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
489 |
|
490 |
+
def generate_content_format_breakdown_plot(df, format_col='media_type', **kwargs):
    """Generate a theme-aware bar chart of post counts per content format.

    Args:
        df: DataFrame expected to contain ``format_col``.
        format_col: Name of the column whose distinct values become the bars.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        A Matplotlib figure. A placeholder figure is returned when ``df`` is
        None/empty, the column is missing, there is nothing to count, or
        plotting raises.
    """
    title = "Breakdown of Content by Format"
    if df is None or df.empty or format_col not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        format_counts = df[format_col].value_counts().dropna()
        if format_counts.empty:
            return create_placeholder_plot(title=title, message="No format data.")

        fig, ax = plt.subplots(figsize=(8, 6))
        _apply_theme_aware_styling(fig, ax)

        # pyplot.get_cmap is used because matplotlib.cm.get_cmap (reached via
        # plt.cm.get_cmap) was deprecated in Matplotlib 3.7 and removed in 3.9.
        bar_colors = plt.get_cmap('viridis')(np.linspace(0, 1, len(format_counts)))
        format_counts.plot(kind='bar', ax=ax, zorder=1, color=bar_colors)
        ax.set_title(title)
        ax.set_xlabel('Media Type')
        ax.set_ylabel('Number of Posts')
        plt.xticks(rotation=45, ha="right")

        # Value labels sit just above each bar and use the theme's text color.
        text_color = plt.rcParams.get('text.color', 'black')
        label_offset = 0.01 * format_counts.max()
        for position, count in enumerate(format_counts):
            ax.text(position, count + label_offset, str(count),
                    ha='center', va='bottom', zorder=2, color=text_color)

        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.15, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        # Close the partially built figure, as the sibling generators do.
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
523 |
|
524 |
def _parse_eb_label(label_data):
    """Normalize one raw label cell into a list of labels.

    Args:
        label_data: A list, a string (either a Python-literal list such as
            "['a', 'b']" or a plain label), another list-like container
            (tuple, set, array, ...), a missing value, or any other scalar.

    Returns:
        - the list itself when ``label_data`` is already a list;
        - for strings: the parsed list when the text is a list literal,
          ``[str(value)]`` for any other literal, the stripped text as a
          single label when it is not a literal, or ``[]`` when blank;
        - ``list(label_data)`` for other list-like containers;
        - ``[]`` for missing values, otherwise ``[str(label_data)]``.
    """
    if isinstance(label_data, list):
        return label_data

    if isinstance(label_data, str):
        try:
            parsed = ast.literal_eval(label_data)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval documents all of these for malformed input; treat
            # the text as a single plain label instead of failing the caller.
            stripped = label_data.strip()
            return [stripped] if stripped else []
        return parsed if isinstance(parsed, list) else [str(parsed)]

    # Tuples, sets and arrays must be expanded before the scalar check:
    # pd.isna on an array returns an array, whose truth value is ambiguous,
    # and str() on a container would produce one bogus label.
    if pd.api.types.is_list_like(label_data):
        return list(label_data)

    return [] if pd.isna(label_data) else [str(label_data)]
|
|
|
|
|
|
|
533 |
|
534 |
+
def generate_content_topic_breakdown_plot(df, topics_col='li_eb_labels', **kwargs):
    """Generate a theme-aware horizontal bar chart of the 15 most frequent topics.

    Each cell of ``topics_col`` is normalized to a list of labels with
    ``_parse_eb_label``, the lists are exploded to one label per row, and the
    labels are counted. Empty-string labels are ignored.

    Args:
        df: DataFrame expected to contain ``topics_col``.
        topics_col: Name of the column holding the topic labels.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        A Matplotlib figure. A placeholder figure is returned when ``df`` is
        None/empty, the column is missing, no topics are found, or plotting
        raises.
    """
    title = "Breakdown of Content by Topics (Top 15)"
    if df is None or df.empty or topics_col not in df.columns:
        return create_placeholder_plot(title=title, message="No data available.")

    fig = None
    try:
        topic_counts = df[topics_col].apply(_parse_eb_label).explode().dropna().value_counts()
        topic_counts = topic_counts[topic_counts.index != '']
        if topic_counts.empty:
            return create_placeholder_plot(title=title, message="No topic data found.")

        # Ascending order puts the most frequent topic at the top of a barh chart.
        top_topics = topic_counts.nlargest(15).sort_values(ascending=True)

        fig, ax = plt.subplots(figsize=(10, 8))
        _apply_theme_aware_styling(fig, ax)

        # pyplot.get_cmap is used because matplotlib.cm.get_cmap (reached via
        # plt.cm.get_cmap) was deprecated in Matplotlib 3.7 and removed in 3.9.
        bar_colors = plt.get_cmap('YlGnBu')(np.linspace(0.3, 1, len(top_topics)))
        top_topics.plot(kind='barh', ax=ax, zorder=1, color=bar_colors)
        ax.set_title(title)
        ax.set_xlabel('Number of Posts')
        ax.set_ylabel('Topic')

        # Value labels sit just right of each bar and use the theme's text color.
        text_color = plt.rcParams.get('text.color', 'black')
        label_offset = 0.01 * top_topics.max()
        for position, count in enumerate(top_topics):
            ax.text(count + label_offset, position, f' {count}',
                    va='center', ha='left', zorder=2, color=text_color)

        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.1, left=0.3, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
569 |
|
|
|
|
|
570 |
def update_analytics_plots_figures(token_state_value, date_filter_option, custom_start_date, custom_end_date, current_plot_configs):
    """Build every analytics figure requested by ``current_plot_configs``.

    The result is always a flat list: a status message, then one figure per
    entry of ``current_plot_configs`` (in the same order), then a dict of
    chatbot data summaries.

    When no token is present, or when data preparation raises, every figure
    is a placeholder and the summaries dict maps each config ``"id"`` to an
    explanatory text. Otherwise each config id is dispatched to its generator;
    a generator that raises, or an id without a generator, yields a
    placeholder for that slot only.
    """
    logging.info(f"Updating analytics plot figures for theme-aware plotting. Filter: {date_filter_option}")
    num_expected_plots = len(current_plot_configs)

    def _all_placeholders(status_text, summary_text, **placeholder_kwargs):
        # Shared shape of both early-exit results: message, N placeholders, summaries.
        placeholder_figs = [
            create_placeholder_plot(**placeholder_kwargs)
            for _ in range(num_expected_plots)
        ]
        summaries = {p_cfg["id"]: summary_text for p_cfg in current_plot_configs}
        return [status_text] + placeholder_figs + [summaries]

    if not token_state_value or not token_state_value.get("token"):
        message = "❌ Accesso negato. Nessun token. Impossibile generare le analisi."
        logging.warning(message)
        return _all_placeholders(
            message,
            "Accesso negato, nessun dato per il chatbot.",
            title="Accesso Negato",
        )

    try:
        prepared = prepare_filtered_analytics_data(
            token_state_value, date_filter_option, custom_start_date, custom_end_date
        )
        (
            filtered_merged_posts_df,
            filtered_mentions_df,
            date_filtered_follower_stats_df,
            raw_follower_stats_df,
            start_dt_for_msg,
            end_dt_for_msg,
        ) = prepared

        plot_data_summaries_for_chatbot = generate_chatbot_data_summaries(
            current_plot_configs,
            filtered_merged_posts_df,
            filtered_mentions_df,
            date_filtered_follower_stats_df,
            raw_follower_stats_df,
            token_state_value,
        )
    except Exception as e:
        error_msg = f"❌ Errore durante la preparazione dei dati per le analisi: {e}"
        logging.error(error_msg, exc_info=True)
        return _all_placeholders(
            error_msg,
            f"Errore preparazione dati: {e}",
            title="Errore Preparazione Dati",
            message=str(e),
        )

    # Plot id -> zero-argument callable; nothing is generated until it is called.
    plot_functions = {
        "followers_count": lambda: generate_followers_count_over_time_plot(
            date_filtered_follower_stats_df, type_value='follower_gains_monthly'),
        "followers_growth_rate": lambda: generate_followers_growth_rate_plot(
            date_filtered_follower_stats_df, type_value='follower_gains_monthly'),
        "followers_by_location": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_geo', plot_title="Follower per Località"),
        "followers_by_role": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_function', plot_title="Follower per Ruolo"),
        "followers_by_industry": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_industry', plot_title="Follower per Settore"),
        "followers_by_seniority": lambda: generate_followers_by_demographics_plot(
            raw_follower_stats_df, type_value='follower_seniority', plot_title="Follower per Anzianità"),
        "engagement_rate": lambda: generate_engagement_rate_over_time_plot(filtered_merged_posts_df),
        "reach_over_time": lambda: generate_reach_over_time_plot(filtered_merged_posts_df),
        "impressions_over_time": lambda: generate_impressions_over_time_plot(filtered_merged_posts_df),
        "likes_over_time": lambda: generate_likes_over_time_plot(filtered_merged_posts_df),
        "clicks_over_time": lambda: generate_clicks_over_time_plot(filtered_merged_posts_df),
        "shares_over_time": lambda: generate_shares_over_time_plot(filtered_merged_posts_df),
        "comments_over_time": lambda: generate_comments_over_time_plot(filtered_merged_posts_df),
        "comments_sentiment": lambda: generate_comments_sentiment_breakdown_plot(filtered_merged_posts_df),
        "post_frequency_cs": lambda: generate_post_frequency_plot(filtered_merged_posts_df),
        "content_format_breakdown_cs": lambda: generate_content_format_breakdown_plot(
            filtered_merged_posts_df,
            format_col=token_state_value.get("config_media_type_col", "media_type")),
        "content_topic_breakdown_cs": lambda: generate_content_topic_breakdown_plot(
            filtered_merged_posts_df,
            topics_col=token_state_value.get("config_eb_labels_col", "li_eb_labels")),
        "mention_analysis_volume": lambda: generate_mentions_activity_plot(
            filtered_mentions_df,
            date_column=token_state_value.get("config_date_col_mentions", "date")),
        "mention_analysis_sentiment": lambda: generate_mention_sentiment_plot(filtered_mentions_df),
    }

    plot_figs = []
    for config in current_plot_configs:
        plot_id = config["id"]
        if plot_id not in plot_functions:
            logging.warning(f"No plot function found for ID: '{plot_id}'")
            plot_figs.append(create_placeholder_plot(title=f"Plot Not Implemented: {config.get('label', plot_id)}"))
            continue
        try:
            fig = plot_functions[plot_id]()
            plot_figs.append(fig)
        except Exception as e:
            # One failing plot must not prevent the remaining ones from rendering.
            logging.error(f"Failed to generate plot for '{plot_id}': {e}", exc_info=True)
            plot_figs.append(create_placeholder_plot(title=f"Error: {config.get('label', plot_id)}", message=str(e)))

    message = f"📊 Analisi aggiornate per il periodo: {date_filter_option}"
    if date_filter_option == "Intervallo Personalizzato":
        # Show the resolved custom range, falling back to "N/A" for a missing bound.
        s_display = start_dt_for_msg.strftime('%Y-%m-%d') if start_dt_for_msg else "N/A"
        e_display = end_dt_for_msg.strftime('%Y-%m-%d') if end_dt_for_msg else "N/A"
        message += f" (Da: {s_display} A: {e_display})"

    return [message] + plot_figs + [plot_data_summaries_for_chatbot]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|