# LinkedinMonitor / app.py
# GuglielmoTor's picture
# Update app.py
# c6716b6 verified
# raw
# history blame
# 26.5 kB
import gradio as gr
import pandas as pd
import os
import logging
import matplotlib
matplotlib.use('Agg') # Set backend for Matplotlib to avoid GUI conflicts with Gradio
import matplotlib.pyplot as plt
# --- Module Imports ---
from gradio_utils import get_url_user_token
# Functions from newly created/refactored modules
from config import (
LINKEDIN_CLIENT_ID_ENV_VAR, BUBBLE_APP_NAME_ENV_VAR,
BUBBLE_API_KEY_PRIVATE_ENV_VAR, BUBBLE_API_ENDPOINT_ENV_VAR
)
from state_manager import process_and_store_bubble_token
from sync_logic import sync_all_linkedin_data_orchestrator
from ui_generators import (
display_main_dashboard,
run_mentions_tab_display,
run_follower_stats_tab_display
)
# Corrected import for analytics_data_processing
from analytics_data_processing import prepare_filtered_analytics_data
from analytics_plot_generator import (
generate_posts_activity_plot, generate_engagement_type_plot,
generate_mentions_activity_plot, generate_mention_sentiment_plot,
generate_followers_count_over_time_plot,
generate_followers_growth_rate_plot,
generate_followers_by_demographics_plot,
generate_engagement_rate_over_time_plot,
generate_reach_over_time_plot,
generate_impressions_over_time_plot,
create_placeholder_plot, # For initializing plots
# --- Import existing new plot functions ---
generate_likes_over_time_plot,
generate_clicks_over_time_plot, # Note: can be same as reach
generate_shares_over_time_plot,
generate_comments_over_time_plot,
generate_comments_sentiment_breakdown_plot,
# --- Import NEW plot functions for Content Strategy ---
generate_post_frequency_plot,
generate_content_format_breakdown_plot,
generate_content_topic_breakdown_plot
)
# Configure root logging once at import time: INFO level, with every record
# showing timestamp, level name, originating module and the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
# --- Analytics Tab: Plot Update Function ---
# Number of plot outputs returned by update_analytics_plots (one per gr.Plot in
# `analytics_plot_outputs`, which additionally starts with the status Markdown):
# 13 original + 5 engagement + 3 content strategy + 2 "Mention Analysis" slots.
# The last two slots re-display the mentions figures rather than new ones.
NUM_ANALYTICS_PLOT_SLOTS = 23


def _placeholder_response(status_message, title, detail, slot_count=NUM_ANALYTICS_PLOT_SLOTS):
    """Return ``[status_message]`` followed by ``slot_count`` placeholder figures."""
    return [status_message] + [
        create_placeholder_plot(title=title, message=detail) for _ in range(slot_count)
    ]


def _build_plot_or_placeholder(slot_name, build_figure):
    """Run a single plot builder in isolation.

    Args:
        slot_name: Human-readable name of the plot, used only for logging.
        build_figure: Zero-argument callable that returns the figure.

    Returns:
        A ``(figure, succeeded)`` tuple. When the builder raises, or returns
        ``None`` or a string, ``figure`` is a "Plot Error" placeholder and
        ``succeeded`` is ``False``.
    """
    try:
        figure = build_figure()
    except Exception:
        # Boundary handler: one broken plot must not blank the whole tab.
        logging.exception("Plot generation raised for '%s', using placeholder.", slot_name)
        figure = None
    else:
        if figure is None or isinstance(figure, str):
            logging.warning(
                "A plot generation failed or returned unexpected type for '%s', "
                "using placeholder. Plot: %s",
                slot_name, figure,
            )
            figure = None

    if figure is None:
        placeholder = create_placeholder_plot(
            title="Plot Error", message="Failed to generate this plot."
        )
        return placeholder, False
    return figure, True


def update_analytics_plots(token_state_value, date_filter_option, custom_start_date, custom_end_date):
    """Prepare the filtered analytics data and build every Analytics-tab figure.

    Args:
        token_state_value: The app state dict; must contain a truthy ``"token"``.
            Optional ``config_*`` keys override the default column names.
        date_filter_option: Selected date range label (e.g. ``"Custom Range"``).
        custom_start_date: Start of the custom range, forwarded unchanged to
            ``prepare_filtered_analytics_data``.
        custom_end_date: End of the custom range, forwarded unchanged.

    Returns:
        A list of ``1 + NUM_ANALYTICS_PLOT_SLOTS`` items: a status message
        followed by one figure per plot slot, in the order of
        ``analytics_plot_outputs``. On a missing token or a data-preparation
        error every slot holds a placeholder. A failure in an individual plot
        builder only replaces that plot's slot(s) with a placeholder.
    """
    logging.info(
        "Updating analytics plots. Filter: %s, Custom Start: %s, Custom End: %s",
        date_filter_option, custom_start_date, custom_end_date,
    )

    if not token_state_value or not token_state_value.get("token"):
        message = "❌ Access denied. No token. Cannot generate analytics."
        logging.warning(message)
        return _placeholder_response(message, "Access Denied", "No token.")

    try:
        (posts_df,
         mentions_df,
         follower_stats_in_range_df,
         follower_stats_all_df,
         start_dt_for_msg,
         end_dt_for_msg) = prepare_filtered_analytics_data(
            token_state_value, date_filter_option, custom_start_date, custom_end_date
        )
    except Exception as e:
        error_msg = f"❌ Error preparing analytics data: {e}"
        logging.error(error_msg, exc_info=True)
        return _placeholder_response(error_msg, "Data Preparation Error", str(e))

    try:
        posts_date_col = token_state_value.get("config_date_col_posts", "published_at")
        mentions_date_col = token_state_value.get("config_date_col_mentions", "date")
        # NOTE(review): the content-strategy plots are presumed to cope with these
        # columns being absent from the posts frame (per the original author's
        # note) - confirm in analytics_plot_generator.
        media_type_col = token_state_value.get("config_media_type_col", "media_type")
        topics_col = token_state_value.get("config_eb_labels_col", "eb_labels")

        logging.info(
            "Data for plotting - Filtered Merged Posts: %d rows, Filtered Mentions: %d rows.",
            len(posts_df), len(mentions_df),
        )
        logging.info(
            "Date-Filtered Follower Stats: %d rows, Raw Follower Stats: %d rows.",
            len(follower_stats_in_range_df), len(follower_stats_all_df),
        )

        monthly_gains_filter = {
            "type_filter_column": "follower_count_type",
            "type_value": "follower_gains_monthly",
        }

        def followers_by(type_value, plot_title):
            """Return a builder for one demographics plot over the unfiltered follower stats."""
            return lambda: generate_followers_by_demographics_plot(
                follower_stats_all_df,
                category_col='category_name',
                type_filter_column='follower_count_type',
                type_value=type_value,
                plot_title=plot_title,
            )

        mentions_activity = "Mentions Activity"
        mention_sentiment = "Mention Sentiment"

        # One (name, builder) pair per distinct figure, in UI output order.
        plot_jobs = [
            ("Posts Activity",
             lambda: generate_posts_activity_plot(posts_df, date_column=posts_date_col)),
            ("Engagement Types",
             lambda: generate_engagement_type_plot(posts_df)),
            (mentions_activity,
             lambda: generate_mentions_activity_plot(mentions_df, date_column=mentions_date_col)),
            (mention_sentiment,
             lambda: generate_mention_sentiment_plot(mentions_df)),
            ("Followers Count",
             lambda: generate_followers_count_over_time_plot(
                 follower_stats_in_range_df, **monthly_gains_filter)),
            ("Followers Growth Rate",
             lambda: generate_followers_growth_rate_plot(
                 follower_stats_in_range_df, **monthly_gains_filter)),
            ("Followers by Location", followers_by('follower_geo', "Followers by Location")),
            ("Followers by Role", followers_by('follower_function', "Followers by Role")),
            ("Followers by Industry", followers_by('follower_industry', "Followers by Industry")),
            ("Followers by Seniority", followers_by('follower_seniority', "Followers by Seniority")),
            ("Engagement Rate",
             lambda: generate_engagement_rate_over_time_plot(
                 posts_df, date_column=posts_date_col, engagement_rate_col='engagement')),
            ("Reach",
             lambda: generate_reach_over_time_plot(
                 posts_df, date_column=posts_date_col, reach_col='clickCount')),
            ("Impressions",
             lambda: generate_impressions_over_time_plot(
                 posts_df, date_column=posts_date_col, impressions_col='impressionCount')),
            ("Likes",
             lambda: generate_likes_over_time_plot(
                 posts_df, date_column=posts_date_col, likes_col='likeCount')),
            ("Clicks",
             lambda: generate_clicks_over_time_plot(
                 posts_df, date_column=posts_date_col, clicks_col='clickCount')),
            ("Shares",
             lambda: generate_shares_over_time_plot(
                 posts_df, date_column=posts_date_col, shares_col='shareCount')),
            ("Comments",
             lambda: generate_comments_over_time_plot(
                 posts_df, date_column=posts_date_col, comments_col='commentCount')),
            ("Comments Sentiment",
             lambda: generate_comments_sentiment_breakdown_plot(
                 posts_df, sentiment_column='comment_sentiment')),
            ("Post Frequency",
             lambda: generate_post_frequency_plot(posts_df, date_column=posts_date_col)),
            ("Content Format",
             lambda: generate_content_format_breakdown_plot(posts_df, format_col=media_type_col)),
            ("Content Topics",
             lambda: generate_content_topic_breakdown_plot(posts_df, topics_col=topics_col)),
        ]

        figures_by_name = {}
        failed_plots = []
        for slot_name, build_figure in plot_jobs:
            figure, succeeded = _build_plot_or_placeholder(slot_name, build_figure)
            figures_by_name[slot_name] = figure
            if not succeeded:
                failed_plots.append(slot_name)

        # The "Mention Analysis" section re-displays the two mentions figures in
        # two additional UI slots appended after the distinct plots.
        slot_order = [slot_name for slot_name, _ in plot_jobs]
        slot_order += [mentions_activity, mention_sentiment]
        figures = [figures_by_name[slot_name] for slot_name in slot_order]

        logging.info(
            "Successfully generated %d of %d distinct plot figures for %d UI slots.",
            len(plot_jobs) - len(failed_plots), len(plot_jobs), NUM_ANALYTICS_PLOT_SLOTS,
        )

        # Gradio needs exactly one value per output component, so enforce the
        # slot count even if the job list and the constant drift apart.
        if len(figures) != NUM_ANALYTICS_PLOT_SLOTS:
            logging.error(
                "Built %d figures for %d UI slots; truncating/padding to match.",
                len(figures), NUM_ANALYTICS_PLOT_SLOTS,
            )
            figures = figures[:NUM_ANALYTICS_PLOT_SLOTS]
            figures += [
                create_placeholder_plot(title="Missing Plot", message="Plot could not be generated.")
                for _ in range(NUM_ANALYTICS_PLOT_SLOTS - len(figures))
            ]

        message = f"📊 Analytics updated for period: {date_filter_option}"
        if date_filter_option == "Custom Range":
            s_display = start_dt_for_msg.strftime('%Y-%m-%d') if start_dt_for_msg else "Any"
            e_display = end_dt_for_msg.strftime('%Y-%m-%d') if end_dt_for_msg else "Any"
            message += f" (From: {s_display} To: {e_display})"
        if failed_plots:
            message += f" ⚠️ {len(failed_plots)} plot(s) could not be generated."

        return [message] + figures
    except Exception as e:
        error_msg = f"❌ Error generating analytics plots: {e}"
        logging.error(error_msg, exc_info=True)
        return _placeholder_response(error_msg, "Plot Generation Error", str(e))
# --- Gradio UI Blocks ---
# Builds the whole dashboard: shared state, hidden URL-parameter fields, and four
# tabs (Dashboard & Sync, Analytics, Mentions, Follower Stats) with their events.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
               title="LinkedIn Organization Dashboard") as app:

    # Per-session state shared by every callback. The "config_*" entries hold the
    # column names that update_analytics_plots reads via .get().
    token_state = gr.State(value={
        "token": None, "client_id": None, "org_urn": None,
        "bubble_posts_df": pd.DataFrame(),
        "bubble_post_stats_df": pd.DataFrame(),
        "bubble_mentions_df": pd.DataFrame(),
        "bubble_follower_stats_df": pd.DataFrame(),
        "fetch_count_for_api": 0,
        "url_user_token_temp_storage": None,
        "config_date_col_posts": "published_at",
        "config_date_col_mentions": "date",
        "config_date_col_followers": "date",
        "config_media_type_col": "media_type",
        "config_eb_labels_col": "eb_labels",
    })

    gr.Markdown("# 🚀 LinkedIn Organization Dashboard")
    url_user_token_display = gr.Textbox(label="User Token (from URL - Hidden)", interactive=False, visible=False)
    status_box = gr.Textbox(label="Overall LinkedIn Token Status", interactive=False, value="Initializing...")
    org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)

    # On page load, copy the URL parameters into the two hidden textboxes.
    app.load(
        fn=get_url_user_token,
        inputs=None,
        outputs=[url_user_token_display, org_urn_display],
        api_name="get_url_params",
        show_progress="hidden",
    )

    def initial_load_sequence(url_token, org_urn_val, current_state):
        """Process the URL token, then render the dashboard from the resulting state.

        Returns the status message, the new state, the sync-button update and the
        dashboard HTML, matching the outputs of the ``org_urn_display.change`` event.
        """
        logging.info(
            "Initial load sequence triggered. Org URN: %s, URL Token: %s",
            org_urn_val, "Present" if url_token else "Absent",
        )
        status_msg, new_state, btn_update = process_and_store_bubble_token(url_token, org_urn_val, current_state)
        dashboard_content = display_main_dashboard(new_state)
        return status_msg, new_state, btn_update, dashboard_content

    with gr.Tabs() as tabs:
        with gr.TabItem("1️⃣ Dashboard & Sync", id="tab_dashboard_sync"):
            gr.Markdown("System checks for existing data from Bubble. The 'Sync' button activates if new data needs to be fetched from LinkedIn based on the last sync times and data availability.")
            sync_data_btn = gr.Button("🔄 Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
            sync_status_html_output = gr.HTML("<p style='text-align:center;'>Sync status will appear here.</p>")
            dashboard_display_html = gr.HTML("<p style='text-align:center;'>Dashboard loading...</p>")

            # NOTE(review): the initial load runs only when the hidden URN textbox
            # changes; if the URL supplies no URN the status box presumably stays
            # at "Initializing..." - confirm against get_url_user_token.
            org_urn_display.change(
                fn=initial_load_sequence,
                inputs=[url_user_token_display, org_urn_display, token_state],
                outputs=[status_box, token_state, sync_data_btn, dashboard_display_html],
                show_progress="full"
            )

            # Sync, then re-evaluate the token/button state, then redraw the dashboard.
            sync_click_event = sync_data_btn.click(
                fn=sync_all_linkedin_data_orchestrator,
                inputs=[token_state],
                outputs=[sync_status_html_output, token_state],
                show_progress="full"
            ).then(
                fn=process_and_store_bubble_token,
                inputs=[url_user_token_display, org_urn_display, token_state],
                outputs=[status_box, token_state, sync_data_btn],
                show_progress="hidden"
            ).then(
                fn=display_main_dashboard,
                inputs=[token_state],
                outputs=[dashboard_display_html],
                show_progress="hidden"
            )

        with gr.TabItem("2️⃣ Analytics", id="tab_analytics"):
            gr.Markdown("## 📈 LinkedIn Performance Analytics")
            gr.Markdown("Select a date range to filter Posts and Mentions analytics. Follower demographic plots show overall latest data. Follower time-series plots respect the selected date range if applicable to their data source (e.g. monthly gains).")

            analytics_status_md = gr.Markdown("Analytics status will appear here...")

            with gr.Row():
                date_filter_selector = gr.Radio(
                    ["All Time", "Last 7 Days", "Last 30 Days", "Custom Range"],
                    label="Select Date Range (for Posts, Mentions, and some Follower time-series)",
                    value="Last 30 Days"
                )
                custom_start_date_picker = gr.DateTime(label="Start Date (Custom)", visible=False, include_time=False, type="datetime")
                custom_end_date_picker = gr.DateTime(label="End Date (Custom)", visible=False, include_time=False, type="datetime")

            apply_filter_btn = gr.Button("🔍 Apply Filter & Refresh Analytics", variant="primary")

            def toggle_custom_date_pickers(selection):
                """Show both custom date pickers only while "Custom Range" is selected."""
                is_custom = selection == "Custom Range"
                return gr.update(visible=is_custom), gr.update(visible=is_custom)

            date_filter_selector.change(
                fn=toggle_custom_date_pickers,
                inputs=[date_filter_selector],
                outputs=[custom_start_date_picker, custom_end_date_picker]
            )

            gr.Markdown("### Posts & Engagement Overview (Filtered by Date)")
            with gr.Row():
                posts_activity_plot = gr.Plot(label="Posts Activity Over Time")
                engagement_type_plot = gr.Plot(label="Post Engagement Types")

            # The figures shown here are shown again in "Mention Analysis" below.
            gr.Markdown("### Mentions Overview (Filtered by Date)")
            with gr.Row():
                mentions_activity_plot = gr.Plot(label="Mentions Activity Over Time")
                mention_sentiment_plot = gr.Plot(label="Mention Sentiment Distribution")

            gr.Markdown("### Follower Dynamics")
            with gr.Row():
                followers_count_plot = gr.Plot(label="Followers Count Over Time (e.g., Monthly Gains)")
                followers_growth_rate_plot = gr.Plot(label="Followers Growth Rate (e.g., Monthly Gains)")

            gr.Markdown("### Follower Demographics (Overall Latest Data)")
            with gr.Row():
                followers_by_location_plot = gr.Plot(label="Followers by Location")
                followers_by_role_plot = gr.Plot(label="Followers by Role (Function)")
            with gr.Row():
                followers_by_industry_plot = gr.Plot(label="Followers by Industry")
                followers_by_seniority_plot = gr.Plot(label="Followers by Seniority")

            gr.Markdown("### Post Performance Insights (Filtered by Date)")
            with gr.Row():
                engagement_rate_plot = gr.Plot(label="Engagement Rate Over Time")
                reach_over_time_plot = gr.Plot(label="Reach Over Time (Clicks)")
            with gr.Row():
                impressions_over_time_plot = gr.Plot(label="Impressions Over Time")
                likes_over_time_plot = gr.Plot(label="Reactions (Likes) Over Time")

            gr.Markdown("### Detailed Post Engagement Over Time (Filtered by Date)")
            with gr.Row():
                # Fed from the same 'clickCount' column as the reach plot above.
                clicks_over_time_plot = gr.Plot(label="Clicks Over Time")
                shares_over_time_plot = gr.Plot(label="Shares Over Time")
            with gr.Row():
                comments_over_time_plot = gr.Plot(label="Comments Over Time")
                comments_sentiment_plot = gr.Plot(label="Breakdown of Comments by Sentiment")

            # --- Content Strategy Analysis ---
            gr.Markdown("### 📊 Content Strategy Analysis (Filtered by Date)")
            with gr.Row():
                post_frequency_cs_plot = gr.Plot(label="Post Frequency")
                content_format_breakdown_cs_plot = gr.Plot(label="Breakdown of Content by Format")
            with gr.Row():
                # Alone in its row so the topic breakdown gets the full width.
                content_topic_breakdown_cs_plot = gr.Plot(label="Breakdown of Content by Topics")

            # --- Mention Analysis (re-displays the mentions figures from above) ---
            gr.Markdown("### 💬 Mention Analysis (Filtered by Date)")
            with gr.Row():
                mention_analysis_volume_plot = gr.Plot(label="Mentions Volume Over Time")
                mention_analysis_sentiment_plot = gr.Plot(label="Breakdown of Mentions by Sentiment")

            # Order must match the list returned by update_analytics_plots:
            # 1 status message + 23 plots (13 original, 5 engagement,
            # 3 content strategy, 2 mention analysis) = 24 outputs.
            analytics_plot_outputs = [
                analytics_status_md,
                posts_activity_plot, engagement_type_plot,
                mentions_activity_plot, mention_sentiment_plot,
                followers_count_plot, followers_growth_rate_plot,
                followers_by_location_plot, followers_by_role_plot,
                followers_by_industry_plot, followers_by_seniority_plot,
                engagement_rate_plot, reach_over_time_plot, impressions_over_time_plot,
                likes_over_time_plot, clicks_over_time_plot,
                shares_over_time_plot, comments_over_time_plot,
                comments_sentiment_plot,
                post_frequency_cs_plot, content_format_breakdown_cs_plot, content_topic_breakdown_cs_plot,
                mention_analysis_volume_plot, mention_analysis_sentiment_plot,
            ]

            apply_filter_btn.click(
                fn=update_analytics_plots,
                inputs=[token_state, date_filter_selector, custom_start_date_picker, custom_end_date_picker],
                outputs=analytics_plot_outputs,
                show_progress="full"
            )

            # Also refresh the analytics once the sync chain has finished.
            sync_click_event.then(
                fn=update_analytics_plots,
                inputs=[token_state, date_filter_selector, custom_start_date_picker, custom_end_date_picker],
                outputs=analytics_plot_outputs,
                show_progress="full"
            )

        with gr.TabItem("3️⃣ Mentions", id="tab_mentions"):
            refresh_mentions_display_btn = gr.Button("🔄 Refresh Mentions Display (from local data)", variant="secondary")
            mentions_html = gr.HTML("Mentions data loads from Bubble after sync. Click refresh to view current local data.")
            mentions_sentiment_dist_plot = gr.Plot(label="Mention Sentiment Distribution")
            refresh_mentions_display_btn.click(
                fn=run_mentions_tab_display, inputs=[token_state],
                outputs=[mentions_html, mentions_sentiment_dist_plot],
                show_progress="full"
            )

        with gr.TabItem("4️⃣ Follower Stats", id="tab_follower_stats"):
            refresh_follower_stats_btn = gr.Button("🔄 Refresh Follower Stats Display (from local data)", variant="secondary")
            follower_stats_html = gr.HTML("Follower statistics load from Bubble after sync. Click refresh to view current local data.")
            with gr.Row():
                fs_plot_monthly_gains = gr.Plot(label="Monthly Follower Gains")
            with gr.Row():
                fs_plot_seniority = gr.Plot(label="Followers by Seniority (Top 10 Organic)")
                fs_plot_industry = gr.Plot(label="Followers by Industry (Top 10 Organic)")
            refresh_follower_stats_btn.click(
                fn=run_follower_stats_tab_display, inputs=[token_state],
                outputs=[follower_stats_html, fs_plot_monthly_gains, fs_plot_seniority, fs_plot_industry],
                show_progress="full"
            )
if __name__ == "__main__":
    # Startup diagnostics: warn about missing configuration but still launch.
    if not os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR):
        logging.warning("WARNING: '%s' environment variable not set.", LINKEDIN_CLIENT_ID_ENV_VAR)

    missing_bubble_vars = [
        var_name
        for var_name in (
            BUBBLE_APP_NAME_ENV_VAR,
            BUBBLE_API_KEY_PRIVATE_ENV_VAR,
            BUBBLE_API_ENDPOINT_ENV_VAR,
        )
        if not os.environ.get(var_name)
    ]
    if missing_bubble_vars:
        # Only the variable names are logged, never their values.
        logging.warning(
            "WARNING: Bubble environment variables not fully set. Missing: %s",
            ", ".join(missing_bubble_vars),
        )

    # matplotlib is imported unconditionally at module top, so it is always
    # available here; no ImportError guard is needed around this log line.
    logging.info(
        "Matplotlib version: %s found. Backend: %s",
        matplotlib.__version__, matplotlib.get_backend(),
    )

    # Listen on all interfaces on port 7860, with Gradio debug mode enabled.
    app.launch(server_name="0.0.0.0", server_port=7860, debug=True)