GuglielmoTor committed on
Commit
5c5c0fc
·
verified ·
1 Parent(s): 340244c

Update state_manager.py

Browse files
Files changed (1) hide show
  1. state_manager.py +168 -141
state_manager.py CHANGED
@@ -1,63 +1,112 @@
1
  # state_manager.py
2
  """
3
  Manages the application state, including token processing,
4
- initial data loading from Bubble, and determining sync requirements.
 
5
  """
6
  import pandas as pd
7
  import logging
8
  import os
9
- from datetime import datetime, timedelta, timezone # Added timezone to ensure it's available
10
  import gradio as gr
11
 
12
  # Assuming Bubble_API_Calls contains fetch_linkedin_token_from_bubble and fetch_linkedin_posts_data_from_bubble
13
  from Bubble_API_Calls import (
14
  fetch_linkedin_token_from_bubble,
15
- fetch_linkedin_posts_data_from_bubble
16
  )
17
  # Assuming config.py contains all necessary constants
18
  from config import (
19
- DEFAULT_INITIAL_FETCH_COUNT, BUBBLE_POST_DATE_COLUMN_NAME, BUBBLE_POSTS_TABLE_NAME,
 
20
  BUBBLE_MENTIONS_TABLE_NAME, BUBBLE_MENTIONS_DATE_COLUMN_NAME,
21
  BUBBLE_FOLLOWER_STATS_TABLE_NAME, FOLLOWER_STATS_TYPE_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN,
22
- LINKEDIN_CLIENT_ID_ENV_VAR
 
 
 
23
  )
24
 
25
  def check_token_status(token_state):
26
  """Checks the status of the LinkedIn token."""
27
  return "✅ Token available" if token_state and token_state.get("token") else "❌ Token not available"
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def process_and_store_bubble_token(url_user_token, org_urn, token_state):
30
  """
31
- Processes user token, fetches LinkedIn token, fetches existing Bubble data (posts, mentions, follower stats),
32
- and determines if an initial fetch or update is needed for each data type.
33
  Updates token state and UI for the sync button.
34
  """
35
  logging.info(f"Processing token with URL user token: '{url_user_token}', Org URN: '{org_urn}'")
36
 
37
- # Initialize or update state safely
38
- new_state = token_state.copy() if token_state else {
39
- "token": None, "client_id": None, "org_urn": None,
40
- "bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
41
- "bubble_mentions_df": pd.DataFrame(),
42
- "bubble_follower_stats_df": pd.DataFrame(),
43
- "url_user_token_temp_storage": None
44
- }
45
  new_state.update({
 
 
46
  "org_urn": org_urn,
47
  "bubble_posts_df": new_state.get("bubble_posts_df", pd.DataFrame()),
48
- "fetch_count_for_api": new_state.get("fetch_count_for_api", 0),
49
  "bubble_mentions_df": new_state.get("bubble_mentions_df", pd.DataFrame()),
 
50
  "bubble_follower_stats_df": new_state.get("bubble_follower_stats_df", pd.DataFrame()),
 
 
51
  "url_user_token_temp_storage": url_user_token
52
  })
53
 
54
- button_update = gr.update(visible=False, interactive=False, value="🔄 Sync LinkedIn Data") # Default to hidden
55
 
56
  client_id = os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR)
57
  new_state["client_id"] = client_id if client_id else "ENV VAR MISSING"
58
  if not client_id: logging.error(f"CRITICAL ERROR: '{LINKEDIN_CLIENT_ID_ENV_VAR}' environment variable not set.")
59
 
60
- # Fetch LinkedIn Token from Bubble
61
  if url_user_token and "not found" not in url_user_token and "Could not access" not in url_user_token:
62
  logging.info(f"Attempting to fetch LinkedIn token from Bubble with user token: {url_user_token}")
63
  try:
@@ -75,148 +124,126 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
75
  new_state["token"] = None
76
  logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
77
 
78
- # Fetch existing data from Bubble if Org URN is available
79
  current_org_urn = new_state.get("org_urn")
80
  if current_org_urn:
81
- # Fetch Posts from Bubble
82
- logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
83
- try:
84
- fetched_posts_df, error_message_posts = fetch_linkedin_posts_data_from_bubble(current_org_urn, BUBBLE_POSTS_TABLE_NAME)
85
- new_state["bubble_posts_df"] = pd.DataFrame() if error_message_posts or fetched_posts_df is None else fetched_posts_df
86
- if error_message_posts: logging.warning(f"Error fetching {BUBBLE_POSTS_TABLE_NAME} from Bubble: {error_message_posts}.")
87
- except Exception as e:
88
- logging.error(f"❌ Error fetching posts from Bubble: {e}.", exc_info=True)
89
- new_state["bubble_posts_df"] = pd.DataFrame()
90
-
91
- # Fetch Mentions from Bubble
92
- logging.info(f"Attempting to fetch mentions from Bubble for org_urn: {current_org_urn}")
93
- try:
94
- fetched_mentions_df, error_message_mentions = fetch_linkedin_posts_data_from_bubble(current_org_urn, BUBBLE_MENTIONS_TABLE_NAME)
95
- new_state["bubble_mentions_df"] = pd.DataFrame() if error_message_mentions or fetched_mentions_df is None else fetched_mentions_df
96
- if error_message_mentions: logging.warning(f"Error fetching {BUBBLE_MENTIONS_TABLE_NAME} from Bubble: {error_message_mentions}.")
97
- except Exception as e:
98
- logging.error(f"❌ Error fetching mentions from Bubble: {e}.", exc_info=True)
99
- new_state["bubble_mentions_df"] = pd.DataFrame()
100
-
101
- # Fetch Follower Stats from Bubble
102
- logging.info(f"Attempting to fetch follower stats from Bubble for org_urn: {current_org_urn}")
103
- try:
104
- fetched_follower_stats_df, error_message_fs = fetch_linkedin_posts_data_from_bubble(current_org_urn, BUBBLE_FOLLOWER_STATS_TABLE_NAME)
105
- new_state["bubble_follower_stats_df"] = pd.DataFrame() if error_message_fs or fetched_follower_stats_df is None else fetched_follower_stats_df
106
- if error_message_fs: logging.warning(f"Error fetching {BUBBLE_FOLLOWER_STATS_TABLE_NAME} from Bubble: {error_message_fs}.")
107
- except Exception as e:
108
- logging.error(f"❌ Error fetching follower stats from Bubble: {e}.", exc_info=True)
109
- new_state["bubble_follower_stats_df"] = pd.DataFrame()
110
  else:
111
  logging.warning("Org URN not available in state. Cannot fetch data from Bubble.")
112
- new_state["bubble_posts_df"] = pd.DataFrame()
113
- new_state["bubble_mentions_df"] = pd.DataFrame()
114
- new_state["bubble_follower_stats_df"] = pd.DataFrame()
 
 
 
115
 
116
- # Determine fetch count for Posts API
117
- if new_state["bubble_posts_df"].empty:
118
- logging.info(f"ℹ️ No posts in Bubble. Setting to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts.")
 
119
  new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
120
  else:
121
- try:
122
- df_posts_check = new_state["bubble_posts_df"].copy()
123
- if BUBBLE_POST_DATE_COLUMN_NAME not in df_posts_check.columns or df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].isnull().all():
124
- logging.warning(f"Date column '{BUBBLE_POST_DATE_COLUMN_NAME}' for posts missing or all null values. Triggering initial fetch.")
125
- new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
126
- else:
127
- df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME] = pd.to_datetime(df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce', utc=True)
128
- last_post_date_utc = df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].dropna().max()
129
- if pd.isna(last_post_date_utc):
130
- logging.warning("No valid post dates found after conversion. Triggering initial fetch.")
131
- new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
132
- else:
133
- days_diff = (pd.Timestamp('now', tz='UTC').normalize() - last_post_date_utc.normalize()).days
134
- if days_diff >= 7:
135
- new_state['fetch_count_for_api'] = max(1, days_diff // 7) * 10
136
- logging.info(f"Posts data is {days_diff} days old. Setting fetch count to {new_state['fetch_count_for_api']}.")
137
- else:
138
- new_state['fetch_count_for_api'] = 0
139
- logging.info("Posts data is recent. No new posts fetch needed based on date.")
140
- except Exception as e:
141
- logging.error(f"Error processing post dates: {e}. Defaulting to initial fetch for posts.", exc_info=True)
142
- new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
143
-
144
- # Determine if Mentions need sync
145
- mentions_need_sync = False
146
- if new_state["bubble_mentions_df"].empty:
147
- mentions_need_sync = True
148
- logging.info("Mentions need sync: Bubble mentions DF is empty.")
149
- else:
150
- if BUBBLE_MENTIONS_DATE_COLUMN_NAME not in new_state["bubble_mentions_df"].columns or \
151
- new_state["bubble_mentions_df"][BUBBLE_MENTIONS_DATE_COLUMN_NAME].isnull().all():
152
- mentions_need_sync = True
153
- logging.info(f"Mentions need sync: Date column '{BUBBLE_MENTIONS_DATE_COLUMN_NAME}' missing or all null values.")
154
  else:
155
- df_mentions_check = new_state["bubble_mentions_df"].copy()
156
- df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
157
- last_mention_date_utc = df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME].dropna().max()
158
- if pd.isna(last_mention_date_utc) or \
159
- (pd.Timestamp('now', tz='UTC').normalize() - last_mention_date_utc.normalize()).days >= 7:
160
- mentions_need_sync = True
161
- logging.info(f"Mentions need sync: Last mention date {last_mention_date_utc} is old or invalid.")
162
- else:
163
- logging.info(f"Mentions up-to-date. Last mention: {last_mention_date_utc}")
164
-
165
- # Determine if Follower Stats need sync
166
- follower_stats_need_sync = False
167
- fs_df = new_state.get("bubble_follower_stats_df", pd.DataFrame())
168
- if fs_df.empty:
169
- follower_stats_need_sync = True
170
- logging.info("Follower stats need sync: Bubble follower stats DF is empty.")
171
  else:
172
- monthly_gains_df = fs_df[fs_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].copy()
173
- if monthly_gains_df.empty:
174
- follower_stats_need_sync = True
175
- logging.info("Follower stats need sync: No monthly gains data in Bubble.")
176
- elif FOLLOWER_STATS_CATEGORY_COLUMN not in monthly_gains_df.columns:
177
- follower_stats_need_sync = True
178
- logging.info(f"Follower stats need sync: Date column '{FOLLOWER_STATS_CATEGORY_COLUMN}' missing in monthly gains.")
179
  else:
180
- monthly_gains_df.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN] = pd.to_datetime(monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.normalize()
181
- last_gain_date = monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN].dropna().max()
182
- if pd.isna(last_gain_date):
183
- follower_stats_need_sync = True
184
- logging.info("Follower stats need sync: No valid dates in monthly gains after conversion.")
185
- else:
186
- if last_gain_date.tzinfo is None or last_gain_date.tzinfo.utcoffset(last_gain_date) is None:
187
- last_gain_date = last_gain_date.tz_localize('UTC') # Localize naive to UTC
188
- else:
189
- last_gain_date = last_gain_date.tz_convert('UTC') # Convert aware to UTC
190
-
191
- start_of_current_month = pd.Timestamp('now', tz='UTC').normalize().replace(day=1)
192
- if last_gain_date < start_of_current_month:
193
- follower_stats_need_sync = True
194
- logging.info(f"Follower stats need sync: Last gain date {last_gain_date} is before current month start {start_of_current_month}.")
195
- else:
196
- logging.info(f"Follower monthly gains up-to-date. Last gain recorded on: {last_gain_date}")
197
-
198
- if fs_df[fs_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].empty:
199
- follower_stats_need_sync = True
200
- logging.info("Follower stats need sync: Demographic data (non-monthly types) missing.")
201
-
202
- # Update Sync Button based on token and needed actions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  sync_actions = []
204
- if new_state['fetch_count_for_api'] > 0:
205
- sync_actions.append(f"{new_state['fetch_count_for_api']} Posts")
206
- if mentions_need_sync: # This flag is set based on data freshness
207
  sync_actions.append("Mentions")
208
- if follower_stats_need_sync: # This flag is set based on data freshness
209
  sync_actions.append("Follower Stats")
210
 
211
  if new_state["token"] and sync_actions:
212
  button_label = f"🔄 Sync LinkedIn Data ({', '.join(sync_actions)})"
213
  button_update = gr.update(value=button_label, visible=True, interactive=True)
214
  elif new_state["token"]:
215
- button_label = "✅ Data Up-to-Date"
216
  button_update = gr.update(value=button_label, visible=True, interactive=False)
217
- else:
218
- button_update = gr.update(visible=False, interactive=False)
 
219
 
220
  token_status_message = check_token_status(new_state)
221
- logging.info(f"Token processing complete. Status: {token_status_message}. Button: {button_update}. Sync actions: {sync_actions}")
222
  return token_status_message, new_state, button_update
 
1
  # state_manager.py
2
  """
3
  Manages the application state, including token processing,
4
+ initial data loading from Bubble, and determining sync requirements
5
+ based on the operations log.
6
  """
7
  import pandas as pd
8
  import logging
9
  import os
10
+ from datetime import timezone # Python's datetime, not to be confused with pandas'
11
  import gradio as gr
12
 
13
  # Assuming Bubble_API_Calls contains fetch_linkedin_token_from_bubble and fetch_linkedin_posts_data_from_bubble
14
  from Bubble_API_Calls import (
15
  fetch_linkedin_token_from_bubble,
16
+ fetch_linkedin_posts_data_from_bubble # This is generic, used for all tables
17
  )
18
  # Assuming config.py contains all necessary constants
19
  from config import (
20
+ DEFAULT_INITIAL_FETCH_COUNT, DEFAULT_POSTS_UPDATE_FETCH_COUNT,
21
+ BUBBLE_POST_DATE_COLUMN_NAME, BUBBLE_POSTS_TABLE_NAME,
22
  BUBBLE_MENTIONS_TABLE_NAME, BUBBLE_MENTIONS_DATE_COLUMN_NAME,
23
  BUBBLE_FOLLOWER_STATS_TABLE_NAME, FOLLOWER_STATS_TYPE_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN,
24
+ LINKEDIN_CLIENT_ID_ENV_VAR,
25
+ BUBBLE_OPERATIONS_LOG_TABLE_NAME, BUBBLE_OPERATIONS_LOG_DATE_COLUMN,
26
+ BUBBLE_OPERATIONS_LOG_SUBJECT_COLUMN, BUBBLE_OPERATIONS_LOG_ORG_URN_COLUMN,
27
+ LOG_SUBJECT_POSTS, LOG_SUBJECT_MENTIONS, LOG_SUBJECT_FOLLOWER_STATS
28
  )
29
 
30
  def check_token_status(token_state):
31
  """Checks the status of the LinkedIn token."""
32
  return "✅ Token available" if token_state and token_state.get("token") else "❌ Token not available"
33
 
34
+ def get_last_sync_attempt_date(operations_log_df, subject, org_urn):
35
+ """
36
+ Retrieves the last sync attempt date for a given subject and organization URN
37
+ from the operations log DataFrame.
38
+
39
+ Args:
40
+ operations_log_df (pd.DataFrame): DataFrame containing operations log data.
41
+ Expected columns defined in config:
42
+ BUBBLE_OPERATIONS_LOG_DATE_COLUMN,
43
+ BUBBLE_OPERATIONS_LOG_SUBJECT_COLUMN,
44
+ BUBBLE_OPERATIONS_LOG_ORG_URN_COLUMN.
45
+ subject (str): The subject of the sync operation (e.g., "post", "mention").
46
+ org_urn (str): The organization URN.
47
+
48
+ Returns:
49
+ pd.Timestamp: The last sync attempt date (UTC), or pd.NaT if no relevant log entry is found.
50
+ """
51
+ if operations_log_df.empty or not org_urn:
52
+ return pd.NaT
53
+
54
+ # Ensure required columns exist
55
+ required_cols = [BUBBLE_OPERATIONS_LOG_DATE_COLUMN, BUBBLE_OPERATIONS_LOG_SUBJECT_COLUMN, BUBBLE_OPERATIONS_LOG_ORG_URN_COLUMN]
56
+ if not all(col in operations_log_df.columns for col in required_cols):
57
+ logging.warning(f"Operations log DF is missing one or more required columns: {required_cols}")
58
+ return pd.NaT
59
+
60
+ try:
61
+ # Filter for the specific subject and organization URN
62
+ # Ensure data types are consistent for comparison, especially org_urn
63
+ filtered_df = operations_log_df[
64
+ (operations_log_df[BUBBLE_OPERATIONS_LOG_SUBJECT_COLUMN].astype(str) == str(subject)) &
65
+ (operations_log_df[BUBBLE_OPERATIONS_LOG_ORG_URN_COLUMN].astype(str) == str(org_urn))
66
+ ]
67
+
68
+ if filtered_df.empty:
69
+ return pd.NaT
70
+
71
+ # Convert date column to datetime objects (UTC) and find the maximum (latest)
72
+ # The dates should ideally be stored in UTC or converted upon fetch.
73
+ # Assuming fetch_linkedin_posts_data_from_bubble handles date parsing correctly or provides strings.
74
+ dates = pd.to_datetime(filtered_df[BUBBLE_OPERATIONS_LOG_DATE_COLUMN], errors='coerce', utc=True)
75
+ return dates.dropna().max()
76
+ except Exception as e:
77
+ logging.error(f"Error processing operations log for last sync attempt date: {e}", exc_info=True)
78
+ return pd.NaT
79
+
80
+
81
  def process_and_store_bubble_token(url_user_token, org_urn, token_state):
82
  """
83
+ Processes user token, fetches LinkedIn token, fetches existing Bubble data (posts, mentions, follower stats, operations log),
84
+ and determines if a sync is needed for each data type based on the operations log.
85
  Updates token state and UI for the sync button.
86
  """
87
  logging.info(f"Processing token with URL user token: '{url_user_token}', Org URN: '{org_urn}'")
88
 
89
+ new_state = token_state.copy() if token_state else {}
 
 
 
 
 
 
 
90
  new_state.update({
91
+ "token": new_state.get("token"), # Preserve existing token if any
92
+ "client_id": new_state.get("client_id"),
93
  "org_urn": org_urn,
94
  "bubble_posts_df": new_state.get("bubble_posts_df", pd.DataFrame()),
95
+ "fetch_count_for_api": 0, # Will be determined based on log
96
  "bubble_mentions_df": new_state.get("bubble_mentions_df", pd.DataFrame()),
97
+ "mentions_should_sync_now": False, # Will be determined based on log
98
  "bubble_follower_stats_df": new_state.get("bubble_follower_stats_df", pd.DataFrame()),
99
+ "fs_should_sync_now": False, # Will be determined based on log
100
+ "bubble_operations_log_df": new_state.get("bubble_operations_log_df", pd.DataFrame()), # NEW
101
  "url_user_token_temp_storage": url_user_token
102
  })
103
 
104
+ button_update = gr.update(visible=False, interactive=False, value="🔄 Sync LinkedIn Data")
105
 
106
  client_id = os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR)
107
  new_state["client_id"] = client_id if client_id else "ENV VAR MISSING"
108
  if not client_id: logging.error(f"CRITICAL ERROR: '{LINKEDIN_CLIENT_ID_ENV_VAR}' environment variable not set.")
109
 
 
110
  if url_user_token and "not found" not in url_user_token and "Could not access" not in url_user_token:
111
  logging.info(f"Attempting to fetch LinkedIn token from Bubble with user token: {url_user_token}")
112
  try:
 
124
  new_state["token"] = None
125
  logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
126
 
 
127
  current_org_urn = new_state.get("org_urn")
128
  if current_org_urn:
129
+ data_tables_to_fetch = {
130
+ "bubble_posts_df": BUBBLE_POSTS_TABLE_NAME,
131
+ "bubble_mentions_df": BUBBLE_MENTIONS_TABLE_NAME,
132
+ "bubble_follower_stats_df": BUBBLE_FOLLOWER_STATS_TABLE_NAME,
133
+ "bubble_operations_log_df": BUBBLE_OPERATIONS_LOG_TABLE_NAME # NEW
134
+ }
135
+ for state_key, table_name in data_tables_to_fetch.items():
136
+ logging.info(f"Attempting to fetch {table_name} from Bubble for org_urn: {current_org_urn}")
137
+ try:
138
+ fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(current_org_urn, table_name)
139
+ new_state[state_key] = pd.DataFrame() if error_message or fetched_df is None else fetched_df
140
+ if error_message: logging.warning(f"Error fetching {table_name} from Bubble: {error_message}.")
141
+ # Ensure date column in operations log is parsed correctly if it's fetched as string
142
+ if state_key == "bubble_operations_log_df" and not new_state[state_key].empty and BUBBLE_OPERATIONS_LOG_DATE_COLUMN in new_state[state_key].columns:
143
+ new_state[state_key][BUBBLE_OPERATIONS_LOG_DATE_COLUMN] = pd.to_datetime(new_state[state_key][BUBBLE_OPERATIONS_LOG_DATE_COLUMN], errors='coerce', utc=True)
144
+
145
+ except Exception as e:
146
+ logging.error(f"❌ Error fetching {table_name} from Bubble: {e}.", exc_info=True)
147
+ new_state[state_key] = pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
148
  else:
149
  logging.warning("Org URN not available in state. Cannot fetch data from Bubble.")
150
+ for key in ["bubble_posts_df", "bubble_mentions_df", "bubble_follower_stats_df", "bubble_operations_log_df"]:
151
+ new_state[key] = pd.DataFrame()
152
+
153
+ # --- Determine sync needs based on Operations Log ---
154
+ ops_log_df = new_state.get("bubble_operations_log_df", pd.DataFrame())
155
+ now_utc = pd.Timestamp.now(tz='UTC')
156
 
157
+ # 1. Posts Sync Logic
158
+ last_post_sync_attempt = get_last_sync_attempt_date(ops_log_df, LOG_SUBJECT_POSTS, current_org_urn)
159
+ if pd.isna(last_post_sync_attempt):
160
+ logging.info(f"ℹ️ No previous '{LOG_SUBJECT_POSTS}' sync attempt logged. Setting to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts.")
161
  new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
162
  else:
163
+ days_since_last_attempt = (now_utc.normalize() - last_post_sync_attempt.normalize()).days
164
+ if days_since_last_attempt >= 7:
165
+ # Dynamic fetch count based on how many weeks have passed, or a fixed update count
166
+ # For simplicity, using DEFAULT_POSTS_UPDATE_FETCH_COUNT
167
+ new_state['fetch_count_for_api'] = DEFAULT_POSTS_UPDATE_FETCH_COUNT
168
+ logging.info(f"Posts sync attempt is {days_since_last_attempt} days old. Setting fetch count to {new_state['fetch_count_for_api']}.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  else:
170
+ new_state['fetch_count_for_api'] = 0
171
+ logging.info(f"Posts sync attempt was recent ({days_since_last_attempt} days ago). No new posts fetch scheduled based on log.")
172
+
173
+ # 2. Mentions Sync Logic
174
+ last_mention_sync_attempt = get_last_sync_attempt_date(ops_log_df, LOG_SUBJECT_MENTIONS, current_org_urn)
175
+ if pd.isna(last_mention_sync_attempt):
176
+ new_state['mentions_should_sync_now'] = True
177
+ logging.info(f"Mentions sync needed: No previous '{LOG_SUBJECT_MENTIONS}' sync attempt logged.")
 
 
 
 
 
 
 
 
178
  else:
179
+ days_since_last_attempt_mentions = (now_utc.normalize() - last_mention_sync_attempt.normalize()).days
180
+ if days_since_last_attempt_mentions >= 7:
181
+ new_state['mentions_should_sync_now'] = True
182
+ logging.info(f"Mentions sync needed: Last attempt was {days_since_last_attempt_mentions} days ago.")
 
 
 
183
  else:
184
+ new_state['mentions_should_sync_now'] = False
185
+ logging.info(f"Mentions sync attempt was recent ({days_since_last_attempt_mentions} days ago). Sync not scheduled.")
186
+
187
+ # 3. Follower Stats Sync Logic
188
+ last_fs_sync_attempt = get_last_sync_attempt_date(ops_log_df, LOG_SUBJECT_FOLLOWER_STATS, current_org_urn)
189
+ fs_df_current = new_state.get("bubble_follower_stats_df", pd.DataFrame())
190
+
191
+ demographics_missing = False
192
+ if fs_df_current.empty:
193
+ demographics_missing = True # If entire table is empty, demographics are missing
194
+ logging.info("Follower stats: Main table is empty, considering demographics missing.")
195
+ elif FOLLOWER_STATS_TYPE_COLUMN not in fs_df_current.columns:
196
+ demographics_missing = True # If type column is missing, cannot check demographics
197
+ logging.info(f"Follower stats: Column '{FOLLOWER_STATS_TYPE_COLUMN}' is missing, considering demographics missing.")
198
+ else:
199
+ # Check if any rows exist that are NOT 'follower_gains_monthly'
200
+ if fs_df_current[fs_df_current[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].empty:
201
+ demographics_missing = True
202
+ logging.info("Follower stats: Demographic data (non-monthly types) is missing.")
203
+
204
+ time_based_need_fs = False
205
+ if pd.isna(last_fs_sync_attempt):
206
+ time_based_need_fs = True
207
+ logging.info(f"Follower stats sync needed: No previous '{LOG_SUBJECT_FOLLOWER_STATS}' sync attempt logged.")
208
+ else:
209
+ start_of_current_month = now_utc.normalize().replace(day=1)
210
+ # Ensure last_fs_sync_attempt is timezone-aware (should be by get_last_sync_attempt_date)
211
+ if last_fs_sync_attempt.tzinfo is None: # Should not happen if get_last_sync_attempt_date works
212
+ last_fs_sync_attempt = last_fs_sync_attempt.tz_localize('UTC')
213
+
214
+ if last_fs_sync_attempt < start_of_current_month:
215
+ time_based_need_fs = True
216
+ logging.info(f"Follower stats sync needed: Last attempt {last_fs_sync_attempt.date()} is before current month start {start_of_current_month.date()}.")
217
+
218
+ if time_based_need_fs or demographics_missing:
219
+ new_state['fs_should_sync_now'] = True
220
+ if demographics_missing and not time_based_need_fs:
221
+ logging.info("Follower stats sync triggered: Demographic data missing, even if last sync attempt is recent.")
222
+ elif time_based_need_fs:
223
+ logging.info("Follower stats sync triggered by schedule.")
224
+ else:
225
+ new_state['fs_should_sync_now'] = False
226
+ logging.info("Follower stats sync not currently required by schedule or data presence.")
227
+
228
+ # Update Sync Button based on determined needs
229
  sync_actions = []
230
+ if new_state.get('fetch_count_for_api', 0) > 0:
231
+ sync_actions.append(f"Posts ({new_state['fetch_count_for_api']})")
232
+ if new_state.get('mentions_should_sync_now', False):
233
  sync_actions.append("Mentions")
234
+ if new_state.get('fs_should_sync_now', False):
235
  sync_actions.append("Follower Stats")
236
 
237
  if new_state["token"] and sync_actions:
238
  button_label = f"🔄 Sync LinkedIn Data ({', '.join(sync_actions)})"
239
  button_update = gr.update(value=button_label, visible=True, interactive=True)
240
  elif new_state["token"]:
241
+ button_label = "✅ Data Up-to-Date (based on sync log)"
242
  button_update = gr.update(value=button_label, visible=True, interactive=False)
243
+ else: # No token
244
+ button_update = gr.update(visible=False, interactive=False, value="🔄 Sync LinkedIn Data")
245
+
246
 
247
  token_status_message = check_token_status(new_state)
248
+ logging.info(f"Token processing complete. Status: {token_status_message}. Button: {button_update.get('value', 'N/A') if button_update else 'N/A'}. Sync actions needed: {sync_actions}")
249
  return token_status_message, new_state, button_update