GuglielmoTor committed on
Commit
58eb0f9
·
verified ·
1 Parent(s): 3b902c0

Update analytics_data_processing.py

Browse files
Files changed (1) hide show
  1. analytics_data_processing.py +24 -16
analytics_data_processing.py CHANGED
@@ -1,5 +1,5 @@
1
  import pandas as pd
2
- from datetime import datetime, timedelta
3
  import logging
4
 
5
  # Configure logging for this module
@@ -16,10 +16,14 @@ def filter_dataframe_by_date(df, date_column, start_date, end_date):
16
 
17
  df_copy = df.copy() # Work on a copy to avoid SettingWithCopyWarning
18
  try:
 
19
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
20
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
 
 
21
  except Exception as e:
22
- logging.error(f"Error converting date column '{date_column}' to datetime: {e}")
23
  return pd.DataFrame() # Return empty if conversion fails
24
 
25
  df_filtered = df_copy.dropna(subset=[date_column])
@@ -27,21 +31,17 @@ def filter_dataframe_by_date(df, date_column, start_date, end_date):
27
  logging.info(f"Filter by date: DataFrame became empty after dropping NaNs in date column '{date_column}'.")
28
  return pd.DataFrame()
29
 
30
- # Convert start_date and end_date to datetime objects if they are not None
31
- # Normalize to remove time part for consistent date comparisons if dates are just dates
32
  start_dt_obj = pd.to_datetime(start_date, errors='coerce').normalize() if start_date else None
33
  end_dt_obj = pd.to_datetime(end_date, errors='coerce').normalize() if end_date else None
34
 
35
 
36
  if start_dt_obj and end_dt_obj:
37
- # Ensure the DataFrame's date column is also normalized if it contains time
38
- df_filtered[date_column] = df_filtered[date_column].dt.normalize()
39
  return df_filtered[(df_filtered[date_column] >= start_dt_obj) & (df_filtered[date_column] <= end_dt_obj)]
40
  elif start_dt_obj:
41
- df_filtered[date_column] = df_filtered[date_column].dt.normalize()
42
  return df_filtered[df_filtered[date_column] >= start_dt_obj]
43
  elif end_dt_obj:
44
- df_filtered[date_column] = df_filtered[date_column].dt.normalize()
45
  return df_filtered[df_filtered[date_column] <= end_dt_obj]
46
  return df_filtered # No date filtering if neither start_date nor end_date is provided
47
 
@@ -62,20 +62,28 @@ def prepare_filtered_analytics_data(token_state_value, date_filter_option, custo
62
  date_column_mentions = token_state_value.get("config_date_col_mentions", "date")
63
 
64
  # Determine date range for filtering posts and mentions
65
- # Ensure end_dt is also normalized if it's datetime.now() for consistent comparison with normalized dates
66
- current_time_normalized = datetime.now().normalize()
 
 
67
  end_dt_filter = current_time_normalized
68
  start_dt_filter = None
69
 
70
  if date_filter_option == "Last 7 Days":
71
- start_dt_filter = current_time_normalized - timedelta(days=6) # Inclusive of start day
72
  elif date_filter_option == "Last 30 Days":
73
- start_dt_filter = current_time_normalized - timedelta(days=29) # Inclusive of start day
74
  elif date_filter_option == "Custom Range":
75
- start_dt_filter = pd.to_datetime(custom_start_date, errors='coerce').normalize() if custom_start_date else None
76
- # If custom_end_date is not provided, use current_time_normalized for end_dt_filter
77
- end_dt_filter = pd.to_datetime(custom_end_date, errors='coerce').normalize() if custom_end_date else current_time_normalized
78
- # "All Time" means start_dt_filter remains None, end_dt_filter effectively means up to now or unbounded if None
 
 
 
 
 
 
79
 
80
  logging.info(f"Date range for filtering: Start: {start_dt_filter}, End: {end_dt_filter}")
81
 
 
1
  import pandas as pd
2
+ from datetime import datetime, timedelta, time # Added time for min.time
3
  import logging
4
 
5
  # Configure logging for this module
 
16
 
17
  df_copy = df.copy() # Work on a copy to avoid SettingWithCopyWarning
18
  try:
19
+ # Convert the DataFrame's date column to pandas datetime objects first
20
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
21
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
22
+ # Normalize the DataFrame's date column to midnight (date part only)
23
+ df_copy[date_column] = df_copy[date_column].dt.normalize()
24
+
25
  except Exception as e:
26
+ logging.error(f"Error converting or normalizing date column '{date_column}' to datetime: {e}")
27
  return pd.DataFrame() # Return empty if conversion fails
28
 
29
  df_filtered = df_copy.dropna(subset=[date_column])
 
31
  logging.info(f"Filter by date: DataFrame became empty after dropping NaNs in date column '{date_column}'.")
32
  return pd.DataFrame()
33
 
34
+ # Convert start_date and end_date (which are expected to be datetime.datetime or None)
35
+ # to pandas Timestamps and normalize them for comparison
36
  start_dt_obj = pd.to_datetime(start_date, errors='coerce').normalize() if start_date else None
37
  end_dt_obj = pd.to_datetime(end_date, errors='coerce').normalize() if end_date else None
38
 
39
 
40
  if start_dt_obj and end_dt_obj:
 
 
41
  return df_filtered[(df_filtered[date_column] >= start_dt_obj) & (df_filtered[date_column] <= end_dt_obj)]
42
  elif start_dt_obj:
 
43
  return df_filtered[df_filtered[date_column] >= start_dt_obj]
44
  elif end_dt_obj:
 
45
  return df_filtered[df_filtered[date_column] <= end_dt_obj]
46
  return df_filtered # No date filtering if neither start_date nor end_date is provided
47
 
 
62
  date_column_mentions = token_state_value.get("config_date_col_mentions", "date")
63
 
64
  # Determine date range for filtering posts and mentions
65
+ # Normalize current time to midnight using datetime.replace
66
+ current_datetime_obj = datetime.now()
67
+ current_time_normalized = current_datetime_obj.replace(hour=0, minute=0, second=0, microsecond=0)
68
+
69
  end_dt_filter = current_time_normalized
70
  start_dt_filter = None
71
 
72
  if date_filter_option == "Last 7 Days":
73
+ start_dt_filter = current_time_normalized - timedelta(days=6)
74
  elif date_filter_option == "Last 30 Days":
75
+ start_dt_filter = current_time_normalized - timedelta(days=29)
76
  elif date_filter_option == "Custom Range":
77
+ # custom_start_date and custom_end_date are strings from gr.DateTime(type="string")
78
+ # Convert to datetime objects and then normalize
79
+ start_dt_filter_temp = pd.to_datetime(custom_start_date, errors='coerce')
80
+ start_dt_filter = start_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(start_dt_filter_temp) else None
81
+
82
+ end_dt_filter_temp = pd.to_datetime(custom_end_date, errors='coerce')
83
+ # If custom_end_date is not provided or invalid, use current_time_normalized
84
+ end_dt_filter = end_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(end_dt_filter_temp) else current_time_normalized
85
+
86
+ # "All Time" means start_dt_filter remains None, end_dt_filter effectively means up to now.
87
 
88
  logging.info(f"Date range for filtering: Start: {start_dt_filter}, End: {end_dt_filter}")
89