GuglielmoTor committed on
Commit
489cb0a
·
verified ·
1 Parent(s): dea52fa

Update eb_agent_module.py

Browse files
Files changed (1) hide show
  1. eb_agent_module.py +26 -3
eb_agent_module.py CHANGED
@@ -122,23 +122,40 @@ def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
122
  if df.empty:
123
  return f"DataFrame '{df_name}': Empty\n"
124
 
 
 
 
 
 
 
125
  schema_parts = [f"DataFrame '{df_name}':"]
126
  schema_parts.append(f" Shape: {df.shape}")
127
  schema_parts.append(" Columns:")
128
- for col in df.columns:
 
 
129
  col_type = str(df[col].dtype)
130
  null_count = df[col].isnull().sum()
131
  unique_count = df[col].nunique()
132
  schema_parts.append(f" - {col} (Type: {col_type}, Nulls: {null_count}/{len(df)}, Uniques: {unique_count})")
133
 
134
- if not df.empty:
 
 
 
 
 
135
  schema_parts.append(" Sample Data (first 2 rows):")
136
  try:
137
- sample_df_str = df.head(2).to_string(index=True, max_colwidth=50) # Show index for context
 
 
138
  indented_sample_df = "\n".join([" " + line for line in sample_df_str.split('\n')])
139
  schema_parts.append(indented_sample_df)
140
  except Exception as e:
141
  schema_parts.append(f" Could not generate sample data: {e}")
 
 
142
 
143
  return "\n".join(schema_parts) + "\n"
144
 
@@ -439,6 +456,12 @@ class EmployerBrandingAgent:
439
  - **Use the exact DataFrame names** from the 'Available DataFrame Schemas' section for internal processing
440
  - **Handle data issues gracefully**: If data is missing or incomplete, explain limitations in business terms
441
  - **Create visual summaries**: When possible, describe trends and patterns in easy-to-understand formats
 
 
 
 
 
 
442
 
443
  ## Response Structure Guidelines:
444
  1. **Executive Summary**: Start with 2-3 key takeaways
 
122
  if df.empty:
123
  return f"DataFrame '{df_name}': Empty\n"
124
 
125
+ # Define system columns to exclude from schema representation
126
+ system_columns = ['Created Date', 'Modified Date', '_id']
127
+
128
+ # Filter out system columns for schema representation
129
+ filtered_columns = [col for col in df.columns if col not in system_columns]
130
+
131
  schema_parts = [f"DataFrame '{df_name}':"]
132
  schema_parts.append(f" Shape: {df.shape}")
133
  schema_parts.append(" Columns:")
134
+
135
+ # Show only filtered columns in schema
136
+ for col in filtered_columns:
137
  col_type = str(df[col].dtype)
138
  null_count = df[col].isnull().sum()
139
  unique_count = df[col].nunique()
140
  schema_parts.append(f" - {col} (Type: {col_type}, Nulls: {null_count}/{len(df)}, Uniques: {unique_count})")
141
 
142
+ # Add note if system columns were excluded
143
+ excluded_columns = [col for col in df.columns if col in system_columns]
144
+ if excluded_columns:
145
+ schema_parts.append(f" Note: System columns excluded from display: {', '.join(excluded_columns)}")
146
+
147
+ if not df.empty and filtered_columns:
148
  schema_parts.append(" Sample Data (first 2 rows):")
149
  try:
150
+ # Create sample with only filtered columns
151
+ sample_df = df[filtered_columns].head(2)
152
+ sample_df_str = sample_df.to_string(index=True, max_colwidth=50)
153
  indented_sample_df = "\n".join([" " + line for line in sample_df_str.split('\n')])
154
  schema_parts.append(indented_sample_df)
155
  except Exception as e:
156
  schema_parts.append(f" Could not generate sample data: {e}")
157
+ elif not df.empty and not filtered_columns:
158
+ schema_parts.append(" Sample Data: Only system columns present, no business data to display")
159
 
160
  return "\n".join(schema_parts) + "\n"
161
 
 
456
  - **Use the exact DataFrame names** from the 'Available DataFrame Schemas' section for internal processing
457
  - **Handle data issues gracefully**: If data is missing or incomplete, explain limitations in business terms
458
  - **Create visual summaries**: When possible, describe trends and patterns in easy-to-understand formats
459
+ - **Specific instructions for `follower_stats` DataFrame (if available):**
460
+ - When asked about the amount of followers or follower gains based on `follower_stats`:
461
+ - The relevant date information (formatted as strings) is typically found in the `category_name` column.
462
+ - To get the monthly follower gains, filter the data where the `follower_count_type` column is equal to `"follower_gains_monthly"`.
463
+ - The actual numeric follower count for that period will be in another column (e.g., 'follower_count_organic' or 'follower_count_paid')
464
+
465
 
466
  ## Response Structure Guidelines:
467
  1. **Executive Summary**: Start with 2-3 key takeaways