Spaces:
Running
Running
Update eb_agent_module.py
Browse files- eb_agent_module.py +26 -3
eb_agent_module.py
CHANGED
@@ -122,23 +122,40 @@ def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
|
|
122 |
if df.empty:
|
123 |
return f"DataFrame '{df_name}': Empty\n"
|
124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
schema_parts = [f"DataFrame '{df_name}':"]
|
126 |
schema_parts.append(f" Shape: {df.shape}")
|
127 |
schema_parts.append(" Columns:")
|
128 |
-
|
|
|
|
|
129 |
col_type = str(df[col].dtype)
|
130 |
null_count = df[col].isnull().sum()
|
131 |
unique_count = df[col].nunique()
|
132 |
schema_parts.append(f" - {col} (Type: {col_type}, Nulls: {null_count}/{len(df)}, Uniques: {unique_count})")
|
133 |
|
134 |
-
if
|
|
|
|
|
|
|
|
|
|
|
135 |
schema_parts.append(" Sample Data (first 2 rows):")
|
136 |
try:
|
137 |
-
|
|
|
|
|
138 |
indented_sample_df = "\n".join([" " + line for line in sample_df_str.split('\n')])
|
139 |
schema_parts.append(indented_sample_df)
|
140 |
except Exception as e:
|
141 |
schema_parts.append(f" Could not generate sample data: {e}")
|
|
|
|
|
142 |
|
143 |
return "\n".join(schema_parts) + "\n"
|
144 |
|
@@ -439,6 +456,12 @@ class EmployerBrandingAgent:
|
|
439 |
- **Use the exact DataFrame names** from the 'Available DataFrame Schemas' section for internal processing
|
440 |
- **Handle data issues gracefully**: If data is missing or incomplete, explain limitations in business terms
|
441 |
- **Create visual summaries**: When possible, describe trends and patterns in easy-to-understand formats
|
|
|
|
|
|
|
|
|
|
|
|
|
442 |
|
443 |
## Response Structure Guidelines:
|
444 |
1. **Executive Summary**: Start with 2-3 key takeaways
|
|
|
122 |
if df.empty:
|
123 |
return f"DataFrame '{df_name}': Empty\n"
|
124 |
|
125 |
+
# Define system columns to exclude from schema representation
|
126 |
+
system_columns = ['Created Date', 'Modified Date', '_id']
|
127 |
+
|
128 |
+
# Filter out system columns for schema representation
|
129 |
+
filtered_columns = [col for col in df.columns if col not in system_columns]
|
130 |
+
|
131 |
schema_parts = [f"DataFrame '{df_name}':"]
|
132 |
schema_parts.append(f" Shape: {df.shape}")
|
133 |
schema_parts.append(" Columns:")
|
134 |
+
|
135 |
+
# Show only filtered columns in schema
|
136 |
+
for col in filtered_columns:
|
137 |
col_type = str(df[col].dtype)
|
138 |
null_count = df[col].isnull().sum()
|
139 |
unique_count = df[col].nunique()
|
140 |
schema_parts.append(f" - {col} (Type: {col_type}, Nulls: {null_count}/{len(df)}, Uniques: {unique_count})")
|
141 |
|
142 |
+
# Add note if system columns were excluded
|
143 |
+
excluded_columns = [col for col in df.columns if col in system_columns]
|
144 |
+
if excluded_columns:
|
145 |
+
schema_parts.append(f" Note: System columns excluded from display: {', '.join(excluded_columns)}")
|
146 |
+
|
147 |
+
if not df.empty and filtered_columns:
|
148 |
schema_parts.append(" Sample Data (first 2 rows):")
|
149 |
try:
|
150 |
+
# Create sample with only filtered columns
|
151 |
+
sample_df = df[filtered_columns].head(2)
|
152 |
+
sample_df_str = sample_df.to_string(index=True, max_colwidth=50)
|
153 |
indented_sample_df = "\n".join([" " + line for line in sample_df_str.split('\n')])
|
154 |
schema_parts.append(indented_sample_df)
|
155 |
except Exception as e:
|
156 |
schema_parts.append(f" Could not generate sample data: {e}")
|
157 |
+
elif not df.empty and not filtered_columns:
|
158 |
+
schema_parts.append(" Sample Data: Only system columns present, no business data to display")
|
159 |
|
160 |
return "\n".join(schema_parts) + "\n"
|
161 |
|
|
|
456 |
- **Use the exact DataFrame names** from the 'Available DataFrame Schemas' section for internal processing
|
457 |
- **Handle data issues gracefully**: If data is missing or incomplete, explain limitations in business terms
|
458 |
- **Create visual summaries**: When possible, describe trends and patterns in easy-to-understand formats
|
459 |
+
- **Specific instructions for `follower_stats` DataFrame (if available):**
|
460 |
+
- When asked about the amount of followers or follower gains based on `follower_stats`:
|
461 |
+
- The relevant date information (formatted as strings) is typically found in the `category_name` column.
|
462 |
+
- To get the monthly follower gains, filter the data where the `follower_count_type` column is equal to `"follower_gains_monthly"`.
|
463 |
+
- The actual numeric follower count for that period will be in another column (e.g., 'follower_count_organic' or 'follower_count_paid')
|
464 |
+
|
465 |
|
466 |
## Response Structure Guidelines:
|
467 |
1. **Executive Summary**: Start with 2-3 key takeaways
|