Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Community

GuglielmoTor committed about 1 month ago

Commit

489cb0a

verified ·

1 Parent(s): dea52fa

Update eb_agent_module.py

Browse files

Files changed (1) hide show

eb_agent_module.py +26 -3

eb_agent_module.py CHANGED Viewed

@@ -122,23 +122,40 @@ def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
     if df.empty:
         return f"DataFrame '{df_name}': Empty\n"
     schema_parts = [f"DataFrame '{df_name}':"]
     schema_parts.append(f"  Shape: {df.shape}")
     schema_parts.append("  Columns:")
-    for col in df.columns:
         col_type = str(df[col].dtype)
         null_count = df[col].isnull().sum()
         unique_count = df[col].nunique()
         schema_parts.append(f"    - {col} (Type: {col_type}, Nulls: {null_count}/{len(df)}, Uniques: {unique_count})")
-    if not df.empty:
         schema_parts.append("  Sample Data (first 2 rows):")
         try:
-            sample_df_str = df.head(2).to_string(index=True, max_colwidth=50) # Show index for context
             indented_sample_df = "\n".join(["    " + line for line in sample_df_str.split('\n')])
             schema_parts.append(indented_sample_df)
         except Exception as e:
             schema_parts.append(f"    Could not generate sample data: {e}")
     return "\n".join(schema_parts) + "\n"
@@ -439,6 +456,12 @@ class EmployerBrandingAgent:
         - **Use the exact DataFrame names** from the 'Available DataFrame Schemas' section for internal processing
         - **Handle data issues gracefully**: If data is missing or incomplete, explain limitations in business terms
         - **Create visual summaries**: When possible, describe trends and patterns in easy-to-understand formats
         ## Response Structure Guidelines:
         1. **Executive Summary**: Start with 2-3 key takeaways

     if df.empty:
         return f"DataFrame '{df_name}': Empty\n"
+    # Define system columns to exclude from schema representation
+    system_columns = ['Created Date', 'Modified Date', '_id']
+    # Filter out system columns for schema representation
+    filtered_columns = [col for col in df.columns if col not in system_columns]
     schema_parts = [f"DataFrame '{df_name}':"]
     schema_parts.append(f"  Shape: {df.shape}")
     schema_parts.append("  Columns:")
+    # Show only filtered columns in schema
+    for col in filtered_columns:
         col_type = str(df[col].dtype)
         null_count = df[col].isnull().sum()
         unique_count = df[col].nunique()
         schema_parts.append(f"    - {col} (Type: {col_type}, Nulls: {null_count}/{len(df)}, Uniques: {unique_count})")
+    # Add note if system columns were excluded
+    excluded_columns = [col for col in df.columns if col in system_columns]
+    if excluded_columns:
+        schema_parts.append(f"  Note: System columns excluded from display: {', '.join(excluded_columns)}")
+    if not df.empty and filtered_columns:
         schema_parts.append("  Sample Data (first 2 rows):")
         try:
+            # Create sample with only filtered columns
+            sample_df = df[filtered_columns].head(2)
+            sample_df_str = sample_df.to_string(index=True, max_colwidth=50)
             indented_sample_df = "\n".join(["    " + line for line in sample_df_str.split('\n')])
             schema_parts.append(indented_sample_df)
         except Exception as e:
             schema_parts.append(f"    Could not generate sample data: {e}")
+    elif not df.empty and not filtered_columns:
+        schema_parts.append("  Sample Data: Only system columns present, no business data to display")
     return "\n".join(schema_parts) + "\n"
         - **Use the exact DataFrame names** from the 'Available DataFrame Schemas' section for internal processing
         - **Handle data issues gracefully**: If data is missing or incomplete, explain limitations in business terms
         - **Create visual summaries**: When possible, describe trends and patterns in easy-to-understand formats
+        - **Specific instructions for `follower_stats` DataFrame (if available):**
+            - When asked about the amount of followers or follower gains based on `follower_stats`:
+                - The relevant date information (formatted as strings) is typically found in the `category_name` column.
+                - To get the monthly follower gains, filter the data where the `follower_count_type` column is equal to `"follower_gains_monthly"`.
+                - The actual numeric follower count for that period will be in another column (e.g., 'follower_count_organic' or 'follower_count_paid')
         ## Response Structure Guidelines:
         1. **Executive Summary**: Start with 2-3 key takeaways