Spaces:

chenguittiMaroua
/

asm-app

Sleeping

App Files Files Community

chenguittiMaroua committed on Apr 11

Commit

5b32457

verified ·

1 Parent(s): 9333c98

Update main.py

Browse files

Files changed (1) hide show

main.py +93 -161

main.py CHANGED Viewed

@@ -432,78 +432,70 @@ def generate_visualization_code(df: pd.DataFrame, request: VisualizationRequest)
     )
 from typing import Optional
def interpret_natural_language(prompt: str, df_columns: list) -> Optional[VisualizationRequest]:
    """Convert a natural-language prompt into visualization parameters.

    The chart type is chosen from keywords found in the prompt (bar chart by
    default).  A column whose name occurs in the prompt is given a role based
    on the words directly in front of it: "color by"/"group by" -> hue,
    "by"/"for"/"across" -> x-axis, "of"/"show"/"plot" -> y-axis.  Roles the
    prompt leaves open are filled from ``df_columns``.

    Args:
        prompt: Free-text description of the desired chart.
        df_columns: Column names of the DataFrame that will be plotted.

    Returns:
        A ``VisualizationRequest``, or ``None`` when the prompt or the column
        list is empty.
    """
    if not prompt or not df_columns:
        return None

    prompt = prompt.lower().strip()

    chart_type = "bar"
    x_col = None
    y_col = None
    hue_col = None
    title = f"Visualization of {prompt[:50]}"  # Default title

    # Checked in insertion order; the first chart with a matching keyword
    # wins, so keywords repeated further down ("distribution",
    # "correlation") only ever select the earlier chart.
    chart_keywords = {
        "line": ["line", "trend", "over time"],
        "bar": ["bar", "compare", "comparison"],
        "scatter": ["scatter", "correlation", "relationship"],
        "histogram": ["histogram", "distribution", "frequency"],
        "boxplot": ["box", "quartile", "distribution"],
        "heatmap": ["heatmap", "correlation", "matrix"],
    }
    for chart, keywords in chart_keywords.items():
        if any(keyword in prompt for keyword in keywords):
            chart_type = chart
            break

    hue_cues = ("color by ", "group by ")
    x_cues = ("by ", "for ", "across ")
    y_cues = ("of ", "show ", "plot ")

    for col in df_columns:
        col_lower = str(col).lower()
        if col_lower not in prompt:
            continue
        # The hue cues contain the x-axis cue "by ", so they must be tested
        # first; otherwise "color by <col>" is always read as an x-axis.
        if any(cue + col_lower in prompt for cue in hue_cues):
            if not hue_col:
                hue_col = col
        elif not x_col and any(cue + col_lower in prompt for cue in x_cues):
            x_col = col
        elif not y_col and any(cue + col_lower in prompt for cue in y_cues):
            y_col = col

    # Fallbacks for roles the prompt did not pin down.
    if not x_col:
        x_col = df_columns[0]  # First column as default x-axis
    if not y_col and len(df_columns) > 1:
        # Only column *names* are available here (no DataFrame), so dtypes
        # cannot be inspected; use the first column that is not the x-axis.
        y_col = next((col for col in df_columns if col != x_col), None)

    # A heatmap is not driven by individual column roles.
    if chart_type == "heatmap":
        x_col = None
        y_col = None
        hue_col = None

    return VisualizationRequest(
        chart_type=chart_type,
        x_column=x_col,
        y_column=y_col,
        hue_column=hue_col,
        title=title,
        style="seaborn-v0_8"
    )
@@ -839,142 +831,82 @@ from fastapi.responses import FileResponse  # Add this import at the top
 # [Previous imports remain exactly the same...]
 @app.post("/visualize/natural")
-@limiter.limit("5/minute")
 async def visualize_with_natural_language(
-    request: Request,
     file: UploadFile = File(...),
     prompt: str = Form(""),
     style: str = Form("seaborn-v0_8")
 ):
     try:
-        # Debugging: Log incoming request
-        logger.info(f"Incoming request with file: {file.filename if file else 'None'}")
-        # Verify file exists and has content
-        if not file or not file.filename:
-            logger.error("No file uploaded")
-            raise HTTPException(400, "Please upload an Excel file")
-        # Read file content
         content = await file.read()
-        if not content:
-            logger.error("Empty file uploaded")
-            raise HTTPException(400, "The uploaded file is empty")
-        # Verify Excel file extension
-        file_ext = file.filename.split('.')[-1].lower()
-        if file_ext not in {"xlsx", "xls"}:
-            logger.error(f"Unsupported file type: {file_ext}")
-            raise HTTPException(400, "Only Excel files (.xlsx, .xls) are supported")
-        # Read Excel file with multiple engine fallbacks
-        try:
-            df = pd.read_excel(BytesIO(content), engine='openpyxl')
-        except Exception as e:
-            logger.warning(f"Openpyxl failed, trying xlrd: {str(e)}")
-            try:
-                df = pd.read_excel(BytesIO(content), engine='xlrd')
-            except Exception as e:
-                logger.error(f"Excel read failed: {str(e)}")
-                raise HTTPException(400, "Failed to read Excel file - may be corrupt or password protected")
         if df.empty:
-            logger.error("Empty DataFrame after reading Excel")
             raise HTTPException(400, "Excel file contains no data")
         # Generate prompt if empty
         if not prompt.strip():
-            prompt = generate_smart_prompt(df)
-            logger.info(f"Auto-generated prompt: {prompt}")
-        # Create visualization request
-        try:
-            vis_request = interpret_natural_language(prompt, df.columns.tolist())
-            if not vis_request:
-                raise ValueError("Could not interpret visualization request")
-            # Validate columns exist in DataFrame
-            if vis_request.x_column and vis_request.x_column not in df.columns:
-                raise ValueError(f"X-axis column '{vis_request.x_column}' not found")
-            if vis_request.y_column and vis_request.y_column not in df.columns:
-                raise ValueError(f"Y-axis column '{vis_request.y_column}' not found")
-            if vis_request.hue_column and vis_request.hue_column not in df.columns:
-                raise ValueError(f"Hue column '{vis_request.hue_column}' not found")
-        except ValueError as e:
-            logger.error(f"Visualization interpretation failed: {str(e)}")
-            raise HTTPException(
-                status_code=400,
-                detail={
-                    "error": "Could not create visualization request",
-                    "message": str(e),
-                    "available_columns": list(df.columns),
-                    "your_prompt": prompt
-                }
-            )
         vis_request.style = style
         # Generate visualization
-        try:
-            visualization_code = generate_dynamic_visualization_code(df, vis_request)
-            plt.style.use(vis_request.style)
-            fig, ax = plt.subplots(figsize=(10, 6))
-            # Safe execution with limited globals
-            exec_globals = {
-                'plt': plt,
-                'sns': sns,
-                'df': df,
-                'np': np,
-                'pd': pd
-            }
-            exec(visualization_code, exec_globals)
-            buffer = BytesIO()
-            plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
-            plt.close()
-            buffer.seek(0)
-            return {
-                "status": "success",
-                "image_data": base64.b64encode(buffer.getvalue()).decode('utf-8'),
-                "code": visualization_code,
-                "columns": list(df.columns),
-                "prompt": prompt,
-                "chart_type": vis_request.chart_type,
-                "x_column": vis_request.x_column,
-                "y_column": vis_request.y_column,
-                "hue_column": vis_request.hue_column
             }
-        except Exception as e:
-            logger.error(f"Visualization generation failed: {str(e)}")
-            raise HTTPException(
-                status_code=400,
-                detail={
-                    "error": "Visualization generation failed",
-                    "message": str(e),
-                    "suggestion": "Try modifying your prompt or using different columns"
-                }
-            )
-    except HTTPException as he:
-        raise
     except Exception as e:
-        logger.error(f"Unexpected error: {traceback.format_exc()}")
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "Internal server error",
-                "message": str(e)
-            }
-        )

     )
 from typing import Optional
def interpret_natural_language(prompt: str, df_columns: list) -> Optional[VisualizationRequest]:
    """Translate a free-text prompt into a ``VisualizationRequest``.

    The chart type is picked from keywords in the prompt (bar chart by
    default).  Column names that occur in the prompt are assigned to the
    x-axis, y-axis and hue in the order in which the prompt mentions them;
    roles the prompt leaves open are filled from ``df_columns``.

    Args:
        prompt: Free-text description of the desired chart.
        df_columns: Column names of the DataFrame that will be plotted.

    Returns:
        A ``VisualizationRequest``, or ``None`` when the prompt or the column
        list is empty.
    """
    if not prompt or not df_columns:
        return None

    prompt = prompt.lower().strip()

    # Ordered rules: the first chart type with a keyword in the prompt wins.
    # Keywords are stems matched as plain substrings ("correlat" covers both
    # "correlation" and "correlated").
    chart_rules = (
        ("line", ("line", "trend", "over time")),
        ("scatter", ("scatter", "relationship", "correlat")),
        ("histogram", ("histogram", "distribut", "frequenc")),
        ("boxplot", ("box", "quartile")),
        ("heatmap", ("heatmap", "matrix")),
    )
    chart_type = next(
        (name for name, keywords in chart_rules
         if any(keyword in prompt for keyword in keywords)),
        "bar",
    )

    # A heatmap takes no column roles, so it can be returned straight away.
    if chart_type == "heatmap":
        return VisualizationRequest(
            chart_type="heatmap",
            title=f"Heatmap: {prompt[:30]}...",
            style="seaborn-v0_8"
        )

    # Collect the columns named in the prompt together with the position of
    # their first occurrence, so roles follow mention order rather than the
    # order of the columns in the DataFrame.
    mentions = []
    for col in df_columns:
        needle = str(col).lower()
        # An empty name would "match" every prompt; skip it.
        position = prompt.find(needle) if needle else -1
        if position >= 0:
            mentions.append((position, col))
    mentions.sort(key=lambda item: item[0])  # stable: ties keep column order

    takes_y = chart_type != "histogram"  # histograms only need an x-axis
    takes_hue = chart_type in ("bar", "scatter", "line")

    x_col = None
    y_col = None
    hue_col = None
    for _, col in mentions:
        if x_col is None:
            x_col = col
        elif y_col is None and takes_y:
            y_col = col
        elif hue_col is None and takes_hue:
            hue_col = col

    # Defaults for roles the prompt did not specify.
    if x_col is None:
        x_col = df_columns[0]
    if y_col is None and takes_y and len(df_columns) > 1:
        # Use the first column that is not already the x-axis, so a column is
        # never plotted against itself.
        y_col = next((col for col in df_columns if col != x_col), None)

    return VisualizationRequest(
        chart_type=chart_type,
        x_column=x_col,
        y_column=y_col,
        hue_column=hue_col,
        title=f"{chart_type.title()} of {prompt[:30]}...",
        style="seaborn-v0_8"
    )
 # [Previous imports remain exactly the same...]
 @app.post("/visualize/natural")
 async def visualize_with_natural_language(
     file: UploadFile = File(...),
     prompt: str = Form(""),
     style: str = Form("seaborn-v0_8")
 ):
     try:
+        # Read and validate Excel file
         content = await file.read()
+        df = pd.read_excel(BytesIO(content))
         if df.empty:
             raise HTTPException(400, "Excel file contains no data")
+        # Clean column names (remove special characters)
+        df.columns = [str(col).strip() for col in df.columns]
         # Generate prompt if empty
         if not prompt.strip():
+            prompt = f"Visualize {', '.join(df.columns[:2])}"  # Default to first two columns
+        # Create visualization request
+        vis_request = interpret_natural_language(prompt, list(df.columns))
+        if not vis_request:
+            raise HTTPException(400, "Couldn't understand your request. Try mentioning column names.")
         vis_request.style = style
         # Generate visualization
+        plt.style.use(style)
+        fig, ax = plt.subplots(figsize=(10, 6))
+        # Dynamic visualization based on chart type
+        if vis_request.chart_type == "heatmap":
+            numeric_df = df.select_dtypes(include=['number'])
+            if numeric_df.empty:
+                raise HTTPException(400, "No numeric columns found for heatmap")
+            sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
+        else:
+            # For other chart types
+            plot_func = {
+                "line": sns.lineplot,
+                "bar": sns.barplot,
+                "scatter": sns.scatterplot,
+                "histogram": lambda data, x, **kwargs: plt.hist(data[x].dropna()),
+                "boxplot": sns.boxplot
+            }[vis_request.chart_type]
+            plot_kwargs = {
+                "data": df,
+                "x": vis_request.x_column,
+                "y": vis_request.y_column if vis_request.chart_type != "histogram" else None,
+                "hue": vis_request.hue_column
             }
+            plot_func(**{k: v for k, v in plot_kwargs.items() if v is not None})
+        plt.title(vis_request.title)
+        plt.tight_layout()
+        # Save to buffer
+        buffer = BytesIO()
+        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
+        plt.close()
+        buffer.seek(0)
+        return {
+            "image": base64.b64encode(buffer.getvalue()).decode('utf-8'),
+            "chart_type": vis_request.chart_type,
+            "x_column": vis_request.x_column,
+            "y_column": vis_request.y_column,
+            "hue_column": vis_request.hue_column,
+            "columns": list(df.columns)
+        }
     except Exception as e:
+        raise HTTPException(500, f"Error generating visualization: {str(e)}")