Spaces:

jaihodigital
/

visio-ai

Sleeping

App Files Files Community

jaihodigital committed on Jun 30

Commit

956b835

verified ·

1 Parent(s): 4b3889f

Upload 6 files

Browse files

Files changed (6) hide show

src/app.py +811 -0
src/calculator.py +113 -0
src/notepad_lite.py +49 -0
src/viz_ai_img.py +110 -0
src/viz_report.py +164 -0
src/word_cloud.py +122 -0

src/app.py ADDED Viewed

	@@ -0,0 +1,811 @@

+import streamlit as st
+import webbrowser
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+import pickle
+import base64
+import io
+import plotly.graph_objects as go
+#import viz_report
+import viz_ai_img
+import word_cloud
+import notepad_lite
+import calculator
+# Import ML libraries
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
+from sklearn.svm import SVC, SVR
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from sklearn.naive_bayes import GaussianNB # For classification
+from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
+import numpy as np # For numerical operations, especially with metrics
+st.set_page_config("Visio AI", page_icon="images/favicon.png", layout='wide')
+st.markdown("<h1 style='text-align: center; color: #4A90E2;'>📊 VISIO AI</h1>", unsafe_allow_html=True)
+st.markdown("<h4 style='text-align: center; color: orange;'>Machine Learning and Data Analysis Platform</h4>", unsafe_allow_html=True)
+st.markdown("<hr>", unsafe_allow_html=True)
+#-------------------------------------------------#
+# --- Session State Initialization ---
+if 'updated_df' not in st.session_state:
+    st.session_state.updated_df = None
+if 'original_df_uploaded' not in st.session_state:
+    st.session_state.original_df_uploaded = False
+if 'last_uploaded_file_name' not in st.session_state:
+    st.session_state.last_uploaded_file_name = None
+if 'X_train' not in st.session_state:
+    st.session_state.X_train = None
+if 'X_test' not in st.session_state:
+    st.session_state.X_test = None
+if 'y_train' not in st.session_state:
+    st.session_state.y_train = None
+if 'y_test' not in st.session_state:
+    st.session_state.y_test = None
+if 'target_column' not in st.session_state:
+    st.session_state.target_column = None
+if 'feature_columns' not in st.session_state:
+    st.session_state.feature_columns = None
+if 'problem_type' not in st.session_state:
+    st.session_state.problem_type = None # 'classification' or 'regression'
+if 'trained_model' not in st.session_state:
+    st.session_state.trained_model = None
+if 'model_metrics' not in st.session_state:
+    st.session_state.model_metrics = None
+if 'scaler' not in st.session_state:
+    st.session_state.scaler = None
+# Navigation Bar
+col1, col2, col3, col4, col5 = st.columns((1, 1, 1, 1, 1))
+with col1:
+    about_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_about.html"
+    if st.button('About'):
+        st.markdown("check out this [link](%s)" % about_url)
+        #webbrowser.open_new_tab(about_url)
+with col2:
+    guide_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_helper.html"
+    if st.button('Guide'):
+        st.markdown("check out this [link](%s)" % guide_url)
+with col3:
+    docs_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_docs.html"
+    if st.button('Docs'):
+        st.markdown("check out this [link](%s)" % docs_url)
+with col4:
+    joinus_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_join.html"
+    if st.button('Join Us'):
+        st.markdown("check out this [link](%s)" % joinus_url)
+with col5:
+    elite_access = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/get_elite_access.html"
+    if st.button('Get Elite Access'):
+        st.markdown("check out this [link](%s)" % elite_access)
+#-------------------------------------------------#
+# Top Expander Columns (Data Operations & Algorithms, Select Plot Type, Pre Analysis)
+col11, col12, col13 = st.columns([1, 1, 1])
+# --- Data Operations & Algorithms Expander ---
+with col11:
+    with st.expander("⚙️ Data Operations & Algorithms", expanded=False):
+        if st.session_state.updated_df is not None:
+            st.markdown("#### 1. Define Target Variable and Problem Type")
+            all_columns = st.session_state.updated_df.columns.tolist()
+            target_column = st.selectbox("Select your **Target Column (Y)**:", ["--- Select ---"] + all_columns, key="target_col_select")
+            if target_column != "--- Select ---":
+                st.session_state.target_column = target_column
+                # Heuristic to guess problem type
+                if st.session_state.updated_df[target_column].dtype in ['int64', 'float64']:
+                    if st.session_state.updated_df[target_column].nunique() < 20 and st.session_state.updated_df[target_column].dtype == 'int64':
+                        st.session_state.problem_type = 'classification'
+                        st.info(f"Detected **Classification** problem based on target column '{target_column}' (integer with few unique values).")
+                    else:
+                        st.session_state.problem_type = 'regression'
+                        st.info(f"Detected **Regression** problem based on target column '{target_column}' (numerical).")
+                elif st.session_state.updated_df[target_column].dtype == 'object' or st.session_state.updated_df[target_column].dtype == 'bool':
+                    st.session_state.problem_type = 'classification'
+                    st.info(f"Detected **Classification** problem based on target column '{target_column}' (categorical).")
+                else:
+                    st.session_state.problem_type = None
+                    st.warning("Could not definitively determine problem type. Please proceed with caution.")
+                st.markdown("---")
+                st.markdown("#### 2. Select Independent Variables (Features)")
+                available_features = [col for col in all_columns if col != target_column]
+                feature_columns = st.multiselect("Select your **Independent Variables (X)**:", available_features, default=available_features, key="feature_select")
+                if feature_columns:
+                    st.session_state.feature_columns = feature_columns
+                    st.markdown("---")
+                    st.markdown("#### 3. Split Data into Train and Test Sets")
+                    test_size = st.slider("Select Test Set Size:", min_value=0.1, max_value=0.5, value=0.2, step=0.05, key="test_size_slider")
+                    random_state = st.number_input("Random State (for reproducibility):", value=42, step=1, key="random_state_input")
+                    # Use only selected features
+                    features = st.session_state.updated_df[feature_columns]
+                    target = st.session_state.updated_df[target_column]
+                    # Handle categorical features by encoding
+                    for col in features.select_dtypes(include=['object', 'bool']).columns:
+                        le = LabelEncoder()
+                        features[col] = le.fit_transform(features[col].astype(str))
+                    # Handle numerical features by scaling
+                    numerical_cols = features.select_dtypes(include=['number']).columns
+                    if not numerical_cols.empty:
+                        scaler = StandardScaler()
+                        features[numerical_cols] = scaler.fit_transform(features[numerical_cols])
+                        st.session_state.scaler = scaler # Save the scaler
+                    try:
+                        X_train, X_test, y_train, y_test = train_test_split(
+                            features, target, test_size=test_size, random_state=random_state,
+                            stratify=target if st.session_state.problem_type == 'classification' else None
+                        )
+                        st.session_state.X_train = X_train
+                        st.session_state.X_test = X_test
+                        st.session_state.y_train = y_train
+                        st.session_state.y_test = y_test
+                        st.success(f"Data split successfully! Training: {len(X_train)} samples, Testing: {len(X_test)} samples.")
+                        st.markdown("---")
+                        st.markdown("#### 4. Select Machine Learning Algorithm")
+                        if st.session_state.problem_type == 'classification':
+                            algo_options = {
+                                "Logistic Regression": LogisticRegression(random_state=random_state),
+                                "Decision Tree Classifier": DecisionTreeClassifier(random_state=random_state),
+                                "Random Forest Classifier": RandomForestClassifier(random_state=random_state),
+                                "Support Vector Classifier (SVC)": SVC(random_state=random_state),
+                                "K-Nearest Neighbors Classifier": KNeighborsClassifier(),
+                                "Gaussian Naive Bayes": GaussianNB()
+                            }
+                            algo_name = st.selectbox("Choose a Classification Algorithm:", list(algo_options.keys()), key="classification_algo_select")
+                            selected_algo = algo_options.get(algo_name)
+                        elif st.session_state.problem_type == 'regression':
+                            algo_options = {
+                                "Linear Regression": LinearRegression(),
+                                "Decision Tree Regressor": DecisionTreeRegressor(random_state=random_state),
+                                "Random Forest Regressor": RandomForestRegressor(random_state=random_state),
+                                "Support Vector Regressor (SVR)": SVR(),
+                                "K-Nearest Neighbors Regressor": KNeighborsRegressor()
+                            }
+                            algo_name = st.selectbox("Choose a Regression Algorithm:", list(algo_options.keys()), key="regression_algo_select")
+                            selected_algo = algo_options.get(algo_name)
+                        else:
+                            st.warning("Please define target column and problem type to select an algorithm.")
+                            selected_algo = None
+                        if selected_algo:
+                            st.info(f"Selected Algorithm: **{algo_name}**")
+                            st.session_state.selected_algo = selected_algo
+                            st.session_state.selected_algo_name = algo_name
+                            st.markdown("---")
+                            if st.button("🚀 Train Model"):
+                                if st.session_state.X_train is not None and st.session_state.y_train is not None:
+                                    try:
+                                        with st.spinner(f"Training {st.session_state.selected_algo_name}..."):
+                                            st.session_state.selected_algo.fit(st.session_state.X_train, st.session_state.y_train)
+                                        st.session_state.trained_model = st.session_state.selected_algo
+                                        st.success(f"Model **{st.session_state.selected_algo_name}** trained successfully!")
+                                        y_pred = st.session_state.trained_model.predict(st.session_state.X_test)
+                                        metrics = {}
+                                        if st.session_state.problem_type == 'classification':
+                                            metrics['Accuracy'] = accuracy_score(st.session_state.y_test, y_pred)
+                                            metrics['Precision'] = precision_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
+                                            metrics['Recall'] = recall_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
+                                            metrics['F1 Score'] = f1_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
+                                            metrics['Confusion Matrix'] = confusion_matrix(st.session_state.y_test, y_pred)
+                                        elif st.session_state.problem_type == 'regression':
+                                            metrics['Mean Squared Error'] = mean_squared_error(st.session_state.y_test, y_pred)
+                                            metrics['R2 Score'] = r2_score(st.session_state.y_test, y_pred)
+                                        st.session_state.model_metrics = metrics
+                                        st.rerun()
+                                    except Exception as e:
+                                        st.error(f"Error training model: {e}")
+                                else:
+                                    st.warning("Please split the data first before training the model.")
+                        else:
+                             st.warning("Please select a target column and problem type to enable algorithm selection.")
+                    except Exception as e:
+                        st.error(f"Error splitting data or preparing features: {e}")
+                        st.info("Ensure your data is clean and suitable for splitting (e.g., no remaining NaN values after imputation).")
+                else:
+                    st.warning("Please select at least one independent variable.")
+            else:
+                st.info("Please select a target column to proceed with data operations.")
+        else:
+            st.info("Please upload a dataset first to access Data Operations & Algorithms.")
+with col12:
+    with st.expander("🎨 Select Plot Type", expanded=False):
+        if st.session_state.updated_df is not None:
+            df = st.session_state.updated_df
+            numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
+            categorical_cols = df.select_dtypes(include='object').columns.tolist()
+            plot_type = st.selectbox("Select a plot type", ["---Select---", "Bar Chart", "Histogram", "Scatter Plot", "Box Plot", "Heatmap",
+                                                            "Line Chart", "Pie Chart", "Violin Plot", "Pair Plot",
+                                                             "3D Scatter Plot", "3D Surface Plot"])
+            if plot_type == "Bar Chart":
+                st.info("A bar chart shows counts of categories within a column.")
+                selected_col = st.selectbox("Select a categorical column", categorical_cols)
+                if st.button("Generate Bar Chart"):
+                    if selected_col:
+                        fig = px.bar(df, x=selected_col, title=f'Bar Chart of {selected_col}', color=selected_col)
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Histogram":
+                st.info("A histogram shows the distribution of a numerical column.")
+                selected_col = st.selectbox("Select a numerical column", numerical_cols)
+                if st.button("Generate Histogram"):
+                    if selected_col:
+                        fig = px.histogram(df, x=selected_col, title=f'Histogram of {selected_col}')
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Scatter Plot":
+                st.info("A scatter plot shows the relationship between two numerical columns.")
+                x_col = st.selectbox("Select X-axis column", numerical_cols, key='scatter_x')
+                y_col = st.selectbox("Select Y-axis column", numerical_cols, key='scatter_y')
+                if st.button("Generate Scatter Plot"):
+                    if x_col and y_col:
+                        fig = px.scatter(df, x=x_col, y=y_col, title=f'Scatter Plot of {x_col} vs {y_col}')
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Box Plot":
+                st.info("A box plot shows the distribution of a numerical column grouped by a categorical column.")
+                num_col = st.selectbox("Select a numerical column", numerical_cols, key='box_num')
+                cat_col = st.selectbox("Select a categorical column for grouping", categorical_cols, key='box_cat')
+                if st.button("Generate Box Plot"):
+                    if num_col and cat_col:
+                        fig = px.box(df, x=cat_col, y=num_col, title=f'Box Plot of {num_col} by {cat_col}', color=cat_col)
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Heatmap":
+                st.info("A heatmap shows the correlation between all numerical columns.")
+                if st.button("Generate Heatmap"):
+                    corr = df[numerical_cols].corr()
+                    fig = px.imshow(corr, text_auto=True, title='Correlation Heatmap')
+                    st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Line Chart":
+                st.info("A line chart shows trends over time or ordered categories.")
+                x_col = st.selectbox("Select X-axis column", df.columns, key='line_x')
+                y_col = st.selectbox("Select Y-axis (numerical) column", numerical_cols, key='line_y')
+                if st.button("Generate Line Chart"):
+                    if x_col and y_col:
+                        fig = px.line(df, x=x_col, y=y_col, title=f'Line Chart of {y_col} over {x_col}')
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Pie Chart":
+                st.info("A pie chart shows proportions of categories within a column.")
+                selected_col = st.selectbox("Select a categorical column for Pie Chart", categorical_cols, key='pie_col')
+                if st.button("Generate Pie Chart"):
+                    if selected_col:
+                        pie_data = df[selected_col].value_counts().reset_index()
+                        pie_data.columns = [selected_col, 'Count']
+                        fig = px.pie(pie_data, names=selected_col, values='Count', title=f'Pie Chart of {selected_col}')
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Violin Plot":
+                st.info("A violin plot shows the distribution of a numerical column by categories.")
+                num_col = st.selectbox("Select a numerical column", numerical_cols, key='violin_num')
+                cat_col = st.selectbox("Select a categorical column for grouping", categorical_cols, key='violin_cat')
+                if st.button("Generate Violin Plot"):
+                    if num_col and cat_col:
+                        fig = px.violin(df, x=cat_col, y=num_col, box=True, points="all", title=f'Violin Plot of {num_col} by {cat_col}')
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "Pair Plot":
+                st.info("A pair plot shows scatter plots for all combinations of numerical columns.")
+                if st.button("Generate Pair Plot"):
+                    fig = px.scatter_matrix(df[numerical_cols], dimensions=numerical_cols, title='Pair Plot of Numerical Features')
+                    st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "3D Scatter Plot":
+                st.info("A 3D scatter plot shows the relationship between three numerical columns.")
+                x_col = st.selectbox("Select X-axis column", numerical_cols, key='3d_scatter_x')
+                y_col = st.selectbox("Select Y-axis column", numerical_cols, key='3d_scatter_y')
+                z_col = st.selectbox("Select Z-axis column", numerical_cols, key='3d_scatter_z')
+                color_col = st.selectbox("Optional: Select a column for color grouping (optional)", df.columns, key='3d_scatter_color')
+                if st.button("Generate 3D Scatter Plot"):
+                    if x_col and y_col and z_col:
+                        fig = px.scatter_3d(df, x=x_col, y=y_col, z=z_col, color=color_col if color_col else None,
+                                title=f'3D Scatter Plot: {x_col} vs {y_col} vs {z_col}')
+                        st.plotly_chart(fig, use_container_width=True)
+            elif plot_type == "3D Surface Plot":
+                st.info("A 3D surface plot shows a continuous surface over two variables.")
+                x_col = st.selectbox("Select X-axis column", numerical_cols, key='3d_surface_x')
+                y_col = st.selectbox("Select Y-axis column", numerical_cols, key='3d_surface_y')
+                z_col = st.selectbox("Select Z-axis column", numerical_cols, key='3d_surface_z')
+                if st.button("Generate 3D Surface Plot"):
+                    if x_col and y_col and z_col:
+                        try:
+                            pivot_table = df.pivot_table(index=y_col, columns=x_col, values=z_col, aggfunc='mean')
+                            fig = go.Figure(data=[go.Surface(z=pivot_table.values,
+                                                            x=pivot_table.columns,
+                                                            y=pivot_table.index)])
+                            fig.update_layout(title=f'3D Surface Plot of {z_col} over {x_col} and {y_col}',
+                                            scene=dict(
+                                                xaxis_title=x_col,
+                                                yaxis_title=y_col,
+                                                zaxis_title=z_col
+                                            ))
+                            st.plotly_chart(fig, use_container_width=True)
+                        except Exception as e:
+                            st.error(f"Error generating surface plot: {e}")
+        else:
+            st.info("Please upload a dataset first to generate plots.")
+with col13:
+    with st.expander("📈 Pre Analysis", expanded=False):
+        if st.session_state.updated_df is not None:
+            # Create tabs for different analyses
+            tab1, tab2 = st.tabs(["Statistical Summary", "Dataset Info"])
+            with tab1:
+                st.subheader("Statistical Summary (describe)")
+                numeric_df = st.session_state.updated_df.select_dtypes(include=['float64', 'int64'])
+                if not numeric_df.empty:
+                # Display statistical summary
+                    st.dataframe(numeric_df.describe())
+                else:
+                    st.warning("No numerical columns found in the dataset")
+                if st.checkbox("Show additional statistics"):
+                    st.write("Skewness:")
+                    st.dataframe(numeric_df.skew())
+                    st.write("Kurtosis:")
+                    st.dataframe(numeric_df.kurtosis())
+            with tab2:
+                st.subheader("Dataset Information (info)")
+                # Get DataFrame info
+                buffer = io.StringIO()
+                st.session_state.updated_df.info(buf=buffer)
+                info_str = buffer.getvalue()
+            # Display formatted info
+                st.text(info_str)
+                st.write("Quick Facts:")
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.metric("Total Rows", st.session_state.updated_df.shape[0])
+                with col2:
+                    st.metric("Total Columns", st.session_state.updated_df.shape[1])
+                with col3:
+                    st.metric("Missing Values", st.session_state.updated_df.isna().sum().sum())
+            # Display column types
+                st.write("Column Data Types:")
+                dtypes_df = pd.DataFrame(st.session_state.updated_df.dtypes, columns=['Data Type'])
+                st.dataframe(dtypes_df)
+        else:
+            st.info("Please upload a dataset first.")
+#----------------------------------------------------#
+# Sidebar (Keep as is if you are simulating pages in a single file)
+with st.sidebar:
+    st.markdown('<b>🛠️ Tools</b>', unsafe_allow_html=True)
+    # Store the active page in session state
+    if 'current_page' not in st.session_state:
+        st.session_state.current_page = "main"
+    if st.button("🏠 Home"):
+        st.session_state.current_page = "main"
+        st.rerun()
+    if st.button("📝 Note -- Lite"):
+        st.session_state.current_page = "note_lite"
+        st.rerun()
+    if st.button("😶‍🌫️ WordCloud"):
+        st.session_state.current_page = "word_cloud"
+        st.rerun()
+    if st.button("🤖 Viz AI (img)"):
+        st.session_state.current_page = "viz_ai_img"
+        st.rerun()
+    if st.button("🧮 Calculator"):
+        st.session_state.current_page = "calculator"
+        st.rerun()
+    if st.button("⚙️ Viz Editor"):
+        st.session_state.current_page = "note_edit"
+        # No rerun here — handled differently maybe?
+    if st.button("📄 Viz Report"):
+        st.session_state.current_page = "generate_report"
+        st.rerun()
+    st.markdown("<hr>",unsafe_allow_html=True)
+    st.markdown("### <center>Other Products</center>", unsafe_allow_html=True)
+#---------------------------------------------------------------#
+#---------------------------------------------------------------#
+# Main content columns
+col_main_left, col_main_right = st.columns([0.6, 0.4]) # Adjusted column widths for better layout
+with col_main_left:
+    st.markdown("<b style='font-size:20px;'>📂 Upload Your Dataset</b>", unsafe_allow_html=True)
+    dataset = st.file_uploader("Choose a dataset file", type=["csv", "xlsx", "txt"], key="file_uploader_main") # Added key
+    if dataset is not None:
+        if 'last_uploaded_file_object' not in st.session_state or st.session_state.last_uploaded_file_object != dataset:
+            st.session_state.last_uploaded_file_object = dataset
+            st.session_state.original_df_uploaded = False
+            st.session_state.updated_df = None
+            st.session_state.X_train = st.session_state.X_test = st.session_state.y_train = st.session_state.y_test = None
+            st.session_state.target_column = None
+            st.session_state.feature_columns = None
+            st.session_state.problem_type = None
+            st.session_state.trained_model = None
+            st.session_state.model_metrics = None
+            st.session_state.scaler = None
+            st.success("✅ File uploaded successfully!")
+            st.write(f"File name: **{dataset.name}**")
+            try:
+                if dataset.name.endswith(".csv"):
+                    df = pd.read_csv(dataset)
+                elif dataset.name.endswith(".xlsx"):
+                    df = pd.read_excel(dataset)
+                elif dataset.name.endswith(".txt"):
+                    df = pd.read_csv(dataset, delimiter="\t")
+                else:
+                    st.error("Unsupported file type. Please upload a CSV, XLSX, or TXT (tab-separated) file.")
+                    df = None
+                if df is not None:
+                    st.session_state.updated_df = df.copy()
+                    st.session_state.original_df_uploaded = True
+                    st.rerun()
+            except Exception as e:
+                st.error(f"Error reading file: {e}. Please ensure it's a valid CSV, XLSX, or tab-separated TXT.")
+                st.session_state.original_df_uploaded = False
+                st.session_state.updated_df = None
+    # Original Dataset Preview
+    if st.session_state.original_df_uploaded and st.session_state.updated_df is not None:
+        st.markdown('<div class="dataset-preview">', unsafe_allow_html=True)
+        st.subheader("🔍 Original Dataset Preview")
+        st.dataframe(st.session_state.updated_df, use_container_width=True)
+        st.markdown('</div>', unsafe_allow_html=True)
+        # Updated Dataset Preview (after imputation)
+        st.markdown('<div class="dataset-preview">', unsafe_allow_html=True)
+        st.subheader("🔄 Updated Dataset Preview (After Imputation)")
+        st.dataframe(st.session_state.updated_df, use_container_width=True)
+        st.markdown('</div>', unsafe_allow_html=True)
+with col_main_right:
+    if st.session_state.updated_df is not None:
+        st.markdown('<div class="section-title">📊 Missing Values Report</div>', unsafe_allow_html=True)
+        null_counts = st.session_state.updated_df.isnull().sum()
+        total_nulls = null_counts.sum()
+        if total_nulls == 0:
+            st.success("✅ No null values found in the dataset!")
+        else:
+            st.warning(f"⚠️ Found {total_nulls} null values in the dataset.")
+            st.write(null_counts[null_counts > 0])
+            # Automatic Missing Value Handling
+            st.markdown('<div class="section-title">🤖 Automatic Missing Value Handling</div>', unsafe_allow_html=True)
+            with st.form("auto_impute_form"):
+                st.write("Apply default handling for all missing values:")
+                auto_impute_option = st.selectbox(
+                    "Choose imputation method:",
+                    ["None", "Mean (Numerical)", "Median (Numerical)", "Mode (All)", "Forward Fill", "Backward Fill"],
+                    key="auto_impute_method"
+                )
+                auto_impute_button = st.form_submit_button("Apply Automatic Imputation")
+                if auto_impute_button and auto_impute_option != "None":
+                    df_to_impute = st.session_state.updated_df.copy()
+                    if auto_impute_option == "Mean (Numerical)":
+                        for col in df_to_impute.select_dtypes(include=['number']).columns:
+                            if df_to_impute[col].isnull().sum() > 0:
+                                df_to_impute[col].fillna(df_to_impute[col].mean(), inplace=True)
+                    elif auto_impute_option == "Median (Numerical)":
+                        for col in df_to_impute.select_dtypes(include=['number']).columns:
+                            if df_to_impute[col].isnull().sum() > 0:
+                                df_to_impute[col].fillna(df_to_impute[col].median(), inplace=True)
+                    elif auto_impute_option == "Mode (All)":
+                        for col in df_to_impute.columns:
+                            if df_to_impute[col].isnull().sum() > 0:
+                                if not df_to_impute[col].mode().empty:
+                                    df_to_impute[col].fillna(df_to_impute[col].mode()[0], inplace=True)
+                                else:
+                                    st.warning(f"Could not compute mode for column '{col}'. Skipping.")
+                    elif auto_impute_option == "Forward Fill":
+                        df_to_impute.fillna(method='ffill', inplace=True)
+                    elif auto_impute_option == "Backward Fill":
+                        df_to_impute.fillna(method='bfill', inplace=True)
+                    st.session_state.updated_df = df_to_impute
+                    st.success(f"🎉 Missing values have been handled automatically using **{auto_impute_option}**!")
+                    st.rerun()
+            # Manual Missing Value Handling
+            st.markdown('<div class="section-title">🛠️ Manual Missing Value Handling</div>', unsafe_allow_html=True)
+            cols_with_missing = st.session_state.updated_df.columns[st.session_state.updated_df.isnull().any()].tolist()
+            if cols_with_missing:
+                selected_col_manual = st.selectbox(
+                    "Select a column to manually handle missing values:",
+                    ["--- Select a Column ---"] + cols_with_missing,
+                    key="manual_col_select"
+                )
+                if selected_col_manual != "--- Select a Column ---":
+                    col_dtype = st.session_state.updated_df[selected_col_manual].dtype
+                    num_missing = st.session_state.updated_df[selected_col_manual].isnull().sum()
+                    st.write(f"Column: **{selected_col_manual}** (Missing values: **{num_missing}**)")
+                    with st.form(key=f"manual_impute_form_{selected_col_manual}"):
+                        fill_value_to_apply = None
+                        if col_dtype == "object":
+                            manual_fill_option = st.selectbox(
+                                f"Choose a method for '{selected_col_manual}'",
+                                ["Mode", "Fill with custom value"],
+                                key=f"cat_method_{selected_col_manual}"
+                            )
+                            if manual_fill_option == "Fill with custom value":
+                                fill_value_to_apply = st.text_input(f"Enter the custom value to fill for '{selected_col_manual}'", key=f"cat_value_{selected_col_manual}")
+                            elif manual_fill_option == "Mode":
+                                if not st.session_state.updated_df[selected_col_manual].mode().empty:
+                                    fill_value_to_apply = st.session_state.updated_df[selected_col_manual].mode()[0]
+                                else:
+                                    st.warning(f"Mode cannot be calculated for {selected_col_manual}. Please enter a custom value.")
+                        else:
+                            manual_fill_option = st.selectbox(
+                                f"Choose a method for '{selected_col_manual}'",
+                                ["Mean", "Median", "Mode", "Fill with custom value"],
+                                key=f"num_method_{selected_col_manual}"
+                            )
+                            if manual_fill_option == "Fill with custom value":
+                                fill_value_to_apply = st.number_input(f"Enter the custom value to fill for '{selected_col_manual}'", value=0.0, key=f"num_value_{selected_col_manual}")
+                            elif manual_fill_option == "Mean":
+                                fill_value_to_apply = st.session_state.updated_df[selected_col_manual].mean()
+                            elif manual_fill_option == "Median":
+                                fill_value_to_apply = st.session_state.updated_df[selected_col_manual].median()
+                            elif manual_fill_option == "Mode":
+                                if not st.session_state.updated_df[selected_col_manual].mode().empty:
+                                    fill_value_to_apply = st.session_state.updated_df[selected_col_manual].mode()[0]
+                                else:
+                                    st.warning(f"Mode cannot be calculated for {selected_col_manual}. Please enter a custom value.")
+                        submit_button = st.form_submit_button(f"Apply Manual Imputation to {selected_col_manual}")
+                        if submit_button and fill_value_to_apply is not None:
+                            st.session_state.updated_df[selected_col_manual].fillna(fill_value_to_apply, inplace=True)
+                            st.success(f"Filled '{selected_col_manual}' missing values with **'{fill_value_to_apply}'** using {manual_fill_option}!")
+                            st.rerun()
+            else:
+                st.info("No columns with missing values to display for manual handling.")
+        # Pair Plot button is now below the missing values report
+        st.markdown("---")
+        if st.button("📈 Generate Pair Plot of Numerical Columns"):
+            if st.session_state.updated_df is not None:
+                numerical_data = st.session_state.updated_df.select_dtypes(include=['float64', 'int64'])
+                if not numerical_data.empty:
+                    st.markdown("##### 📘 Pair Plot - Seaborn (Static)", unsafe_allow_html=True)
+                    fig1 = sns.pairplot(numerical_data)
+                    st.pyplot(fig1)
+                    plt.clf()
+                    st.markdown("##### 🧠 Pair Plot - Plotly (Interactive)", unsafe_allow_html=True)
+                    fig2 = px.scatter_matrix(numerical_data,
+                                             dimensions=numerical_data.columns,
+                                             height=800, width=800)
+                    st.plotly_chart(fig2, use_container_width=True)
+                else:
+                    st.warning("No numerical columns found to generate a pair plot.")
+            else:
+                st.warning("Please upload and process a dataset first.")
+# --- Machine Learning Operations Section (Full Width, below the two columns) ---
+st.markdown("---")
+st.markdown("<h2 style='text-align: center; color: #4A90E2;'>🧠 Machine Learning Operations</h2>", unsafe_allow_html=True)
+if st.session_state.updated_df is not None and st.session_state.trained_model is not None:
+    st.markdown(f"### Model Training Results for **{st.session_state.selected_algo_name}**")
+    if st.session_state.model_metrics:
+        if st.session_state.problem_type == 'classification':
+            st.markdown("#### Classification Metrics:")
+            col_m1, col_m2, col_m3, col_m4 = st.columns(4)
+            with col_m1:
+                st.metric(label="Accuracy", value=f"{st.session_state.model_metrics['Accuracy']:.4f}")
+            with col_m2:
+                st.metric(label="Precision", value=f"{st.session_state.model_metrics['Precision']:.4f}")
+            with col_m3:
+                st.metric(label="Recall", value=f"{st.session_state.model_metrics['Recall']:.4f}")
+            with col_m4:
+                st.metric(label="F1 Score", value=f"{st.session_state.model_metrics['F1 Score']:.4f}")
+            st.markdown("#### Confusion Matrix:")
+            fig_cm, ax_cm = plt.subplots(figsize=(6, 5))
+            sns.heatmap(st.session_state.model_metrics['Confusion Matrix'], annot=True, fmt='d', cmap='Blues', ax=ax_cm)
+            ax_cm.set_xlabel('Predicted')
+            ax_cm.set_ylabel('True')
+            ax_cm.set_title('Confusion Matrix')
+            st.pyplot(fig_cm)
+            plt.clf()
+        elif st.session_state.problem_type == 'regression':
+            st.markdown("#### Regression Metrics:")
+            col_r1, col_r2 = st.columns(2)
+            with col_r1:
+                st.metric(label="Mean Squared Error", value=f"{st.session_state.model_metrics['Mean Squared Error']:.4f}")
+            with col_r2:
+                st.metric(label="R2 Score", value=f"{st.session_state.model_metrics['R2 Score']:.4f}")
+    st.markdown("---")
+    # --- Test Your Own Values and Download Model ---
+    col_test, col_download = st.columns(2)
+    with col_test:
+        st.markdown("### 🧪 Test with Your Own Values")
+        if st.session_state.feature_columns:
+            input_data = {}
+            for col in st.session_state.feature_columns:
+                if st.session_state.updated_df[col].dtype == 'object':
+                    unique_vals = st.session_state.updated_df[col].unique()
+                    input_data[col] = st.selectbox(f"Select value for **{col}**", unique_vals)
+                else:
+                    input_data[col] = st.number_input(f"Enter value for **{col}**", value=float(st.session_state.updated_df[col].mean()))
+            if st.button("Get Prediction"):
+                input_df = pd.DataFrame([input_data])
+                # Preprocess the input data similarly to the training data
+                for col in input_df.select_dtypes(include=['object', 'bool']).columns:
+                    le = LabelEncoder()
+                    input_df[col] = le.fit_transform(input_df[col].astype(str))
+                if st.session_state.scaler:
+                    numerical_cols = input_df.select_dtypes(include=['number']).columns
+                    if not numerical_cols.empty:
+                        input_df[numerical_cols] = st.session_state.scaler.transform(input_df[numerical_cols])
+                prediction = st.session_state.trained_model.predict(input_df)
+                st.success(f"**Prediction:** {prediction[0]}")
+    with col_download:
+        st.markdown("### 📥 Download Trained Model")
+        # Serialize the model for download
+        model_pkl = pickle.dumps(st.session_state.trained_model)
+        b64 = base64.b64encode(model_pkl).decode()
+        st.download_button(
+            label="Download Model as .pkl",
+            data=base64.b64decode(b64),
+            file_name=f"{st.session_state.selected_algo_name}_model.pkl",
+            mime="application/octet-stream"
+        )
+else:
+    st.info("Upload a dataset and train a model to see results and test your own values.")
+if st.session_state.current_page == "viz_ai_img":
+    viz_ai_img.analyze_image_ui()
+elif st.session_state.current_page == "word_cloud":
+    # Make sure to import your word_cloud module if you have it
+    word_cloud.render_word_cloud_page()
+elif st.session_state.current_page == "note_lite":
+    notepad_lite.render_notepad()
+elif st.session_state.current_page == "calculator":
+    calculator.render_calculator()
+elif st.session_state.current_page == "generate_report":
+    # Make sure to import your viz_report module if you have it
+    # viz_report.generate_report()
+    #viz_report.render_report_page()
+    st.write("Viz Report Page (Implement logic here)")
+# Add custom CSS for better styling
+st.markdown("""
+<style>
+    .stButton>button {
+        width: 100%;
+        border-radius: 5px;
+        border: 1px solid #4A90E2;
+        color: #4A90E2;
+        background-color: white;
+        padding: 10px;
+        font-size: 16px;
+        transition: all 0.2s ease-in-out;
+    }
+    .stButton>button:hover {
+        background-color: #4A90E2;
+        color: white;
+    }
+    .section-title {
+        color: #4A90E2;
+        font-size: 18px;
+        margin-top: 15px;
+        margin-bottom: 10px;
+        font-weight: bold;
+    }
+    .dataset-preview {
+        border: 1px solid #ddd;
+        border-radius: 5px;
+        padding: 10px;
+        margin-top: 20px;
+        background-color: #f9f9f9;
+    }
+    h1 {
+        color: #4A90E2;
+    }
+    h2 {
+        color: #4A90E2;
+    }
+    h3 {
+        color: #333;
+    }
+    h4 {
+        color: #555;
+    }
+    .st-emotion-cache-1jmvejs { # Targeting expander header for slightly different styling
+        background-color: #f0f2f6;
+        border-radius: 5px;
+    }
+</style>
+""", unsafe_allow_html=True)
+st.markdown("""
+    <div style="position: fixed; bottom: 0; left: 0; width: 100%; text-align: center; background-color: ; padding: 10px;">
+        <p style="font-size: 12px;">Made with ❤️ by <a href = "https://avarshvir.github.io/arshvir">Arshvir</a> and <a href = "https://jaiho-labs.onrender.com">Jaiho Labs</a></p>
+    </div>
+""", unsafe_allow_html=True)

src/calculator.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import streamlit as st
+from functools import partial
+def render_calculator():
+    """
+    Renders a fully functional calculator UI and handles its logic.
+    """
+    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>🧮 Calculator</h2>", unsafe_allow_html=True)
+    # --- State Initialization ---
+    # We use session state to keep track of the calculator's current state.
+    if 'calc_display' not in st.session_state:
+        st.session_state.calc_display = '0'
+        st.session_state.first_operand = None
+        st.session_state.operator = None
+        st.session_state.waiting_for_second_operand = False
+    # --- Callback Functions ---
+    # These functions modify the state in response to button clicks.
+    def handle_digit(digit):
+        """Appends a digit to the display."""
+        if st.session_state.waiting_for_second_operand:
+            st.session_state.calc_display = digit
+            st.session_state.waiting_for_second_operand = False
+        else:
+            st.session_state.calc_display = st.session_state.calc_display + digit if st.session_state.calc_display != '0' else digit
+    def handle_decimal():
+        """Adds a decimal point if one doesn't already exist."""
+        if '.' not in st.session_state.calc_display:
+            st.session_state.calc_display += '.'
+    def handle_operator(op):
+        """Handles an operator click (+, -, *, /)."""
+        current_value = float(st.session_state.calc_display)
+        # This block handles chained operations like 5 * 2 + (result is 10, then we wait for next number)
+        if st.session_state.first_operand is not None and st.session_state.operator is not None and not st.session_state.waiting_for_second_operand:
+            handle_equals()
+            st.session_state.first_operand = float(st.session_state.calc_display)
+        else:
+            st.session_state.first_operand = current_value
+        st.session_state.operator = op
+        st.session_state.waiting_for_second_operand = True
+    def handle_equals():
+        """Performs the calculation."""
+        if st.session_state.operator is None or st.session_state.first_operand is None:
+            return
+        second_operand = float(st.session_state.calc_display)
+        first_operand = st.session_state.first_operand
+        operator = st.session_state.operator
+        if operator == '+':
+            result = first_operand + second_operand
+        elif operator == '-':
+            result = first_operand - second_operand
+        elif operator == '*':
+            result = first_operand * second_operand
+        elif operator == '/':
+            if second_operand == 0:
+                result = 'Error'
+            else:
+                result = first_operand / second_operand
+        st.session_state.calc_display = str(result)
+        st.session_state.first_operand = result # So you can chain operations with the result
+        st.session_state.operator = None
+        st.session_state.waiting_for_second_operand = True
+    def handle_clear():
+        """Resets the calculator to its initial state."""
+        st.session_state.calc_display = '0'
+        st.session_state.first_operand = None
+        st.session_state.operator = None
+        st.session_state.waiting_for_second_operand = False
+    # --- UI Layout ---
+    # Display screen
+    st.text_input("Result", st.session_state.calc_display, key="display", disabled=True)
+    # Calculator buttons layout
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.button('7', on_click=partial(handle_digit, '7'), use_container_width=True)
+        st.button('4', on_click=partial(handle_digit, '4'), use_container_width=True)
+        st.button('1', on_click=partial(handle_digit, '1'), use_container_width=True)
+        st.button('0', on_click=partial(handle_digit, '0'), use_container_width=True)
+    with col2:
+        st.button('8', on_click=partial(handle_digit, '8'), use_container_width=True)
+        st.button('5', on_click=partial(handle_digit, '5'), use_container_width=True)
+        st.button('2', on_click=partial(handle_digit, '2'), use_container_width=True)
+        st.button('.', on_click=handle_decimal, use_container_width=True)
+    with col3:
+        st.button('9', on_click=partial(handle_digit, '9'), use_container_width=True)
+        st.button('6', on_click=partial(handle_digit, '6'), use_container_width=True)
+        st.button('3', on_click=partial(handle_digit, '3'), use_container_width=True)
+        st.button('=', on_click=handle_equals, use_container_width=True)
+    with col4:
+        st.button('/', on_click=partial(handle_operator, '/'), use_container_width=True)
+        st.button('Mul', on_click=partial(handle_operator, '*'), use_container_width=True)
+        st.button('Sub', on_click=partial(handle_operator, '-'), use_container_width=True)
+        st.button('Add', on_click=partial(handle_operator, '+'), use_container_width=True)
+    st.button('C', on_click=handle_clear, use_container_width=True)

src/notepad_lite.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import streamlit as st
+from datetime import datetime
+def render_notepad():
+    """
+    Renders a simple, session-based notepad page.
+    """
+    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>📝 Note -- Lite</h2>", unsafe_allow_html=True)
+    st.markdown("<p style='text-align: center;'>Jot down your thoughts, findings, or reminders. Your notes are saved for the current session.</p>", unsafe_allow_html=True)
+    # Initialize the note content in session state if it doesn't exist
+    if 'notepad_text' not in st.session_state:
+        st.session_state.notepad_text = "## My Analysis Notes\n\n- Finding 1:\n- Finding 2:\n"
+    # --- DEFINE THE CALLBACK FUNCTION ---
+    # This function will be called when the button is clicked.
+    # It modifies the session state *before* the page is re-rendered.
+    def clear_note_callback():
+        st.session_state.notepad_text = ""
+    # ------------------------------------
+    st.text_area(
+        "Your Notes",
+        key='notepad_text',
+        height=400,
+        help="Your text is saved automatically as you type."
+    )
+    st.markdown("---")
+    col1, col2 = st.columns(2)
+    with col1:
+        # --- ATTACH THE CALLBACK TO THE BUTTON ---
+        # Instead of an if-block, we use the on_click parameter.
+        st.button(
+            "🗑️ Clear Note",
+            on_click=clear_note_callback,
+            help="Click to permanently delete the text in the notepad."
+        )
+        # -----------------------------------------
+    with col2:
+        st.download_button(
+            label="📥 Download Note as .txt",
+            data=st.session_state.notepad_text,
+            file_name=f"visio_ai_note_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+            mime="text/plain"
+        )

src/viz_ai_img.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import streamlit as st
+import base64
+import requests
+from PIL import Image
+import io
+def analyze_image_ui():
+    """
+    Renders the UI for the Viz AI Image Analysis tool and handles the
+    logic for sending requests to the OpenRouter API.
+    """
+    st.markdown("---")
+    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>🤖 Viz AI (Image)</h2>", unsafe_allow_html=True)
+    st.markdown("<h5 style='text-align: center; color: grey;'>Uncover hidden patterns and details in your images.</h5>", unsafe_allow_html=True)
+    # Use a two-column layout
+    col1, col2 = st.columns(2)
+    with col1:
+        uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+        if uploaded_image:
+            # Display the uploaded image
+            image = Image.open(uploaded_image)
+            st.image(image, caption="Uploaded Image", use_container_width=True)
+    with col2:
+        prompt_text = st.text_area(
+            "Your Prompt:",
+            "Describe this image in detail. What are the key objects, arrangements, and potential hidden patterns or meanings?",
+            height=150
+        )
+        # Add a select box for the model
+        model_selection = st.selectbox(
+            "Choose a model:",
+            (
+                "meta-llama/llama-4-maverick:free",
+                "opengvlab/internvl3-14b:free",
+                "mistralai/mistral-small-3.1-24b-instruct:free",
+                "google/gemma-3-27b-it:free",
+            )
+        )
+        analyze_button = st.button("Analyze Image ✨")
+    if analyze_button and uploaded_image:
+        if not prompt_text.strip():
+            st.error("Please enter a prompt.")
+            return
+        with st.spinner(f"AI is analyzing the image using {model_selection}..."):
+            try:
+                # Get the API key from secrets
+                api_key = st.secrets["OPENROUTER_API_KEY"]
+                if not api_key:
+                    st.error("OpenRouter API key is not set. Please add it to your secrets.")
+                    return
+                # Convert image to base64
+                buffered = io.BytesIO()
+                image.save(buffered, format="PNG")
+                img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+                response = requests.post(
+                    url="https://openrouter.ai/api/v1/chat/completions",
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json"
+                    },
+                    json={
+                        "model": model_selection,
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
+                                    {"type": "text", "text": prompt_text}
+                                ]
+                            }
+                        ]
+                    }
+                )
+                response.raise_for_status()  # Will raise an HTTPError for bad responses (4xx or 5xx)
+                result = response.json()
+                ai_response = result['choices'][0]['message']['content']
+                st.markdown("---")
+                st.subheader("Analysis Result:")
+                st.markdown(ai_response)
+                # --- ADD THIS BLOCK ---
+                # Save results to session state for the report
+                st.session_state['viz_ai_img_result'] = {
+                    "image": image,  # The PIL Image object
+                    "prompt": prompt_text,
+                    "analysis": ai_response,
+                    "model": model_selection
+                }
+                st.success("✅ Analysis saved to the session report.")
+            except requests.exceptions.HTTPError as http_err:
+                st.error(f"HTTP error occurred: {http_err} - {response.text}")
+            except Exception as e:
+                st.error(f"An error occurred: {e}")
+    elif analyze_button and not uploaded_image:
+        st.warning("Please upload an image first.")

src/viz_report.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import streamlit as st
+import pandas as pd
+from fpdf import FPDF
+import matplotlib.pyplot as plt
+import seaborn as sns
+import io
+from datetime import datetime
+import tempfile # To handle temporary files
+import os       # To interact with the operating system
+class ComprehensivePDF(FPDF):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.user_name = ""
+    def header(self):
+        self.set_font('Arial', 'B', 12)
+        self.cell(0, 10, 'Visio AI - Comprehensive Analysis Report', 0, 1, 'C')
+        self.ln(5)
+    def footer(self):
+        self.set_y(-15)
+        self.set_font('Arial', 'I', 8)
+        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
+    def add_title_page(self, user_name=""):
+        self.user_name = user_name
+        self.add_page()
+        self.set_font('Arial', 'B', 24)
+        self.cell(0, 20, 'Comprehensive Analysis Report', 0, 1, 'C')
+        self.ln(20)
+        self.set_font('Arial', '', 12)
+        if self.user_name:
+            self.cell(0, 10, f"Prepared for: {self.user_name}", 0, 1, 'C')
+        self.cell(0, 10, f"Date Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1, 'C')
+        self.ln(20)
+        try:
+            self.image("images/favicon.png", x=85, y=100, w=40)
+        except FileNotFoundError:
+            self.set_font('Arial', 'I', 10)
+            self.cell(0, 10, "[Logo Not Found]", 0, 1, 'C')
+        self.set_y(-40)
+        self.set_font('Arial', 'I', 10)
+        self.cell(0, 10, "Generated by Visio AI", 0, 1, 'C')
+    def add_section_title(self, title):
+        self.add_page()
+        self.set_font('Arial', 'B', 16)
+        self.cell(0, 10, title, 0, 1, 'L')
+        self.ln(5)
+    def add_text_content(self, title, content):
+        self.set_font('Arial', 'B', 12)
+        self.cell(0, 10, title, 0, 1, 'L')
+        self.set_font('Courier', '', 10)
+        self.multi_cell(0, 5, content)
+        self.ln(5)
+    # --- NEW BULLETPROOF HELPER FUNCTION ---
+    def add_image_from_object(self, image_object, width):
+        """
+        Saves a matplotlib figure or PIL image to a temporary file
+        and adds it to the PDF, then deletes the file.
+        """
+        fp = None
+        try:
+            # Create a named temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as fp:
+                # Save the image object to the temporary file
+                image_object.save(fp, format="PNG")
+                temp_path = fp.name
+            # Add the image to the PDF from the temporary file path
+            self.image(temp_path, w=width)
+        finally:
+            # Ensure the temporary file is deleted
+            if fp and os.path.exists(fp.name):
+                os.remove(fp.name)
+def render_report_page():
+    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>📄 Comprehensive Report Generator</h2>", unsafe_allow_html=True)
+    st.markdown("<p style='text-align: center;'>Generate a complete PDF report of your session's activities.</p>", unsafe_allow_html=True)
+    user_name = st.text_input("Enter your name (optional, will be shown on the report cover)")
+    if st.button("Generate Full Report 🚀"):
+        if 'updated_df' not in st.session_state and 'viz_ai_img_result' not in st.session_state and 'word_cloud_result' not in st.session_state:
+            st.warning("There is no activity to report. Please train a model or use the AI tools first.")
+            return
+        with st.spinner("Assembling your comprehensive report..."):
+            pdf = ComprehensivePDF()
+            pdf.add_title_page(user_name)
+            # --- Section 1: Data Analysis & ML ---
+            if 'updated_df' in st.session_state and st.session_state.updated_df is not None:
+                df = st.session_state.updated_df
+                pdf.add_section_title("1. Dataset & Machine Learning Analysis")
+                buffer = io.StringIO()
+                df.info(buf=buffer)
+                pdf.add_text_content("Dataset Information", buffer.getvalue())
+                pdf.add_text_content("Numerical Summary", df.describe(include='number').to_string())
+                if not df.select_dtypes(include='object').empty:
+                    pdf.add_text_content("Categorical Summary", df.describe(include='object').to_string())
+                if 'trained_model' in st.session_state and st.session_state.trained_model is not None:
+                    metrics = st.session_state.model_metrics
+                    algo_name = st.session_state.selected_algo_name
+                    pdf.set_font('Arial', 'B', 12)
+                    pdf.cell(0, 10, f"Machine Learning Model: {algo_name}", 0, 1, 'L')
+                    metrics_str = ""
+                    for key, val in metrics.items():
+                        if key != 'Confusion Matrix':
+                            metrics_str += f"{key}: {val:.4f}\n" if isinstance(val, float) else f"{key}: {val}\n"
+                    pdf.add_text_content("Performance Metrics", metrics_str)
+                    if 'Confusion Matrix' in metrics:
+                        fig_cm, ax_cm = plt.subplots()
+                        sns.heatmap(metrics['Confusion Matrix'], annot=True, fmt='d', cmap='Blues', ax=ax_cm)
+                        ax_cm.set_title('Confusion Matrix')
+                        pdf.set_font('Arial', 'B', 12)
+                        pdf.cell(0, 10, "Confusion Matrix", 0, 1, 'L')
+                        pdf.add_image_from_object(fig_cm, width=170)
+                        plt.close(fig_cm) # Close the figure to free memory
+            # --- Section 2: Viz AI Image Analysis ---
+            if 'viz_ai_img_result' in st.session_state and st.session_state.viz_ai_img_result is not None:
+                img_result = st.session_state.viz_ai_img_result
+                pdf.add_section_title("2. Viz AI Image Analysis")
+                pdf.add_image_from_object(img_result['image'], width=150)
+                pdf.ln(5)
+                pdf.add_text_content("Model Used", img_result['model'])
+                pdf.add_text_content("User Prompt", img_result['prompt'])
+                pdf.add_text_content("AI Analysis", img_result['analysis'])
+            # --- Section 3: Word Cloud ---
+            if 'word_cloud_result' in st.session_state and st.session_state.word_cloud_result is not None:
+                wc_result = st.session_state.word_cloud_result
+                pdf.add_section_title("3. Word Cloud Analysis")
+                pdf.add_text_content("Source File", wc_result['source'])
+                pdf.add_text_content("Settings", wc_result['settings'])
+                pdf.set_font('Arial', 'B', 12)
+                pdf.cell(0, 10, "Generated Word Cloud", 0, 1, 'L')
+                pdf.add_image_from_object(wc_result['figure'], width=170)
+                plt.close(wc_result['figure']) # Close the figure to free memory
+            # --- Generate Download ---
+            pdf_output = pdf.output()
+            st.success("Report Generated!")
+            st.download_button(
+                label="📥 Download Full Report",
+                data=pdf_output,
+                file_name=f"VisioAI_Comprehensive_Report_{datetime.now().strftime('%Y%m%d')}.pdf",
+                mime="application/pdf"
+            )

src/word_cloud.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import streamlit as st
+import pandas as pd
+from wordcloud import WordCloud, STOPWORDS
+import matplotlib.pyplot as plt
+import io
+# Libraries for file processing
+import PyPDF2
+from docx import Document
+def extract_text_from_file(uploaded_file):
+    """Extracts text from various file formats."""
+    if uploaded_file.name.endswith('.pdf'):
+        pdf_reader = PyPDF2.PdfReader(uploaded_file)
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+        return text
+    elif uploaded_file.name.endswith('.docx'):
+        doc = Document(uploaded_file)
+        text = "\n".join([para.text for para in doc.paragraphs])
+        return text
+    elif uploaded_file.name.endswith('.txt'):
+        # To read bytes, decode it to string
+        return uploaded_file.read().decode('utf-8')
+    elif uploaded_file.name.endswith(('.csv', '.xlsx')):
+        return pd.read_excel(uploaded_file) if uploaded_file.name.endswith('.xlsx') else pd.read_csv(uploaded_file)
+    return None
def _parse_custom_stopwords(raw):
    """Turn a comma-separated string into a set of lower-cased stopwords."""
    # Strip every entry: "foo, bar" must yield "bar", not " bar" (which would
    # never match a token). Empty entries from stray commas are dropped.
    return {word.strip() for word in raw.lower().split(',') if word.strip()}


def render_word_cloud_page():
    """Render the UI and logic for the Word Cloud Generator page.

    Flow:
      1. Ask for a PDF, DOCX, TXT, CSV or XLSX upload and extract its content
         with ``extract_text_from_file``.
      2. For tabular uploads, let the user pick one text column and join its
         non-null values into a single string.
      3. Offer styling controls, build the word cloud on button press and
         display it.
      4. Store the resulting figure under
         ``st.session_state['word_cloud_result']`` and offer it as a PNG
         download.

    Returns:
        None. All output goes to the Streamlit page and session state.
    """
    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>😶‍🌫️ Word Cloud Generator</h2>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>Create beautiful word clouds from your text data. Supports PDF, DOCX, TXT, CSV, and Excel files.</p>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader(
        "Choose a file",
        type=['pdf', 'docx', 'txt', 'csv', 'xlsx']
    )
    if uploaded_file is None:
        return

    # A corrupt or unreadable upload should produce a message on the page,
    # not an unhandled exception.
    try:
        with st.spinner("Processing file..."):
            extracted_content = extract_text_from_file(uploaded_file)
    except Exception as e:
        st.error(f"Could not read the uploaded file: {e}")
        return

    text_data = None
    if isinstance(extracted_content, pd.DataFrame):
        st.info("CSV/Excel file detected. Please select the column to generate the word cloud from.")
        df = extracted_content
        text_columns = df.select_dtypes(include=['object', 'string']).columns.tolist()
        if not text_columns:
            st.error("No text-based columns found in the uploaded file.")
            return
        column_to_use = st.selectbox("Select a column:", text_columns)
        if column_to_use:
            text_data = " ".join(df[column_to_use].dropna().astype(str))
    else:
        text_data = extracted_content

    # Tell the user why nothing else is shown instead of leaving a blank page
    # (e.g. a scanned PDF without a text layer, or an all-empty column).
    if not text_data or not text_data.strip():
        st.warning("No text could be extracted from the uploaded file.")
        return

    st.markdown("---")
    st.subheader("Customize Your Word Cloud")
    col1, col2 = st.columns(2)
    with col1:
        colormap = st.selectbox("Color Scheme", ["viridis", "plasma", "inferno", "magma", "cividis", "Greys", "Purples", "Blues", "Greens", "Oranges", "Reds"])
        max_words = st.slider("Maximum Words", 50, 500, 200)
        bg_color = st.color_picker("Background Color", "#FFFFFF")
    with col2:
        contour_width = st.slider("Contour Width", 0.0, 5.0, 0.0, 0.1)
        contour_color = st.color_picker("Contour Color", "#0000FF")
        add_stopwords = st.text_area("Add Custom Stopwords (comma-separated)")

    if not st.button("Generate Word Cloud ✨"):
        return

    with st.spinner("Creating your masterpiece..."):
        custom_stopwords = set(STOPWORDS)
        if add_stopwords:
            custom_stopwords.update(_parse_custom_stopwords(add_stopwords))
        try:
            wordcloud = WordCloud(
                width=1200,
                height=600,
                background_color=bg_color,
                stopwords=custom_stopwords,
                max_words=max_words,
                colormap=colormap,
                contour_width=contour_width,
                contour_color=contour_color
            ).generate(text_data)
            st.markdown("---")
            st.subheader("Generated Word Cloud")
            fig, ax = plt.subplots(figsize=(12, 6))
            ax.imshow(wordcloud, interpolation='bilinear')
            ax.axis('off')
            st.pyplot(fig)

            # pyplot keeps every figure alive until it is closed, so release
            # the figure from a previous generation before replacing it.
            if 'word_cloud_result' in st.session_state:
                plt.close(st.session_state['word_cloud_result']['figure'])

            # Keep the figure in the session so other pages can reuse it.
            st.session_state['word_cloud_result'] = {
                "figure": fig,  # The matplotlib figure object
                "source": uploaded_file.name,
                "settings": f"Colors: {colormap}, Max Words: {max_words}"
            }
            st.success("✅ Word cloud saved to the session report.")

            # Create a download button for the image
            buf = io.BytesIO()
            fig.savefig(buf, format="png", bbox_inches='tight')
            st.download_button(
                label="📥 Download Image",
                data=buf.getvalue(),
                file_name="word_cloud.png",
                mime="image/png"
            )
        except Exception as e:
            st.error(f"An error occurred while generating the word cloud: {e}")