Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files
- src/app.py +811 -0
- src/calculator.py +113 -0
- src/notepad_lite.py +49 -0
- src/viz_ai_img.py +110 -0
- src/viz_report.py +164 -0
- src/word_cloud.py +122 -0
src/app.py
ADDED
@@ -0,0 +1,811 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import webbrowser
|
3 |
+
import pandas as pd
|
4 |
+
import seaborn as sns
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import plotly.express as px
|
7 |
+
import pickle
|
8 |
+
import base64
|
9 |
+
import io
|
10 |
+
import plotly.graph_objects as go
|
11 |
+
#import viz_report
|
12 |
+
import viz_ai_img
|
13 |
+
import word_cloud
|
14 |
+
import notepad_lite
|
15 |
+
import calculator
|
16 |
+
|
17 |
+
|
18 |
+
# Import ML libraries
|
19 |
+
from sklearn.model_selection import train_test_split
|
20 |
+
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
21 |
+
from sklearn.linear_model import LinearRegression, LogisticRegression
|
22 |
+
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
23 |
+
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
|
24 |
+
from sklearn.svm import SVC, SVR
|
25 |
+
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
|
26 |
+
from sklearn.naive_bayes import GaussianNB # For classification
|
27 |
+
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
|
28 |
+
import numpy as np # For numerical operations, especially with metrics
|
29 |
+
|
30 |
+
st.set_page_config("Visio AI", page_icon="images/favicon.png", layout='wide')
|
31 |
+
|
32 |
+
st.markdown("<h1 style='text-align: center; color: #4A90E2;'>📊 VISIO AI</h1>", unsafe_allow_html=True)
|
33 |
+
st.markdown("<h4 style='text-align: center; color: orange;'>Machine Learning and Data Analysis Platform</h4>", unsafe_allow_html=True)
|
34 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
35 |
+
#-------------------------------------------------#
|
36 |
+
|
37 |
+
# --- Session State Initialization ---
|
38 |
+
if 'updated_df' not in st.session_state:
|
39 |
+
st.session_state.updated_df = None
|
40 |
+
if 'original_df_uploaded' not in st.session_state:
|
41 |
+
st.session_state.original_df_uploaded = False
|
42 |
+
if 'last_uploaded_file_name' not in st.session_state:
|
43 |
+
st.session_state.last_uploaded_file_name = None
|
44 |
+
if 'X_train' not in st.session_state:
|
45 |
+
st.session_state.X_train = None
|
46 |
+
if 'X_test' not in st.session_state:
|
47 |
+
st.session_state.X_test = None
|
48 |
+
if 'y_train' not in st.session_state:
|
49 |
+
st.session_state.y_train = None
|
50 |
+
if 'y_test' not in st.session_state:
|
51 |
+
st.session_state.y_test = None
|
52 |
+
if 'target_column' not in st.session_state:
|
53 |
+
st.session_state.target_column = None
|
54 |
+
if 'feature_columns' not in st.session_state:
|
55 |
+
st.session_state.feature_columns = None
|
56 |
+
if 'problem_type' not in st.session_state:
|
57 |
+
st.session_state.problem_type = None # 'classification' or 'regression'
|
58 |
+
if 'trained_model' not in st.session_state:
|
59 |
+
st.session_state.trained_model = None
|
60 |
+
if 'model_metrics' not in st.session_state:
|
61 |
+
st.session_state.model_metrics = None
|
62 |
+
if 'scaler' not in st.session_state:
|
63 |
+
st.session_state.scaler = None
|
64 |
+
|
65 |
+
|
66 |
+
# Navigation Bar
|
67 |
+
col1, col2, col3, col4, col5 = st.columns((1, 1, 1, 1, 1))
|
68 |
+
with col1:
|
69 |
+
about_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_about.html"
|
70 |
+
if st.button('About'):
|
71 |
+
st.markdown("check out this [link](%s)" % about_url)
|
72 |
+
|
73 |
+
#webbrowser.open_new_tab(about_url)
|
74 |
+
|
75 |
+
with col2:
|
76 |
+
guide_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_helper.html"
|
77 |
+
if st.button('Guide'):
|
78 |
+
st.markdown("check out this [link](%s)" % guide_url)
|
79 |
+
|
80 |
+
with col3:
|
81 |
+
docs_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_docs.html"
|
82 |
+
if st.button('Docs'):
|
83 |
+
st.markdown("check out this [link](%s)" % docs_url)
|
84 |
+
|
85 |
+
with col4:
|
86 |
+
joinus_url = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/visio_join.html"
|
87 |
+
if st.button('Join Us'):
|
88 |
+
st.markdown("check out this [link](%s)" % joinus_url)
|
89 |
+
|
90 |
+
with col5:
|
91 |
+
elite_access = "https://jaiho-labs.onrender.com/pages/products_resources/docs/visio_ai_docs/get_elite_access.html"
|
92 |
+
if st.button('Get Elite Access'):
|
93 |
+
st.markdown("check out this [link](%s)" % elite_access)
|
94 |
+
|
95 |
+
#-------------------------------------------------#
|
96 |
+
|
97 |
+
# Top Expander Columns (Data Operations & Algorithms, Select Plot Type, Pre Analysis)
|
98 |
+
col11, col12, col13 = st.columns([1, 1, 1])
|
99 |
+
|
100 |
+
# --- Data Operations & Algorithms Expander ---
|
101 |
+
with col11:
|
102 |
+
with st.expander("⚙️ Data Operations & Algorithms", expanded=False):
|
103 |
+
if st.session_state.updated_df is not None:
|
104 |
+
st.markdown("#### 1. Define Target Variable and Problem Type")
|
105 |
+
all_columns = st.session_state.updated_df.columns.tolist()
|
106 |
+
target_column = st.selectbox("Select your **Target Column (Y)**:", ["--- Select ---"] + all_columns, key="target_col_select")
|
107 |
+
|
108 |
+
if target_column != "--- Select ---":
|
109 |
+
st.session_state.target_column = target_column
|
110 |
+
# Heuristic to guess problem type
|
111 |
+
if st.session_state.updated_df[target_column].dtype in ['int64', 'float64']:
|
112 |
+
if st.session_state.updated_df[target_column].nunique() < 20 and st.session_state.updated_df[target_column].dtype == 'int64':
|
113 |
+
st.session_state.problem_type = 'classification'
|
114 |
+
st.info(f"Detected **Classification** problem based on target column '{target_column}' (integer with few unique values).")
|
115 |
+
else:
|
116 |
+
st.session_state.problem_type = 'regression'
|
117 |
+
st.info(f"Detected **Regression** problem based on target column '{target_column}' (numerical).")
|
118 |
+
elif st.session_state.updated_df[target_column].dtype == 'object' or st.session_state.updated_df[target_column].dtype == 'bool':
|
119 |
+
st.session_state.problem_type = 'classification'
|
120 |
+
st.info(f"Detected **Classification** problem based on target column '{target_column}' (categorical).")
|
121 |
+
else:
|
122 |
+
st.session_state.problem_type = None
|
123 |
+
st.warning("Could not definitively determine problem type. Please proceed with caution.")
|
124 |
+
|
125 |
+
st.markdown("---")
|
126 |
+
st.markdown("#### 2. Select Independent Variables (Features)")
|
127 |
+
|
128 |
+
available_features = [col for col in all_columns if col != target_column]
|
129 |
+
feature_columns = st.multiselect("Select your **Independent Variables (X)**:", available_features, default=available_features, key="feature_select")
|
130 |
+
|
131 |
+
if feature_columns:
|
132 |
+
st.session_state.feature_columns = feature_columns
|
133 |
+
st.markdown("---")
|
134 |
+
st.markdown("#### 3. Split Data into Train and Test Sets")
|
135 |
+
|
136 |
+
test_size = st.slider("Select Test Set Size:", min_value=0.1, max_value=0.5, value=0.2, step=0.05, key="test_size_slider")
|
137 |
+
random_state = st.number_input("Random State (for reproducibility):", value=42, step=1, key="random_state_input")
|
138 |
+
|
139 |
+
# Use only selected features
|
140 |
+
# Take an explicit copy: the encoding and scaling steps below assign into
# `features`, and must not write through to (or warn about) the session DataFrame.
features = st.session_state.updated_df[feature_columns].copy()
|
141 |
+
target = st.session_state.updated_df[target_column]
|
142 |
+
|
143 |
+
# Handle categorical features by encoding
|
144 |
+
for col in features.select_dtypes(include=['object', 'bool']).columns:
|
145 |
+
le = LabelEncoder()
|
146 |
+
features[col] = le.fit_transform(features[col].astype(str))
|
147 |
+
|
148 |
+
# Handle numerical features by scaling
|
149 |
+
numerical_cols = features.select_dtypes(include=['number']).columns
|
150 |
+
if not numerical_cols.empty:
|
151 |
+
scaler = StandardScaler()
|
152 |
+
features[numerical_cols] = scaler.fit_transform(features[numerical_cols])
|
153 |
+
st.session_state.scaler = scaler # Save the scaler
|
154 |
+
|
155 |
+
try:
|
156 |
+
X_train, X_test, y_train, y_test = train_test_split(
|
157 |
+
features, target, test_size=test_size, random_state=random_state,
|
158 |
+
stratify=target if st.session_state.problem_type == 'classification' else None
|
159 |
+
)
|
160 |
+
st.session_state.X_train = X_train
|
161 |
+
st.session_state.X_test = X_test
|
162 |
+
st.session_state.y_train = y_train
|
163 |
+
st.session_state.y_test = y_test
|
164 |
+
st.success(f"Data split successfully! Training: {len(X_train)} samples, Testing: {len(X_test)} samples.")
|
165 |
+
|
166 |
+
st.markdown("---")
|
167 |
+
st.markdown("#### 4. Select Machine Learning Algorithm")
|
168 |
+
|
169 |
+
if st.session_state.problem_type == 'classification':
|
170 |
+
algo_options = {
|
171 |
+
"Logistic Regression": LogisticRegression(random_state=random_state),
|
172 |
+
"Decision Tree Classifier": DecisionTreeClassifier(random_state=random_state),
|
173 |
+
"Random Forest Classifier": RandomForestClassifier(random_state=random_state),
|
174 |
+
"Support Vector Classifier (SVC)": SVC(random_state=random_state),
|
175 |
+
"K-Nearest Neighbors Classifier": KNeighborsClassifier(),
|
176 |
+
"Gaussian Naive Bayes": GaussianNB()
|
177 |
+
}
|
178 |
+
algo_name = st.selectbox("Choose a Classification Algorithm:", list(algo_options.keys()), key="classification_algo_select")
|
179 |
+
selected_algo = algo_options.get(algo_name)
|
180 |
+
|
181 |
+
elif st.session_state.problem_type == 'regression':
|
182 |
+
algo_options = {
|
183 |
+
"Linear Regression": LinearRegression(),
|
184 |
+
"Decision Tree Regressor": DecisionTreeRegressor(random_state=random_state),
|
185 |
+
"Random Forest Regressor": RandomForestRegressor(random_state=random_state),
|
186 |
+
"Support Vector Regressor (SVR)": SVR(),
|
187 |
+
"K-Nearest Neighbors Regressor": KNeighborsRegressor()
|
188 |
+
}
|
189 |
+
algo_name = st.selectbox("Choose a Regression Algorithm:", list(algo_options.keys()), key="regression_algo_select")
|
190 |
+
selected_algo = algo_options.get(algo_name)
|
191 |
+
else:
|
192 |
+
st.warning("Please define target column and problem type to select an algorithm.")
|
193 |
+
selected_algo = None
|
194 |
+
|
195 |
+
if selected_algo:
|
196 |
+
st.info(f"Selected Algorithm: **{algo_name}**")
|
197 |
+
st.session_state.selected_algo = selected_algo
|
198 |
+
st.session_state.selected_algo_name = algo_name
|
199 |
+
st.markdown("---")
|
200 |
+
if st.button("🚀 Train Model"):
|
201 |
+
if st.session_state.X_train is not None and st.session_state.y_train is not None:
|
202 |
+
try:
|
203 |
+
with st.spinner(f"Training {st.session_state.selected_algo_name}..."):
|
204 |
+
st.session_state.selected_algo.fit(st.session_state.X_train, st.session_state.y_train)
|
205 |
+
st.session_state.trained_model = st.session_state.selected_algo
|
206 |
+
st.success(f"Model **{st.session_state.selected_algo_name}** trained successfully!")
|
207 |
+
|
208 |
+
y_pred = st.session_state.trained_model.predict(st.session_state.X_test)
|
209 |
+
metrics = {}
|
210 |
+
if st.session_state.problem_type == 'classification':
|
211 |
+
metrics['Accuracy'] = accuracy_score(st.session_state.y_test, y_pred)
|
212 |
+
metrics['Precision'] = precision_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
|
213 |
+
metrics['Recall'] = recall_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
|
214 |
+
metrics['F1 Score'] = f1_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
|
215 |
+
metrics['Confusion Matrix'] = confusion_matrix(st.session_state.y_test, y_pred)
|
216 |
+
elif st.session_state.problem_type == 'regression':
|
217 |
+
metrics['Mean Squared Error'] = mean_squared_error(st.session_state.y_test, y_pred)
|
218 |
+
metrics['R2 Score'] = r2_score(st.session_state.y_test, y_pred)
|
219 |
+
st.session_state.model_metrics = metrics
|
220 |
+
st.rerun()
|
221 |
+
except Exception as e:
|
222 |
+
st.error(f"Error training model: {e}")
|
223 |
+
else:
|
224 |
+
st.warning("Please split the data first before training the model.")
|
225 |
+
else:
|
226 |
+
st.warning("Please select a target column and problem type to enable algorithm selection.")
|
227 |
+
except Exception as e:
|
228 |
+
st.error(f"Error splitting data or preparing features: {e}")
|
229 |
+
st.info("Ensure your data is clean and suitable for splitting (e.g., no remaining NaN values after imputation).")
|
230 |
+
else:
|
231 |
+
st.warning("Please select at least one independent variable.")
|
232 |
+
else:
|
233 |
+
st.info("Please select a target column to proceed with data operations.")
|
234 |
+
else:
|
235 |
+
st.info("Please upload a dataset first to access Data Operations & Algorithms.")
|
236 |
+
|
237 |
+
|
238 |
+
with col12:
|
239 |
+
with st.expander("🎨 Select Plot Type", expanded=False):
|
240 |
+
if st.session_state.updated_df is not None:
|
241 |
+
df = st.session_state.updated_df
|
242 |
+
numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
|
243 |
+
categorical_cols = df.select_dtypes(include='object').columns.tolist()
|
244 |
+
|
245 |
+
plot_type = st.selectbox("Select a plot type", ["---Select---", "Bar Chart", "Histogram", "Scatter Plot", "Box Plot", "Heatmap",
|
246 |
+
"Line Chart", "Pie Chart", "Violin Plot", "Pair Plot",
|
247 |
+
"3D Scatter Plot", "3D Surface Plot"])
|
248 |
+
|
249 |
+
if plot_type == "Bar Chart":
|
250 |
+
st.info("A bar chart shows counts of categories within a column.")
|
251 |
+
selected_col = st.selectbox("Select a categorical column", categorical_cols)
|
252 |
+
if st.button("Generate Bar Chart"):
|
253 |
+
if selected_col:
|
254 |
+
fig = px.bar(df, x=selected_col, title=f'Bar Chart of {selected_col}', color=selected_col)
|
255 |
+
st.plotly_chart(fig, use_container_width=True)
|
256 |
+
|
257 |
+
elif plot_type == "Histogram":
|
258 |
+
st.info("A histogram shows the distribution of a numerical column.")
|
259 |
+
selected_col = st.selectbox("Select a numerical column", numerical_cols)
|
260 |
+
if st.button("Generate Histogram"):
|
261 |
+
if selected_col:
|
262 |
+
fig = px.histogram(df, x=selected_col, title=f'Histogram of {selected_col}')
|
263 |
+
st.plotly_chart(fig, use_container_width=True)
|
264 |
+
|
265 |
+
elif plot_type == "Scatter Plot":
|
266 |
+
st.info("A scatter plot shows the relationship between two numerical columns.")
|
267 |
+
x_col = st.selectbox("Select X-axis column", numerical_cols, key='scatter_x')
|
268 |
+
y_col = st.selectbox("Select Y-axis column", numerical_cols, key='scatter_y')
|
269 |
+
if st.button("Generate Scatter Plot"):
|
270 |
+
if x_col and y_col:
|
271 |
+
fig = px.scatter(df, x=x_col, y=y_col, title=f'Scatter Plot of {x_col} vs {y_col}')
|
272 |
+
st.plotly_chart(fig, use_container_width=True)
|
273 |
+
|
274 |
+
elif plot_type == "Box Plot":
|
275 |
+
st.info("A box plot shows the distribution of a numerical column grouped by a categorical column.")
|
276 |
+
num_col = st.selectbox("Select a numerical column", numerical_cols, key='box_num')
|
277 |
+
cat_col = st.selectbox("Select a categorical column for grouping", categorical_cols, key='box_cat')
|
278 |
+
if st.button("Generate Box Plot"):
|
279 |
+
if num_col and cat_col:
|
280 |
+
fig = px.box(df, x=cat_col, y=num_col, title=f'Box Plot of {num_col} by {cat_col}', color=cat_col)
|
281 |
+
st.plotly_chart(fig, use_container_width=True)
|
282 |
+
|
283 |
+
elif plot_type == "Heatmap":
|
284 |
+
st.info("A heatmap shows the correlation between all numerical columns.")
|
285 |
+
if st.button("Generate Heatmap"):
|
286 |
+
corr = df[numerical_cols].corr()
|
287 |
+
fig = px.imshow(corr, text_auto=True, title='Correlation Heatmap')
|
288 |
+
st.plotly_chart(fig, use_container_width=True)
|
289 |
+
|
290 |
+
elif plot_type == "Line Chart":
|
291 |
+
st.info("A line chart shows trends over time or ordered categories.")
|
292 |
+
x_col = st.selectbox("Select X-axis column", df.columns, key='line_x')
|
293 |
+
y_col = st.selectbox("Select Y-axis (numerical) column", numerical_cols, key='line_y')
|
294 |
+
if st.button("Generate Line Chart"):
|
295 |
+
if x_col and y_col:
|
296 |
+
fig = px.line(df, x=x_col, y=y_col, title=f'Line Chart of {y_col} over {x_col}')
|
297 |
+
st.plotly_chart(fig, use_container_width=True)
|
298 |
+
|
299 |
+
elif plot_type == "Pie Chart":
|
300 |
+
st.info("A pie chart shows proportions of categories within a column.")
|
301 |
+
selected_col = st.selectbox("Select a categorical column for Pie Chart", categorical_cols, key='pie_col')
|
302 |
+
if st.button("Generate Pie Chart"):
|
303 |
+
if selected_col:
|
304 |
+
pie_data = df[selected_col].value_counts().reset_index()
|
305 |
+
pie_data.columns = [selected_col, 'Count']
|
306 |
+
fig = px.pie(pie_data, names=selected_col, values='Count', title=f'Pie Chart of {selected_col}')
|
307 |
+
st.plotly_chart(fig, use_container_width=True)
|
308 |
+
|
309 |
+
elif plot_type == "Violin Plot":
|
310 |
+
st.info("A violin plot shows the distribution of a numerical column by categories.")
|
311 |
+
num_col = st.selectbox("Select a numerical column", numerical_cols, key='violin_num')
|
312 |
+
cat_col = st.selectbox("Select a categorical column for grouping", categorical_cols, key='violin_cat')
|
313 |
+
if st.button("Generate Violin Plot"):
|
314 |
+
if num_col and cat_col:
|
315 |
+
fig = px.violin(df, x=cat_col, y=num_col, box=True, points="all", title=f'Violin Plot of {num_col} by {cat_col}')
|
316 |
+
st.plotly_chart(fig, use_container_width=True)
|
317 |
+
|
318 |
+
elif plot_type == "Pair Plot":
|
319 |
+
st.info("A pair plot shows scatter plots for all combinations of numerical columns.")
|
320 |
+
if st.button("Generate Pair Plot"):
|
321 |
+
fig = px.scatter_matrix(df[numerical_cols], dimensions=numerical_cols, title='Pair Plot of Numerical Features')
|
322 |
+
st.plotly_chart(fig, use_container_width=True)
|
323 |
+
|
324 |
+
elif plot_type == "3D Scatter Plot":
|
325 |
+
st.info("A 3D scatter plot shows the relationship between three numerical columns.")
|
326 |
+
x_col = st.selectbox("Select X-axis column", numerical_cols, key='3d_scatter_x')
|
327 |
+
y_col = st.selectbox("Select Y-axis column", numerical_cols, key='3d_scatter_y')
|
328 |
+
z_col = st.selectbox("Select Z-axis column", numerical_cols, key='3d_scatter_z')
|
329 |
+
color_col = st.selectbox("Optional: Select a column for color grouping (optional)", df.columns, key='3d_scatter_color')
|
330 |
+
if st.button("Generate 3D Scatter Plot"):
|
331 |
+
if x_col and y_col and z_col:
|
332 |
+
fig = px.scatter_3d(df, x=x_col, y=y_col, z=z_col, color=color_col if color_col else None,
|
333 |
+
title=f'3D Scatter Plot: {x_col} vs {y_col} vs {z_col}')
|
334 |
+
st.plotly_chart(fig, use_container_width=True)
|
335 |
+
|
336 |
+
elif plot_type == "3D Surface Plot":
|
337 |
+
st.info("A 3D surface plot shows a continuous surface over two variables.")
|
338 |
+
x_col = st.selectbox("Select X-axis column", numerical_cols, key='3d_surface_x')
|
339 |
+
y_col = st.selectbox("Select Y-axis column", numerical_cols, key='3d_surface_y')
|
340 |
+
z_col = st.selectbox("Select Z-axis column", numerical_cols, key='3d_surface_z')
|
341 |
+
if st.button("Generate 3D Surface Plot"):
|
342 |
+
if x_col and y_col and z_col:
|
343 |
+
try:
|
344 |
+
pivot_table = df.pivot_table(index=y_col, columns=x_col, values=z_col, aggfunc='mean')
|
345 |
+
fig = go.Figure(data=[go.Surface(z=pivot_table.values,
|
346 |
+
x=pivot_table.columns,
|
347 |
+
y=pivot_table.index)])
|
348 |
+
fig.update_layout(title=f'3D Surface Plot of {z_col} over {x_col} and {y_col}',
|
349 |
+
scene=dict(
|
350 |
+
xaxis_title=x_col,
|
351 |
+
yaxis_title=y_col,
|
352 |
+
zaxis_title=z_col
|
353 |
+
))
|
354 |
+
st.plotly_chart(fig, use_container_width=True)
|
355 |
+
except Exception as e:
|
356 |
+
st.error(f"Error generating surface plot: {e}")
|
357 |
+
|
358 |
+
|
359 |
+
else:
|
360 |
+
st.info("Please upload a dataset first to generate plots.")
|
361 |
+
|
362 |
+
with col13:
|
363 |
+
with st.expander("📈 Pre Analysis", expanded=False):
|
364 |
+
if st.session_state.updated_df is not None:
|
365 |
+
# Create tabs for different analyses
|
366 |
+
tab1, tab2 = st.tabs(["Statistical Summary", "Dataset Info"])
|
367 |
+
|
368 |
+
with tab1:
|
369 |
+
st.subheader("Statistical Summary (describe)")
|
370 |
+
# Select every numeric dtype (int32, float32, nullable ints, ...), matching the
# 'number' selector used elsewhere in this script, not just int64/float64.
numeric_df = st.session_state.updated_df.select_dtypes(include='number')
|
371 |
+
if not numeric_df.empty:
|
372 |
+
# Display statistical summary
|
373 |
+
st.dataframe(numeric_df.describe())
|
374 |
+
else:
|
375 |
+
st.warning("No numerical columns found in the dataset")
|
376 |
+
|
377 |
+
if st.checkbox("Show additional statistics"):
|
378 |
+
st.write("Skewness:")
|
379 |
+
st.dataframe(numeric_df.skew())
|
380 |
+
st.write("Kurtosis:")
|
381 |
+
st.dataframe(numeric_df.kurtosis())
|
382 |
+
|
383 |
+
with tab2:
|
384 |
+
st.subheader("Dataset Information (info)")
|
385 |
+
# Get DataFrame info
|
386 |
+
buffer = io.StringIO()
|
387 |
+
st.session_state.updated_df.info(buf=buffer)
|
388 |
+
info_str = buffer.getvalue()
|
389 |
+
|
390 |
+
# Display formatted info
|
391 |
+
st.text(info_str)
|
392 |
+
|
393 |
+
st.write("Quick Facts:")
|
394 |
+
col1, col2, col3 = st.columns(3)
|
395 |
+
with col1:
|
396 |
+
st.metric("Total Rows", st.session_state.updated_df.shape[0])
|
397 |
+
with col2:
|
398 |
+
st.metric("Total Columns", st.session_state.updated_df.shape[1])
|
399 |
+
with col3:
|
400 |
+
st.metric("Missing Values", st.session_state.updated_df.isna().sum().sum())
|
401 |
+
|
402 |
+
# Display column types
|
403 |
+
st.write("Column Data Types:")
|
404 |
+
dtypes_df = pd.DataFrame(st.session_state.updated_df.dtypes, columns=['Data Type'])
|
405 |
+
st.dataframe(dtypes_df)
|
406 |
+
else:
|
407 |
+
st.info("Please upload a dataset first.")
|
408 |
+
|
409 |
+
|
410 |
+
#----------------------------------------------------#
|
411 |
+
|
412 |
+
# Sidebar (Keep as is if you are simulating pages in a single file)
|
413 |
+
with st.sidebar:
|
414 |
+
st.markdown('<b>🛠️ Tools</b>', unsafe_allow_html=True)
|
415 |
+
|
416 |
+
# Store the active page in session state
|
417 |
+
if 'current_page' not in st.session_state:
|
418 |
+
st.session_state.current_page = "main"
|
419 |
+
|
420 |
+
if st.button("🏠 Home"):
|
421 |
+
st.session_state.current_page = "main"
|
422 |
+
st.rerun()
|
423 |
+
|
424 |
+
if st.button("📝 Note -- Lite"):
|
425 |
+
st.session_state.current_page = "note_lite"
|
426 |
+
st.rerun()
|
427 |
+
|
428 |
+
if st.button("😶🌫️ WordCloud"):
|
429 |
+
st.session_state.current_page = "word_cloud"
|
430 |
+
st.rerun()
|
431 |
+
|
432 |
+
if st.button("🤖 Viz AI (img)"):
|
433 |
+
st.session_state.current_page = "viz_ai_img"
|
434 |
+
st.rerun()
|
435 |
+
|
436 |
+
if st.button("🧮 Calculator"):
|
437 |
+
st.session_state.current_page = "calculator"
|
438 |
+
st.rerun()
|
439 |
+
|
440 |
+
if st.button("⚙️ Viz Editor"):
|
441 |
+
st.session_state.current_page = "note_edit"
|
442 |
+
# No rerun here — handled differently maybe?
|
443 |
+
|
444 |
+
if st.button("📄 Viz Report"):
|
445 |
+
st.session_state.current_page = "generate_report"
|
446 |
+
st.rerun()
|
447 |
+
|
448 |
+
st.markdown("<hr>",unsafe_allow_html=True)
|
449 |
+
st.markdown("### <center>Other Products</center>", unsafe_allow_html=True)
|
450 |
+
|
451 |
+
|
452 |
+
#---------------------------------------------------------------#
|
453 |
+
|
454 |
+
#---------------------------------------------------------------#
|
455 |
+
|
456 |
+
# Main content columns
|
457 |
+
col_main_left, col_main_right = st.columns([0.6, 0.4]) # Adjusted column widths for better layout
|
458 |
+
|
459 |
+
with col_main_left:
|
460 |
+
st.markdown("<b style='font-size:20px;'>📂 Upload Your Dataset</b>", unsafe_allow_html=True)
|
461 |
+
dataset = st.file_uploader("Choose a dataset file", type=["csv", "xlsx", "txt"], key="file_uploader_main") # Added key
|
462 |
+
|
463 |
+
if dataset is not None:
|
464 |
+
if 'last_uploaded_file_object' not in st.session_state or st.session_state.last_uploaded_file_object != dataset:
|
465 |
+
st.session_state.last_uploaded_file_object = dataset
|
466 |
+
st.session_state.original_df_uploaded = False
|
467 |
+
st.session_state.updated_df = None
|
468 |
+
st.session_state.X_train = st.session_state.X_test = st.session_state.y_train = st.session_state.y_test = None
|
469 |
+
st.session_state.target_column = None
|
470 |
+
st.session_state.feature_columns = None
|
471 |
+
st.session_state.problem_type = None
|
472 |
+
st.session_state.trained_model = None
|
473 |
+
st.session_state.model_metrics = None
|
474 |
+
st.session_state.scaler = None
|
475 |
+
|
476 |
+
|
477 |
+
st.success("✅ File uploaded successfully!")
|
478 |
+
st.write(f"File name: **{dataset.name}**")
|
479 |
+
|
480 |
+
try:
|
481 |
+
if dataset.name.endswith(".csv"):
|
482 |
+
df = pd.read_csv(dataset)
|
483 |
+
elif dataset.name.endswith(".xlsx"):
|
484 |
+
df = pd.read_excel(dataset)
|
485 |
+
elif dataset.name.endswith(".txt"):
|
486 |
+
df = pd.read_csv(dataset, delimiter="\t")
|
487 |
+
else:
|
488 |
+
st.error("Unsupported file type. Please upload a CSV, XLSX, or TXT (tab-separated) file.")
|
489 |
+
df = None
|
490 |
+
|
491 |
+
if df is not None:
|
492 |
+
st.session_state.updated_df = df.copy()
|
493 |
+
st.session_state.original_df_uploaded = True
|
494 |
+
st.rerun()
|
495 |
+
|
496 |
+
except Exception as e:
|
497 |
+
st.error(f"Error reading file: {e}. Please ensure it's a valid CSV, XLSX, or tab-separated TXT.")
|
498 |
+
st.session_state.original_df_uploaded = False
|
499 |
+
st.session_state.updated_df = None
|
500 |
+
|
501 |
+
|
502 |
+
# Original Dataset Preview
|
503 |
+
if st.session_state.original_df_uploaded and st.session_state.updated_df is not None:
|
504 |
+
st.markdown('<div class="dataset-preview">', unsafe_allow_html=True)
|
505 |
+
st.subheader("🔍 Original Dataset Preview")
|
506 |
+
st.dataframe(st.session_state.updated_df, use_container_width=True)
|
507 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
508 |
+
|
509 |
+
# Updated Dataset Preview (after imputation)
|
510 |
+
st.markdown('<div class="dataset-preview">', unsafe_allow_html=True)
|
511 |
+
st.subheader("🔄 Updated Dataset Preview (After Imputation)")
|
512 |
+
st.dataframe(st.session_state.updated_df, use_container_width=True)
|
513 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
514 |
+
|
515 |
+
|
516 |
+
with col_main_right:
|
517 |
+
if st.session_state.updated_df is not None:
|
518 |
+
st.markdown('<div class="section-title">📊 Missing Values Report</div>', unsafe_allow_html=True)
|
519 |
+
null_counts = st.session_state.updated_df.isnull().sum()
|
520 |
+
total_nulls = null_counts.sum()
|
521 |
+
|
522 |
+
if total_nulls == 0:
|
523 |
+
st.success("✅ No null values found in the dataset!")
|
524 |
+
else:
|
525 |
+
st.warning(f"⚠️ Found {total_nulls} null values in the dataset.")
|
526 |
+
st.write(null_counts[null_counts > 0])
|
527 |
+
|
528 |
+
# Automatic Missing Value Handling
|
529 |
+
st.markdown('<div class="section-title">🤖 Automatic Missing Value Handling</div>', unsafe_allow_html=True)
|
530 |
+
|
531 |
+
with st.form("auto_impute_form"):
|
532 |
+
st.write("Apply default handling for all missing values:")
|
533 |
+
auto_impute_option = st.selectbox(
|
534 |
+
"Choose imputation method:",
|
535 |
+
["None", "Mean (Numerical)", "Median (Numerical)", "Mode (All)", "Forward Fill", "Backward Fill"],
|
536 |
+
key="auto_impute_method"
|
537 |
+
)
|
538 |
+
auto_impute_button = st.form_submit_button("Apply Automatic Imputation")
|
539 |
+
|
540 |
+
if auto_impute_button and auto_impute_option != "None":
|
541 |
+
df_to_impute = st.session_state.updated_df.copy()
|
542 |
+
|
543 |
+
if auto_impute_option == "Mean (Numerical)":
|
544 |
+
for col in df_to_impute.select_dtypes(include=['number']).columns:
|
545 |
+
if df_to_impute[col].isnull().sum() > 0:
|
546 |
+
# Assign the result back: an in-place fillna on the `df[col]` temporary is
# deprecated and does not update the frame under pandas Copy-on-Write.
df_to_impute[col] = df_to_impute[col].fillna(df_to_impute[col].mean())
|
547 |
+
elif auto_impute_option == "Median (Numerical)":
|
548 |
+
for col in df_to_impute.select_dtypes(include=['number']).columns:
|
549 |
+
if df_to_impute[col].isnull().sum() > 0:
|
550 |
+
# Assign the result back: an in-place fillna on the `df[col]` temporary is
# deprecated and does not update the frame under pandas Copy-on-Write.
df_to_impute[col] = df_to_impute[col].fillna(df_to_impute[col].median())
|
551 |
+
elif auto_impute_option == "Mode (All)":
|
552 |
+
for col in df_to_impute.columns:
|
553 |
+
if df_to_impute[col].isnull().sum() > 0:
|
554 |
+
if not df_to_impute[col].mode().empty:
|
555 |
+
df_to_impute[col].fillna(df_to_impute[col].mode()[0], inplace=True)
|
556 |
+
else:
|
557 |
+
st.warning(f"Could not compute mode for column '{col}'. Skipping.")
|
558 |
+
elif auto_impute_option == "Forward Fill":
|
559 |
+
df_to_impute.fillna(method='ffill', inplace=True)
|
560 |
+
elif auto_impute_option == "Backward Fill":
|
561 |
+
df_to_impute.fillna(method='bfill', inplace=True)
|
562 |
+
|
563 |
+
st.session_state.updated_df = df_to_impute
|
564 |
+
st.success(f"🎉 Missing values have been handled automatically using **{auto_impute_option}**!")
|
565 |
+
st.rerun()
|
566 |
+
|
567 |
+
# Manual Missing Value Handling
|
568 |
+
st.markdown('<div class="section-title">🛠️ Manual Missing Value Handling</div>', unsafe_allow_html=True)
|
569 |
+
|
570 |
+
cols_with_missing = st.session_state.updated_df.columns[st.session_state.updated_df.isnull().any()].tolist()
|
571 |
+
|
572 |
+
if cols_with_missing:
|
573 |
+
selected_col_manual = st.selectbox(
|
574 |
+
"Select a column to manually handle missing values:",
|
575 |
+
["--- Select a Column ---"] + cols_with_missing,
|
576 |
+
key="manual_col_select"
|
577 |
+
)
|
578 |
+
|
579 |
+
if selected_col_manual != "--- Select a Column ---":
|
580 |
+
col_dtype = st.session_state.updated_df[selected_col_manual].dtype
|
581 |
+
num_missing = st.session_state.updated_df[selected_col_manual].isnull().sum()
|
582 |
+
st.write(f"Column: **{selected_col_manual}** (Missing values: **{num_missing}**)")
|
583 |
+
|
584 |
+
with st.form(key=f"manual_impute_form_{selected_col_manual}"):
|
585 |
+
fill_value_to_apply = None
|
586 |
+
if col_dtype == "object":
|
587 |
+
manual_fill_option = st.selectbox(
|
588 |
+
f"Choose a method for '{selected_col_manual}'",
|
589 |
+
["Mode", "Fill with custom value"],
|
590 |
+
key=f"cat_method_{selected_col_manual}"
|
591 |
+
)
|
592 |
+
if manual_fill_option == "Fill with custom value":
|
593 |
+
fill_value_to_apply = st.text_input(f"Enter the custom value to fill for '{selected_col_manual}'", key=f"cat_value_{selected_col_manual}")
|
594 |
+
elif manual_fill_option == "Mode":
|
595 |
+
if not st.session_state.updated_df[selected_col_manual].mode().empty:
|
596 |
+
fill_value_to_apply = st.session_state.updated_df[selected_col_manual].mode()[0]
|
597 |
+
else:
|
598 |
+
st.warning(f"Mode cannot be calculated for {selected_col_manual}. Please enter a custom value.")
|
599 |
+
|
600 |
+
else:
|
601 |
+
manual_fill_option = st.selectbox(
|
602 |
+
f"Choose a method for '{selected_col_manual}'",
|
603 |
+
["Mean", "Median", "Mode", "Fill with custom value"],
|
604 |
+
key=f"num_method_{selected_col_manual}"
|
605 |
+
)
|
606 |
+
if manual_fill_option == "Fill with custom value":
|
607 |
+
fill_value_to_apply = st.number_input(f"Enter the custom value to fill for '{selected_col_manual}'", value=0.0, key=f"num_value_{selected_col_manual}")
|
608 |
+
elif manual_fill_option == "Mean":
|
609 |
+
fill_value_to_apply = st.session_state.updated_df[selected_col_manual].mean()
|
610 |
+
elif manual_fill_option == "Median":
|
611 |
+
fill_value_to_apply = st.session_state.updated_df[selected_col_manual].median()
|
612 |
+
elif manual_fill_option == "Mode":
|
613 |
+
if not st.session_state.updated_df[selected_col_manual].mode().empty:
|
614 |
+
fill_value_to_apply = st.session_state.updated_df[selected_col_manual].mode()[0]
|
615 |
+
else:
|
616 |
+
st.warning(f"Mode cannot be calculated for {selected_col_manual}. Please enter a custom value.")
|
617 |
+
|
618 |
+
submit_button = st.form_submit_button(f"Apply Manual Imputation to {selected_col_manual}")
|
619 |
+
|
620 |
+
if submit_button and fill_value_to_apply is not None:
|
621 |
+
st.session_state.updated_df[selected_col_manual].fillna(fill_value_to_apply, inplace=True)
|
622 |
+
st.success(f"Filled '{selected_col_manual}' missing values with **'{fill_value_to_apply}'** using {manual_fill_option}!")
|
623 |
+
st.rerun()
|
624 |
+
else:
|
625 |
+
st.info("No columns with missing values to display for manual handling.")
|
626 |
+
|
627 |
+
# Pair Plot button is now below the missing values report
|
628 |
+
st.markdown("---")
|
629 |
+
if st.button("📈 Generate Pair Plot of Numerical Columns"):
|
630 |
+
if st.session_state.updated_df is not None:
|
631 |
+
numerical_data = st.session_state.updated_df.select_dtypes(include=['float64', 'int64'])
|
632 |
+
if not numerical_data.empty:
|
633 |
+
st.markdown("##### 📘 Pair Plot - Seaborn (Static)", unsafe_allow_html=True)
|
634 |
+
fig1 = sns.pairplot(numerical_data)
|
635 |
+
st.pyplot(fig1)
|
636 |
+
plt.clf()
|
637 |
+
st.markdown("##### 🧠 Pair Plot - Plotly (Interactive)", unsafe_allow_html=True)
|
638 |
+
fig2 = px.scatter_matrix(numerical_data,
|
639 |
+
dimensions=numerical_data.columns,
|
640 |
+
height=800, width=800)
|
641 |
+
st.plotly_chart(fig2, use_container_width=True)
|
642 |
+
else:
|
643 |
+
st.warning("No numerical columns found to generate a pair plot.")
|
644 |
+
else:
|
645 |
+
st.warning("Please upload and process a dataset first.")
|
646 |
+
|
647 |
+
|
648 |
+
# --- Machine Learning Operations Section (Full Width, below the two columns) ---
|
649 |
+
st.markdown("---")
|
650 |
+
st.markdown("<h2 style='text-align: center; color: #4A90E2;'>🧠 Machine Learning Operations</h2>", unsafe_allow_html=True)
|
651 |
+
|
652 |
+
if st.session_state.updated_df is not None and st.session_state.trained_model is not None:
|
653 |
+
st.markdown(f"### Model Training Results for **{st.session_state.selected_algo_name}**")
|
654 |
+
|
655 |
+
if st.session_state.model_metrics:
|
656 |
+
if st.session_state.problem_type == 'classification':
|
657 |
+
st.markdown("#### Classification Metrics:")
|
658 |
+
col_m1, col_m2, col_m3, col_m4 = st.columns(4)
|
659 |
+
with col_m1:
|
660 |
+
st.metric(label="Accuracy", value=f"{st.session_state.model_metrics['Accuracy']:.4f}")
|
661 |
+
with col_m2:
|
662 |
+
st.metric(label="Precision", value=f"{st.session_state.model_metrics['Precision']:.4f}")
|
663 |
+
with col_m3:
|
664 |
+
st.metric(label="Recall", value=f"{st.session_state.model_metrics['Recall']:.4f}")
|
665 |
+
with col_m4:
|
666 |
+
st.metric(label="F1 Score", value=f"{st.session_state.model_metrics['F1 Score']:.4f}")
|
667 |
+
|
668 |
+
st.markdown("#### Confusion Matrix:")
|
669 |
+
fig_cm, ax_cm = plt.subplots(figsize=(6, 5))
|
670 |
+
sns.heatmap(st.session_state.model_metrics['Confusion Matrix'], annot=True, fmt='d', cmap='Blues', ax=ax_cm)
|
671 |
+
ax_cm.set_xlabel('Predicted')
|
672 |
+
ax_cm.set_ylabel('True')
|
673 |
+
ax_cm.set_title('Confusion Matrix')
|
674 |
+
st.pyplot(fig_cm)
|
675 |
+
plt.clf()
|
676 |
+
|
677 |
+
elif st.session_state.problem_type == 'regression':
|
678 |
+
st.markdown("#### Regression Metrics:")
|
679 |
+
col_r1, col_r2 = st.columns(2)
|
680 |
+
with col_r1:
|
681 |
+
st.metric(label="Mean Squared Error", value=f"{st.session_state.model_metrics['Mean Squared Error']:.4f}")
|
682 |
+
with col_r2:
|
683 |
+
st.metric(label="R2 Score", value=f"{st.session_state.model_metrics['R2 Score']:.4f}")
|
684 |
+
|
685 |
+
st.markdown("---")
|
686 |
+
|
687 |
+
# --- Test Your Own Values and Download Model ---
|
688 |
+
col_test, col_download = st.columns(2)
|
689 |
+
|
690 |
+
with col_test:
|
691 |
+
st.markdown("### 🧪 Test with Your Own Values")
|
692 |
+
|
693 |
+
if st.session_state.feature_columns:
|
694 |
+
input_data = {}
|
695 |
+
for col in st.session_state.feature_columns:
|
696 |
+
if st.session_state.updated_df[col].dtype == 'object':
|
697 |
+
unique_vals = st.session_state.updated_df[col].unique()
|
698 |
+
input_data[col] = st.selectbox(f"Select value for **{col}**", unique_vals)
|
699 |
+
else:
|
700 |
+
input_data[col] = st.number_input(f"Enter value for **{col}**", value=float(st.session_state.updated_df[col].mean()))
|
701 |
+
|
702 |
+
if st.button("Get Prediction"):
|
703 |
+
input_df = pd.DataFrame([input_data])
|
704 |
+
|
705 |
+
# Preprocess the input data similarly to the training data
|
706 |
+
for col in input_df.select_dtypes(include=['object', 'bool']).columns:
|
707 |
+
le = LabelEncoder()
|
708 |
+
input_df[col] = le.fit_transform(input_df[col].astype(str))
|
709 |
+
|
710 |
+
if st.session_state.scaler:
|
711 |
+
numerical_cols = input_df.select_dtypes(include=['number']).columns
|
712 |
+
if not numerical_cols.empty:
|
713 |
+
input_df[numerical_cols] = st.session_state.scaler.transform(input_df[numerical_cols])
|
714 |
+
|
715 |
+
prediction = st.session_state.trained_model.predict(input_df)
|
716 |
+
st.success(f"**Prediction:** {prediction[0]}")
|
717 |
+
|
718 |
+
with col_download:
|
719 |
+
st.markdown("### 📥 Download Trained Model")
|
720 |
+
|
721 |
+
# Serialize the model for download
|
722 |
+
model_pkl = pickle.dumps(st.session_state.trained_model)
|
723 |
+
b64 = base64.b64encode(model_pkl).decode()
|
724 |
+
|
725 |
+
st.download_button(
|
726 |
+
label="Download Model as .pkl",
|
727 |
+
data=base64.b64decode(b64),
|
728 |
+
file_name=f"{st.session_state.selected_algo_name}_model.pkl",
|
729 |
+
mime="application/octet-stream"
|
730 |
+
)
|
731 |
+
|
732 |
+
|
733 |
+
else:
|
734 |
+
st.info("Upload a dataset and train a model to see results and test your own values.")
|
735 |
+
|
736 |
+
if st.session_state.current_page == "viz_ai_img":
|
737 |
+
viz_ai_img.analyze_image_ui()
|
738 |
+
|
739 |
+
elif st.session_state.current_page == "word_cloud":
|
740 |
+
# Make sure to import your word_cloud module if you have it
|
741 |
+
word_cloud.render_word_cloud_page()
|
742 |
+
|
743 |
+
elif st.session_state.current_page == "note_lite":
|
744 |
+
notepad_lite.render_notepad()
|
745 |
+
|
746 |
+
elif st.session_state.current_page == "calculator":
|
747 |
+
calculator.render_calculator()
|
748 |
+
|
749 |
+
elif st.session_state.current_page == "generate_report":
|
750 |
+
# Make sure to import your viz_report module if you have it
|
751 |
+
# viz_report.generate_report()
|
752 |
+
#viz_report.render_report_page()
|
753 |
+
st.write("Viz Report Page (Implement logic here)")
|
754 |
+
|
755 |
+
# Add custom CSS for better styling
|
756 |
+
# Global stylesheet for the app, injected as a raw <style> tag (rendered because
# unsafe_allow_html=True). Comments inside the stylesheet must use CSS /* ... */
# syntax: a '#' is not a CSS comment and makes the browser drop the declaration
# that follows it.
st.markdown("""
<style>
.stButton>button {
    width: 100%;
    border-radius: 5px;
    border: 1px solid #4A90E2;
    color: #4A90E2;
    background-color: white;
    padding: 10px;
    font-size: 16px;
    transition: all 0.2s ease-in-out;
}
.stButton>button:hover {
    background-color: #4A90E2;
    color: white;
}
.section-title {
    color: #4A90E2;
    font-size: 18px;
    margin-top: 15px;
    margin-bottom: 10px;
    font-weight: bold;
}
.dataset-preview {
    border: 1px solid #ddd;
    border-radius: 5px;
    padding: 10px;
    margin-top: 20px;
    background-color: #f9f9f9;
}
h1 {
    color: #4A90E2;
}
h2 {
    color: #4A90E2;
}
h3 {
    color: #333;
}
h4 {
    color: #555;
}
/* Targeting expander header for slightly different styling */
.st-emotion-cache-1jmvejs {
    background-color: #f0f2f6;
    border-radius: 5px;
}
</style>
""", unsafe_allow_html=True)
|
804 |
+
|
805 |
+
|
806 |
+
|
807 |
+
st.markdown("""
|
808 |
+
<div style="position: fixed; bottom: 0; left: 0; width: 100%; text-align: center; background-color: ; padding: 10px;">
|
809 |
+
<p style="font-size: 12px;">Made with ❤️ by <a href = "https://avarshvir.github.io/arshvir">Arshvir</a> and <a href = "https://jaiho-labs.onrender.com">Jaiho Labs</a></p>
|
810 |
+
</div>
|
811 |
+
""", unsafe_allow_html=True)
|
src/calculator.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from functools import partial
|
3 |
+
|
4 |
+
def render_calculator():
    """
    Renders a fully functional calculator UI and handles its logic.

    All calculator state is kept in ``st.session_state``:

    * ``calc_display`` -- text shown on the display (a number, or ``'Error'``
      after a division by zero).
    * ``first_operand`` -- left-hand operand of the pending operation, or ``None``.
    * ``operator`` -- pending operator (``'+'``, ``'-'``, ``'*'``, ``'/'``) or ``None``.
    * ``waiting_for_second_operand`` -- ``True`` when the next digit or decimal
      point must start a new number instead of extending the display.

    Buttons change that state through ``on_click`` callbacks defined below.
    """
    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>🧮 Calculator</h2>", unsafe_allow_html=True)

    # --- State Initialization ---
    # We use session state to keep track of the calculator's current state.
    if 'calc_display' not in st.session_state:
        st.session_state.calc_display = '0'
        st.session_state.first_operand = None
        st.session_state.operator = None
        st.session_state.waiting_for_second_operand = False

    # --- Callback Functions ---
    # These functions modify the state in response to button clicks.

    def _display_value():
        """Returns the display parsed as a float, or None when it is not a number (e.g. 'Error')."""
        try:
            return float(st.session_state.calc_display)
        except ValueError:
            return None

    def _show_error():
        """Shows 'Error' and drops the pending operation so the next input starts fresh."""
        st.session_state.calc_display = 'Error'
        st.session_state.first_operand = None
        st.session_state.operator = None
        st.session_state.waiting_for_second_operand = True

    def handle_digit(digit):
        """Appends a digit to the display, or starts a new number when one is expected."""
        if st.session_state.waiting_for_second_operand:
            st.session_state.calc_display = digit
            st.session_state.waiting_for_second_operand = False
        elif st.session_state.calc_display == '0':
            # Replace the placeholder zero instead of producing "07".
            st.session_state.calc_display = digit
        else:
            st.session_state.calc_display += digit

    def handle_decimal():
        """Adds a decimal point if one doesn't already exist."""
        if st.session_state.waiting_for_second_operand:
            # A new number is expected: start it as "0." rather than appending
            # the point to the previous operand/result (which the next digit
            # would then overwrite, silently losing the decimal point).
            st.session_state.calc_display = '0.'
            st.session_state.waiting_for_second_operand = False
        elif '.' not in st.session_state.calc_display:
            st.session_state.calc_display += '.'

    def handle_operator(op):
        """Handles an operator click (+, -, *, /)."""
        current_value = _display_value()
        if current_value is None:
            # Display shows 'Error': ignore operators until a number is entered
            # or the calculator is cleared.
            return

        # This block handles chained operations like 5 * 2 + (result is 10, then we wait for next number)
        is_chained = (
            st.session_state.first_operand is not None
            and st.session_state.operator is not None
            and not st.session_state.waiting_for_second_operand
        )
        if is_chained:
            handle_equals()
            current_value = _display_value()
            if current_value is None:
                # The chained calculation failed (division by zero);
                # handle_equals has already reset the state.
                return

        st.session_state.first_operand = current_value
        st.session_state.operator = op
        st.session_state.waiting_for_second_operand = True

    def handle_equals():
        """Performs the pending calculation and shows the result."""
        if st.session_state.operator is None or st.session_state.first_operand is None:
            return

        second_operand = _display_value()
        if second_operand is None:
            return
        first_operand = st.session_state.first_operand
        operator = st.session_state.operator

        if operator == '+':
            result = first_operand + second_operand
        elif operator == '-':
            result = first_operand - second_operand
        elif operator == '*':
            result = first_operand * second_operand
        elif operator == '/':
            if second_operand == 0:
                _show_error()
                return
            result = first_operand / second_operand
        else:
            # Unknown operator: nothing sensible to compute.
            return

        st.session_state.calc_display = str(result)
        st.session_state.first_operand = result  # So you can chain operations with the result
        st.session_state.operator = None
        st.session_state.waiting_for_second_operand = True

    def handle_clear():
        """Resets the calculator to its initial state."""
        st.session_state.calc_display = '0'
        st.session_state.first_operand = None
        st.session_state.operator = None
        st.session_state.waiting_for_second_operand = False

    # --- UI Layout ---
    # Display screen
    st.text_input("Result", st.session_state.calc_display, key="display", disabled=True)

    # Calculator buttons layout
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.button('7', on_click=partial(handle_digit, '7'), use_container_width=True)
        st.button('4', on_click=partial(handle_digit, '4'), use_container_width=True)
        st.button('1', on_click=partial(handle_digit, '1'), use_container_width=True)
        st.button('0', on_click=partial(handle_digit, '0'), use_container_width=True)

    with col2:
        st.button('8', on_click=partial(handle_digit, '8'), use_container_width=True)
        st.button('5', on_click=partial(handle_digit, '5'), use_container_width=True)
        st.button('2', on_click=partial(handle_digit, '2'), use_container_width=True)
        st.button('.', on_click=handle_decimal, use_container_width=True)

    with col3:
        st.button('9', on_click=partial(handle_digit, '9'), use_container_width=True)
        st.button('6', on_click=partial(handle_digit, '6'), use_container_width=True)
        st.button('3', on_click=partial(handle_digit, '3'), use_container_width=True)
        st.button('=', on_click=handle_equals, use_container_width=True)

    with col4:
        st.button('/', on_click=partial(handle_operator, '/'), use_container_width=True)
        st.button('Mul', on_click=partial(handle_operator, '*'), use_container_width=True)
        st.button('Sub', on_click=partial(handle_operator, '-'), use_container_width=True)
        st.button('Add', on_click=partial(handle_operator, '+'), use_container_width=True)

    st.button('C', on_click=handle_clear, use_container_width=True)
|
src/notepad_lite.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from datetime import datetime
|
3 |
+
|
4 |
+
def render_notepad():
    """
    Draws the "Note -- Lite" page: a text area bound to the session, a button
    that empties it and a button that downloads the note as a .txt file.
    """
    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>📝 Note -- Lite</h2>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>Jot down your thoughts, findings, or reminders. Your notes are saved for the current session.</p>", unsafe_allow_html=True)

    # First visit in this session: seed the note with a small template.
    if 'notepad_text' not in st.session_state:
        st.session_state['notepad_text'] = "## My Analysis Notes\n\n- Finding 1:\n- Finding 2:\n"

    def _wipe_note():
        # Used as an on_click callback; empties the value backing the text area.
        st.session_state['notepad_text'] = ""

    # The widget key is the session-state entry, so the text area reads and
    # writes `notepad_text` directly.
    st.text_area(
        "Your Notes",
        key='notepad_text',
        height=400,
        help="Your text is saved automatically as you type."
    )

    st.markdown("---")

    clear_column, download_column = st.columns(2)

    with clear_column:
        st.button(
            "🗑️ Clear Note",
            on_click=_wipe_note,
            help="Click to permanently delete the text in the notepad."
        )

    with download_column:
        # Timestamped file name, e.g. visio_ai_note_20240131_154500.txt
        note_filename = f"visio_ai_note_{datetime.now():%Y%m%d_%H%M%S}.txt"
        st.download_button(
            label="📥 Download Note as .txt",
            data=st.session_state['notepad_text'],
            file_name=note_filename,
            mime="text/plain"
        )
|
src/viz_ai_img.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import base64
|
3 |
+
import requests
|
4 |
+
from PIL import Image
|
5 |
+
import io
|
6 |
+
|
7 |
+
def analyze_image_ui():
    """
    Renders the UI for the Viz AI Image Analysis tool and handles the
    logic for sending requests to the OpenRouter API.

    The left column holds the image uploader and preview; the right column
    holds the prompt, the model picker and the "Analyze" button. On success the
    model's answer is rendered and stored in
    ``st.session_state['viz_ai_img_result']`` (keys ``image``, ``prompt``,
    ``analysis``, ``model``). Failures are reported with ``st.error`` /
    ``st.warning`` rather than raised.
    """
    st.markdown("---")
    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>🤖 Viz AI (Image)</h2>", unsafe_allow_html=True)
    st.markdown("<h5 style='text-align: center; color: grey;'>Uncover hidden patterns and details in your images.</h5>", unsafe_allow_html=True)

    # Use a two-column layout
    col1, col2 = st.columns(2)

    with col1:
        uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

        if uploaded_image:
            # Display the uploaded image
            image = Image.open(uploaded_image)
            st.image(image, caption="Uploaded Image", use_container_width=True)

    with col2:
        prompt_text = st.text_area(
            "Your Prompt:",
            "Describe this image in detail. What are the key objects, arrangements, and potential hidden patterns or meanings?",
            height=150
        )

        # Add a select box for the model
        model_selection = st.selectbox(
            "Choose a model:",
            (
                "meta-llama/llama-4-maverick:free",
                "opengvlab/internvl3-14b:free",
                "mistralai/mistral-small-3.1-24b-instruct:free",
                "google/gemma-3-27b-it:free",
            )
        )

        analyze_button = st.button("Analyze Image ✨")

    if analyze_button and uploaded_image:
        if not prompt_text.strip():
            st.error("Please enter a prompt.")
            return

        with st.spinner(f"AI is analyzing the image using {model_selection}..."):
            # Get the API key from secrets. Indexing st.secrets raises when the
            # key (or the whole secrets file) is missing, so that case has to be
            # caught here for the friendly message below to be reachable.
            try:
                api_key = st.secrets["OPENROUTER_API_KEY"]
            except (KeyError, FileNotFoundError):
                api_key = None
            if not api_key:
                st.error("OpenRouter API key is not set. Please add it to your secrets.")
                return

            try:
                # Convert image to base64. PNG cannot store every Pillow mode
                # (e.g. CMYK JPEGs), so convert those to RGB first.
                png_modes = {"1", "L", "LA", "I", "P", "RGB", "RGBA"}
                png_ready = image if image.mode in png_modes else image.convert("RGB")
                buffered = io.BytesIO()
                png_ready.save(buffered, format="PNG")
                img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')

                response = requests.post(
                    url="https://openrouter.ai/api/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": model_selection,
                        "messages": [
                            {
                                "role": "user",
                                "content": [
                                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
                                    {"type": "text", "text": prompt_text}
                                ]
                            }
                        ]
                    },
                    # (connect, read) timeouts in seconds: without them a
                    # stalled connection would keep the spinner up forever.
                    timeout=(10, 120)
                )

                response.raise_for_status()  # Will raise an HTTPError for bad responses (4xx or 5xx)

                result = response.json()
                try:
                    ai_response = result['choices'][0]['message']['content']
                except (KeyError, IndexError, TypeError):
                    # A 2xx reply without the expected structure (for example
                    # an error object instead of choices).
                    st.error(f"Unexpected response from the API: {result}")
                    return

                st.markdown("---")
                st.subheader("Analysis Result:")
                st.markdown(ai_response)

                # Save results to session state for the report
                st.session_state['viz_ai_img_result'] = {
                    "image": image,  # The PIL Image object
                    "prompt": prompt_text,
                    "analysis": ai_response,
                    "model": model_selection
                }
                st.success("✅ Analysis saved to the session report.")

            except requests.exceptions.HTTPError as http_err:
                st.error(f"HTTP error occurred: {http_err} - {response.text}")
            except requests.exceptions.RequestException as req_err:
                # Timeouts, DNS failures, refused connections, ...
                st.error(f"Network error occurred while contacting the API: {req_err}")
            except Exception as e:
                # Last-resort boundary so the page never crashes on an analysis failure.
                st.error(f"An error occurred: {e}")

    elif analyze_button and not uploaded_image:
        st.warning("Please upload an image first.")
|
src/viz_report.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from fpdf import FPDF
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import io
|
7 |
+
from datetime import datetime
|
8 |
+
import tempfile # To handle temporary files
|
9 |
+
import os # To interact with the operating system
|
10 |
+
|
11 |
+
class ComprehensivePDF(FPDF):
    """FPDF subclass that lays out the Visio AI comprehensive analysis report.

    Provides a cover page, section titles, monospaced text blocks and a helper
    that embeds matplotlib figures or PIL images.

    All text is written with fpdf's built-in fonts (Arial / Courier), which can
    only encode latin-1. Text passed to the public helpers is therefore
    sanitised so that other characters (emoji, typographic quotes, ...) are
    replaced with '?' instead of aborting report generation.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Name printed on the cover page; set by add_title_page().
        self.user_name = ""

    @staticmethod
    def _latin1_safe(text):
        """Return *text* as a str in which non-latin-1 characters are replaced by '?'."""
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    def header(self):
        """Print the centred report banner (page-header hook of FPDF)."""
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'Visio AI - Comprehensive Analysis Report', 0, 1, 'C')
        self.ln(5)

    def footer(self):
        """Print the centred page number 15 units above the bottom edge (page-footer hook of FPDF)."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def add_title_page(self, user_name=""):
        """Add the cover page.

        Args:
            user_name: Optional name shown as "Prepared for: ..."; the line is
                omitted when empty.
        """
        self.user_name = user_name
        self.add_page()
        self.set_font('Arial', 'B', 24)
        self.cell(0, 20, 'Comprehensive Analysis Report', 0, 1, 'C')
        self.ln(20)

        self.set_font('Arial', '', 12)
        if self.user_name:
            self.cell(0, 10, f"Prepared for: {self._latin1_safe(self.user_name)}", 0, 1, 'C')
        self.cell(0, 10, f"Date Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1, 'C')
        self.ln(20)

        # The logo is optional: fall back to a placeholder line when the file is absent.
        try:
            self.image("images/favicon.png", x=85, y=100, w=40)
        except FileNotFoundError:
            self.set_font('Arial', 'I', 10)
            self.cell(0, 10, "[Logo Not Found]", 0, 1, 'C')

        self.set_y(-40)
        self.set_font('Arial', 'I', 10)
        self.cell(0, 10, "Generated by Visio AI", 0, 1, 'C')

    def add_section_title(self, title):
        """Start a new page and print *title* as a left-aligned section heading."""
        self.add_page()
        self.set_font('Arial', 'B', 16)
        self.cell(0, 10, self._latin1_safe(title), 0, 1, 'L')
        self.ln(5)

    def add_text_content(self, title, content):
        """Print a bold *title* followed by *content* as a wrapped Courier block.

        Both arguments are converted with ``str()`` and sanitised to latin-1.
        """
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, self._latin1_safe(title), 0, 1, 'L')
        self.set_font('Courier', '', 10)
        self.multi_cell(0, 5, self._latin1_safe(content))
        self.ln(5)

    def add_image_from_object(self, image_object, width):
        """
        Saves a matplotlib figure or PIL image to a temporary file
        and adds it to the PDF, then deletes the file.

        Args:
            image_object: A matplotlib figure (anything exposing ``savefig``)
                or a PIL image (anything exposing ``save``).
            width: Width of the image in the PDF, passed to ``self.image`` as ``w``.
        """
        temp_path = None
        try:
            # Create a named temporary file; delete=False so that it can be
            # re-opened by path after the handle is closed.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as fp:
                temp_path = fp.name
                if hasattr(image_object, "savefig"):
                    # matplotlib figures have savefig(), not save().
                    image_object.savefig(fp, format="png")
                else:
                    image_object.save(fp, format="PNG")

            # Add the image to the PDF from the temporary file path
            self.image(temp_path, w=width)
        finally:
            # Ensure the temporary file is deleted, even when saving or embedding failed.
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
|
82 |
+
|
83 |
+
def render_report_page():
    """Render the report page and, on request, build the session's PDF report.

    Reads these ``st.session_state`` entries when present and not ``None``:
    ``updated_df`` (dataset), ``trained_model`` / ``model_metrics`` /
    ``selected_algo_name`` (ML results), ``viz_ai_img_result`` (image analysis)
    and ``word_cloud_result`` (word cloud). Each available item becomes a
    section of the PDF, which is then offered through a download button.
    """
    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>📄 Comprehensive Report Generator</h2>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>Generate a complete PDF report of your session's activities.</p>", unsafe_allow_html=True)

    user_name = st.text_input("Enter your name (optional, will be shown on the report cover)")

    if st.button("Generate Full Report 🚀"):
        session = st.session_state
        df = session.get('updated_df')
        img_result = session.get('viz_ai_img_result')
        wc_result = session.get('word_cloud_result')

        # Compare the values with None instead of only testing for key
        # presence: a key that exists but holds None is not reportable activity.
        if df is None and img_result is None and wc_result is None:
            st.warning("There is no activity to report. Please train a model or use the AI tools first.")
            return

        with st.spinner("Assembling your comprehensive report..."):
            pdf = ComprehensivePDF()
            pdf.add_title_page(user_name)

            # --- Section 1: Data Analysis & ML ---
            if df is not None:
                pdf.add_section_title("1. Dataset & Machine Learning Analysis")

                buffer = io.StringIO()
                df.info(buf=buffer)
                pdf.add_text_content("Dataset Information", buffer.getvalue())

                # describe(include=...) raises when no column matches, so only
                # summarise numeric columns when there are some.
                if len(df.select_dtypes(include='number').columns) > 0:
                    pdf.add_text_content("Numerical Summary", df.describe(include='number').to_string())
                if not df.select_dtypes(include='object').empty:
                    pdf.add_text_content("Categorical Summary", df.describe(include='object').to_string())

                if session.get('trained_model') is not None:
                    # Metrics may be unset/empty even though a model exists.
                    metrics = session.get('model_metrics') or {}
                    algo_name = session.get('selected_algo_name') or "Unknown"

                    pdf.set_font('Arial', 'B', 12)
                    pdf.cell(0, 10, f"Machine Learning Model: {algo_name}", 0, 1, 'L')

                    # The confusion matrix is drawn as a figure below, not printed as text.
                    metric_lines = [
                        f"{key}: {val:.4f}\n" if isinstance(val, float) else f"{key}: {val}\n"
                        for key, val in metrics.items()
                        if key != 'Confusion Matrix'
                    ]
                    if metric_lines:
                        pdf.add_text_content("Performance Metrics", "".join(metric_lines))

                    if 'Confusion Matrix' in metrics:
                        fig_cm, ax_cm = plt.subplots()
                        try:
                            sns.heatmap(metrics['Confusion Matrix'], annot=True, fmt='d', cmap='Blues', ax=ax_cm)
                            ax_cm.set_title('Confusion Matrix')
                            pdf.set_font('Arial', 'B', 12)
                            pdf.cell(0, 10, "Confusion Matrix", 0, 1, 'L')
                            pdf.add_image_from_object(fig_cm, width=170)
                        finally:
                            plt.close(fig_cm)  # Close the figure to free memory

            # --- Section 2: Viz AI Image Analysis ---
            if img_result is not None:
                pdf.add_section_title("2. Viz AI Image Analysis")

                pdf.add_image_from_object(img_result['image'], width=150)
                pdf.ln(5)

                pdf.add_text_content("Model Used", img_result['model'])
                pdf.add_text_content("User Prompt", img_result['prompt'])
                pdf.add_text_content("AI Analysis", img_result['analysis'])

            # --- Section 3: Word Cloud ---
            if wc_result is not None:
                pdf.add_section_title("3. Word Cloud Analysis")
                pdf.add_text_content("Source File", wc_result['source'])
                pdf.add_text_content("Settings", wc_result['settings'])
                pdf.set_font('Arial', 'B', 12)
                pdf.cell(0, 10, "Generated Word Cloud", 0, 1, 'L')
                pdf.add_image_from_object(wc_result['figure'], width=170)
                plt.close(wc_result['figure'])  # Close the figure to free memory

            # --- Generate Download ---
            pdf_output = pdf.output()
            # output() may hand back a bytearray; normalise to bytes, the
            # binary type st.download_button is documented to accept.
            if not isinstance(pdf_output, (bytes, str)):
                pdf_output = bytes(pdf_output)

        st.success("Report Generated!")
        st.download_button(
            label="📥 Download Full Report",
            data=pdf_output,
            file_name=f"VisioAI_Comprehensive_Report_{datetime.now().strftime('%Y%m%d')}.pdf",
            mime="application/pdf"
        )
|
src/word_cloud.py
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from wordcloud import WordCloud, STOPWORDS
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import io
|
6 |
+
|
7 |
+
# Libraries for file processing
|
8 |
+
import PyPDF2
|
9 |
+
from docx import Document
|
10 |
+
|
11 |
+
def extract_text_from_file(uploaded_file):
    """Extract the content of an uploaded file based on its extension.

    Args:
        uploaded_file: A file-like object exposing a ``name`` attribute and a
            ``read()`` method returning bytes (in this module it is the
            object returned by ``st.file_uploader``).

    Returns:
        str: The extracted text for ``.pdf``, ``.docx`` and ``.txt`` files.
        pandas.DataFrame: The parsed table for ``.csv`` and ``.xlsx`` files.
        None: When the extension is not one of the supported ones.
    """
    # Compare extensions case-insensitively so "REPORT.PDF" is treated the
    # same as "report.pdf" instead of silently falling through to None.
    filename = uploaded_file.name.lower()

    if filename.endswith('.pdf'):
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # `or ""` guards against pages that yield no extractable text;
        # join avoids repeated string concatenation across many pages.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if filename.endswith('.docx'):
        doc = Document(uploaded_file)
        return "\n".join(para.text for para in doc.paragraphs)

    if filename.endswith('.txt'):
        # The upload is read as bytes; undecodable bytes are replaced rather
        # than raising, so a non-UTF-8 file still produces usable text.
        return uploaded_file.read().decode('utf-8', errors='replace')

    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)

    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)

    return None
|
29 |
+
|
30 |
+
def _parse_custom_stopwords(raw_stopwords):
    """Split a comma-separated string into a set of trimmed, lower-cased words."""
    return {
        word.strip()
        for word in raw_stopwords.lower().split(',')
        if word.strip()
    }


def render_word_cloud_page():
    """
    Renders the UI and logic for the Word Cloud Generator page.

    Flow:
        1. Let the user upload a PDF, DOCX, TXT, CSV or XLSX file.
        2. Extract its text via ``extract_text_from_file``; for tabular files
           the user picks which text column to use.
        3. Expose customization widgets (colors, word limit, stopwords).
        4. On button press, build the word cloud, display it, store it in
           ``st.session_state['word_cloud_result']`` and offer a PNG download.

    Returns:
        None. All output is rendered through Streamlit.
    """
    st.markdown("<h2 style='text-align: center; color: #4A90E2;'>😶🌫️ Word Cloud Generator</h2>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>Create beautiful word clouds from your text data. Supports PDF, DOCX, TXT, CSV, and Excel files.</p>", unsafe_allow_html=True)

    uploaded_file = st.file_uploader(
        "Choose a file",
        type=['pdf', 'docx', 'txt', 'csv', 'xlsx']
    )

    # Nothing to do until the user provides a file.
    if uploaded_file is None:
        return

    with st.spinner("Processing file..."):
        extracted_content = extract_text_from_file(uploaded_file)

    text_data = None
    if isinstance(extracted_content, pd.DataFrame):
        st.info("CSV/Excel file detected. Please select the column to generate the word cloud from.")
        df = extracted_content
        text_columns = df.select_dtypes(include=['object', 'string']).columns.tolist()

        if not text_columns:
            st.error("No text-based columns found in the uploaded file.")
            return

        column_to_use = st.selectbox("Select a column:", text_columns)
        if column_to_use:
            text_data = " ".join(df[column_to_use].dropna().astype(str))
    else:
        text_data = extracted_content

    # Tell the user why nothing is shown instead of silently rendering nothing.
    if not text_data:
        st.warning("No text could be extracted from the uploaded file.")
        return

    st.markdown("---")
    st.subheader("Customize Your Word Cloud")

    col1, col2 = st.columns(2)
    with col1:
        colormap = st.selectbox("Color Scheme", ["viridis", "plasma", "inferno", "magma", "cividis", "Greys", "Purples", "Blues", "Greens", "Oranges", "Reds"])
        max_words = st.slider("Maximum Words", 50, 500, 200)
        bg_color = st.color_picker("Background Color", "#FFFFFF")

    with col2:
        contour_width = st.slider("Contour Width", 0.0, 5.0, 0.0, 0.1)
        contour_color = st.color_picker("Contour Color", "#0000FF")
        add_stopwords = st.text_area("Add Custom Stopwords (comma-separated)")

    if not st.button("Generate Word Cloud ✨"):
        return

    with st.spinner("Creating your masterpiece..."):
        custom_stopwords = set(STOPWORDS)
        if add_stopwords:
            # Strip whitespace around each entry: "foo, bar" must register
            # "bar", not " bar", otherwise the stopword never matches a token.
            custom_stopwords.update(_parse_custom_stopwords(add_stopwords))

        # Broad catch is deliberate: this is the UI boundary, and any failure
        # is reported to the user rather than crashing the page.
        try:
            wordcloud = WordCloud(
                width=1200,
                height=600,
                background_color=bg_color,
                stopwords=custom_stopwords,
                max_words=max_words,
                colormap=colormap,
                contour_width=contour_width,
                contour_color=contour_color
            ).generate(text_data)

            st.markdown("---")
            st.subheader("Generated Word Cloud")
            fig, ax = plt.subplots(figsize=(12, 6))
            ax.imshow(wordcloud, interpolation='bilinear')
            ax.axis('off')
            st.pyplot(fig)

            # Close the figure of a previously stored result so repeated
            # generations do not accumulate open matplotlib figures.
            if 'word_cloud_result' in st.session_state and st.session_state['word_cloud_result'] is not None:
                previous_figure = st.session_state['word_cloud_result'].get("figure")
                if previous_figure is not None:
                    plt.close(previous_figure)

            # Keep the result in the session so the report page can embed it.
            st.session_state['word_cloud_result'] = {
                "figure": fig,  # The matplotlib figure object
                "source": uploaded_file.name,
                "settings": f"Colors: {colormap}, Max Words: {max_words}"
            }
            st.success("✅ Word cloud saved to the session report.")

            # Create a download button for the image
            buf = io.BytesIO()
            fig.savefig(buf, format="png", bbox_inches='tight')
            st.download_button(
                label="📥 Download Image",
                data=buf.getvalue(),
                file_name="word_cloud.png",
                mime="image/png"
            )

        except Exception as e:
            st.error(f"An error occurred while generating the word cloud: {e}")
|