Bibek Mukherjee committed on
Commit
57205fc
·
verified ·
1 Parent(s): 474ddf8

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -227
app.py DELETED
@@ -1,227 +0,0 @@
1
-
2
- import pandas as pd
3
- import numpy as np
4
- from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
5
- import matplotlib.pyplot as plt
6
- import seaborn as sns
7
- import pickle
8
- import gradio as gr
9
- import os
10
-
11
- # Load the model
12
# The pickle is expected to hold a dict with the estimator, the per-column
# label encoders, the target encoder and the ordered feature list.
# NOTE(review): pickle.load executes arbitrary code from the file; only ship
# a model file that comes from a trusted source.
model_path = 'career_prediction_model.pkl'
with open(model_path, 'rb') as f:
    saved_data = pickle.load(f)

model, label_encoders, target_encoder, features = (
    saved_data[key]
    for key in ('model', 'label_encoders', 'target_encoder', 'features')
)

# Name of the CSV column that holds the true label during batch evaluation.
target = 'What would you like to become when you grow up'
21
-
22
- # Function for individual prediction
23
def predict_career(work_env, academic_perf, motivation, leadership, tech_savvy):
    """Predict a career for a single respondent and describe it as text.

    Args:
        work_env: Value for the 'Preferred Work Environment' column.
        academic_perf: Value for 'Academic Performance (CGPA/Percentage)';
            anything ``float()`` accepts.
        motivation: Value for the 'Motivation for Career Choice ' column.
        leadership: Value for the 'Leadership Experience' column.
        tech_savvy: Value for the 'Tech-Savviness' column.

    Returns:
        A string starting with ``"Predicted career: ..."``. When the model
        exposes ``predict_proba`` the per-class probabilities follow, sorted
        from most to least likely. If ``academic_perf`` is missing or not
        numeric, an ``"Error: ..."`` message is returned instead.
    """
    # The UI number field yields None when left empty; report that to the
    # user instead of crashing inside float().
    try:
        academic_value = float(academic_perf)
    except (TypeError, ValueError):
        return "Error: Academic Performance must be a number."

    input_data = pd.DataFrame({
        'Preferred Work Environment': [work_env],
        'Academic Performance (CGPA/Percentage)': [academic_value],
        'Motivation for Career Choice ': [motivation],  # key ends with a space
        'Leadership Experience': [leadership],
        'Tech-Savviness': [tech_savvy],
    })

    # Encode categorical features with the encoders stored next to the model.
    for feature in features:
        if feature in label_encoders and input_data[feature].dtype == 'object':
            try:
                input_data[feature] = label_encoders[feature].transform(input_data[feature])
            except ValueError:
                # Unseen label: fall back to encoded value 0, i.e. the first
                # entry of the encoder's classes_.
                print(f"Warning: Unknown category in {feature}. Using first category.")
                input_data[feature] = 0

    # Hand the columns to the model in the order recorded in `features`
    # rather than the order of the dict literal above.
    input_data = input_data[list(features)]

    prediction = model.predict(input_data)[0]
    predicted_career = target_encoder.inverse_transform([int(prediction)])[0]

    if not hasattr(model, 'predict_proba'):
        return f"Predicted career: {predicted_career}"

    probabilities = model.predict_proba(input_data)[0]
    # predict_proba columns line up with model.classes_, which is not
    # guaranteed to be 0..n-1; fall back to positions only if it is absent.
    class_ids = getattr(model, 'classes_', range(len(probabilities)))
    class_names = target_encoder.inverse_transform([int(c) for c in class_ids])
    ranked = sorted(zip(class_names, probabilities),
                    key=lambda pair: pair[1], reverse=True)

    lines = [f"Predicted career: {predicted_career}", "", "Probabilities:"]
    lines.extend(f"{career}: {prob:.2f}" for career, prob in ranked)
    return "\n".join(lines) + "\n"
60
-
61
- # Function for batch evaluation
62
def _read_csv_any_encoding(path):
    """Read *path* as CSV; return ``(frame, None)`` or ``(None, message)``."""
    # 'utf-8-sig' goes first so a byte-order mark is stripped from the first
    # header; 'latin1' goes last because it decodes any byte sequence and
    # would otherwise hide every encoding listed after it.
    failure = "Error: Could not decode the CSV file with any common encodings."
    for encoding in ('utf-8-sig', 'cp1252', 'latin1'):
        try:
            return pd.read_csv(path, encoding=encoding), None
        except UnicodeDecodeError:
            failure = "Error: Could not decode the CSV file with any common encodings."
        except Exception as e:
            failure = f"Error reading CSV: {str(e)}"
    return None, failure


def _encode_with_default(values, encoder):
    """Map *values* to positions in ``encoder.classes_``; unseen values become 0."""
    position = {label: idx for idx, label in enumerate(encoder.classes_)}
    return values.map(lambda v: position.get(v, 0)).astype(int)


def evaluate_model_with_csv(csv_file):
    """Score the loaded model against a labelled CSV file.

    Args:
        csv_file: The uploaded file, either an object exposing the path as
            ``.name`` or the path string itself.

    Returns:
        A two-item list ``[text, image_path]``. On success ``text`` is the
        evaluation summary and ``image_path`` points at the saved confusion
        matrix. For input problems (no file, unreadable CSV, missing columns,
        no rows) ``text`` is an error message and ``image_path`` is ``None``.
        For any other failure ``text`` is the error message and
        ``image_path`` points at an image containing that message.
    """
    try:
        if csv_file is None:
            return ["Error: No CSV file was provided.", None]
        csv_path = getattr(csv_file, 'name', csv_file)

        test_df, read_error = _read_csv_any_encoding(csv_path)
        if read_error is not None:
            return [read_error, None]

        # Check that every required column is present.
        missing_cols = [col for col in features + [target] if col not in test_df.columns]
        if missing_cols:
            return [f"Error: The following required columns are missing in the CSV: {missing_cols}", None]
        if test_df.empty:
            return ["Error: The CSV file contains no rows.", None]

        X_eval = test_df[features].copy()
        X_eval = X_eval.fillna('Unknown')

        # Coerce the score column to numbers; anything unparseable (including
        # the 'Unknown' filler above) is replaced by the column mean. The
        # result is assigned back so the fill cannot be lost on a temporary.
        perf_col = 'Academic Performance (CGPA/Percentage)'
        perf = pd.to_numeric(X_eval[perf_col], errors='coerce')
        X_eval[perf_col] = perf.fillna(perf.mean())

        # Encode categorical features; unseen categories map to 0.
        for feature in features:
            if feature in label_encoders and X_eval[feature].dtype == 'object':
                X_eval[feature] = _encode_with_default(X_eval[feature], label_encoders[feature])

        # NOTE(review): rows with a missing label are scored as
        # 'Corporate Employee' and rows with an unknown label as class 0, so
        # both still count towards the reported accuracy.
        y_true = test_df[target].fillna('Corporate Employee')
        y_true_encoded = _encode_with_default(y_true, target_encoder).to_numpy()

        y_pred = np.asarray(model.predict(X_eval)).astype(int)
        accuracy = accuracy_score(y_true_encoded, y_pred)

        class_names = np.asarray(target_encoder.classes_)
        results_df = pd.DataFrame({
            'Actual Career': class_names[y_true_encoded],
            'Predicted Career': class_names[y_pred],
        })
        results_df['Correct'] = results_df['Actual Career'] == results_df['Predicted Career']
        correct_count = results_df['Correct'].sum()
        total_count = len(results_df)

        # Draw and save the confusion matrix, always releasing the figure so
        # repeated requests do not accumulate open figures.
        fig = plt.figure(figsize=(12, 10))
        try:
            cm = pd.crosstab(results_df['Actual Career'], results_df['Predicted Career'])
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title('Confusion Matrix')
            plt.ylabel('Actual Career')
            plt.xlabel('Predicted Career')
            plt.tight_layout()
            cm_path = 'confusion_matrix.png'
            fig.savefig(cm_path)
        finally:
            plt.close(fig)

        # Passing `labels` keeps target_names aligned even when the CSV does
        # not contain every class the encoder knows.
        report = classification_report(
            y_true_encoded,
            y_pred,
            labels=np.arange(len(class_names)),
            target_names=class_names,
            output_dict=True,
            zero_division=0,
        )

        parts = [
            "Model Evaluation Results:\n\n",
            f"Total samples: {total_count}\n",
            f"Correct predictions: {correct_count}\n",
            f"Accuracy: {accuracy:.4f}\n\n",
            "Class-wise Performance:\n",
        ]
        for class_name in class_names:
            if class_name in report:
                metrics = report[class_name]
                parts.append(f"\n{class_name}:\n")
                parts.append(f" Precision: {metrics['precision']:.4f}\n")
                parts.append(f" Recall: {metrics['recall']:.4f}\n")
                parts.append(f" F1-score: {metrics['f1-score']:.4f}\n")

        return ["".join(parts), cm_path]

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in evaluation: {str(e)}\n{error_details}")

        # Render the message into a small image for the image output slot.
        fig = plt.figure(figsize=(6, 4))
        try:
            plt.text(0.5, 0.5, f"Error: {str(e)}",
                     horizontalalignment='center', verticalalignment='center',
                     fontsize=12, color='red')
            plt.axis('off')
            error_path = 'error_image.png'
            fig.savefig(error_path)
        finally:
            plt.close(fig)

        return [f"Error: {str(e)}", error_path]
184
-
185
- # Get unique values for dropdowns
186
# Dropdown choices are the classes_ of the encoder stored for each column.
work_env_options, motivation_options, leadership_options, tech_savvy_options = (
    list(label_encoders[column].classes_)
    for column in (
        'Preferred Work Environment',
        'Motivation for Career Choice ',  # key ends with a space
        'Leadership Experience',
        'Tech-Savviness',
    )
)

# Tab 1: form that feeds one respondent's answers to predict_career.
iface = gr.Interface(
    fn=predict_career,
    inputs=[
        gr.Dropdown(choices=work_env_options, label="Preferred Work Environment"),
        gr.Number(label="Academic Performance (CGPA/Percentage)", minimum=0, maximum=10),
        gr.Dropdown(choices=motivation_options, label="Motivation for Career Choice"),
        gr.Dropdown(choices=leadership_options, label="Leadership Experience"),
        gr.Dropdown(choices=tech_savvy_options, label="Tech-Savviness"),
    ],
    outputs="text",
    title="Career Prediction Model",
    description="Enter your details to predict your future career path",
    theme="huggingface",
)

# Tab 2: CSV upload that feeds evaluate_model_with_csv and shows its
# text summary alongside the image it returns.
eval_iface = gr.Interface(
    fn=evaluate_model_with_csv,
    inputs=gr.File(label="Upload Test CSV File"),
    outputs=[
        gr.Textbox(label="Evaluation Results"),
        gr.Image(label="Confusion Matrix"),
    ],
    title="Career Prediction Model Evaluation",
    description="Upload a CSV file with test data to evaluate the model's performance",
    theme="huggingface",
)

# Combine both interfaces as tabs and start the app.
demo = gr.TabbedInterface(
    interface_list=[iface, eval_iface],
    tab_names=["Individual Prediction", "Batch Evaluation"],
)

demo.launch()