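"""Gradio app for the career prediction model.

Loads a pickled classifier together with its label encoders and serves two
tabs: an individual prediction form and a batch evaluation that scores an
uploaded CSV of labeled test data.
"""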
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import gradio as gr

# Load the model and its preprocessing objects
model_path = 'career_prediction_model.pkl'
with open(model_path, 'rb') as f:
    saved_data = pickle.load(f)

model = saved_data['model']
label_encoders = saved_data['label_encoders']
target_encoder = saved_data['target_encoder']
features = saved_data['features']
target = 'What would you like to become when you grow up'
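
# Note: `features` holds the training column names in their original order;
# the input frames built below must use these exact names (some include a
# trailing space, e.g. 'Motivation for Career Choice ').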


# Function for individual prediction
def predict_career(work_env, academic_perf, motivation, leadership, tech_savvy):
    # Prepare input data
    input_data = pd.DataFrame({
        'Preferred Work Environment': [work_env],
        'Academic Performance (CGPA/Percentage)': [float(academic_perf)],
        'Motivation for Career Choice ': [motivation],  # Note the trailing space in the column name
        'Leadership Experience': [leadership],
        'Tech-Savviness': [tech_savvy]
    })

    # Encode categorical features
    for feature in features:
        if feature in label_encoders and input_data[feature].dtype == 'object':
            try:
                input_data[feature] = label_encoders[feature].transform(input_data[feature])
            except ValueError:
                # Unknown category: fall back to encoded class 0
                print(f"Warning: unknown category in {feature}; defaulting to encoded class 0.")
                input_data[feature] = 0

    # Make prediction
    prediction = model.predict(input_data)[0]
    predicted_career = target_encoder.inverse_transform([int(prediction)])[0]

    # Get probabilities for all classes
    if hasattr(model, 'predict_proba'):
        probabilities = model.predict_proba(input_data)[0]
        class_probs = {target_encoder.inverse_transform([i])[0]: prob
                       for i, prob in enumerate(probabilities)}
        sorted_probs = dict(sorted(class_probs.items(), key=lambda x: x[1], reverse=True))
        result = f"Predicted career: {predicted_career}\n\nProbabilities:\n"
        for career, prob in sorted_probs.items():
            result += f"{career}: {prob:.2f}\n"
        return result
    else:
        return f"Predicted career: {predicted_career}"


# Function for batch evaluation
def evaluate_model_with_csv(csv_file):
    try:
        # Try common encodings until one of them can read the file
        encodings = ['utf-8', 'latin1', 'ISO-8859-1', 'cp1252', 'utf-8-sig']
        for encoding in encodings:
            try:
                test_df = pd.read_csv(csv_file.name, encoding=encoding)
                break
            except UnicodeDecodeError:
                if encoding == encodings[-1]:
                    return ["Error: Could not decode the CSV file with any of the common encodings.", None]
                continue
            except Exception as e:
                if encoding == encodings[-1]:
                    return [f"Error reading CSV: {str(e)}", None]
                continue

        # Check if required columns exist
        missing_cols = [col for col in features + [target] if col not in test_df.columns]
        if missing_cols:
            return [f"Error: The following required columns are missing in the CSV: {missing_cols}", None]

        # Preprocess the test data
        X_eval = test_df[features].copy()

        # Handle missing values
        X_eval = X_eval.fillna('Unknown')

        # Convert Academic Performance to numeric
        acad_col = 'Academic Performance (CGPA/Percentage)'
        X_eval[acad_col] = pd.to_numeric(X_eval[acad_col], errors='coerce')
        # Impute unparseable values with the column mean (plain assignment, since
        # inplace fillna on a column selection is unreliable in recent pandas)
        X_eval[acad_col] = X_eval[acad_col].fillna(X_eval[acad_col].mean())

        # Encode categorical features, mapping unknown categories to 0
        for feature in features:
            if feature in label_encoders and X_eval[feature].dtype == 'object':
                X_eval[feature] = X_eval[feature].apply(
                    lambda x: label_encoders[feature].transform([x])[0]
                    if x in label_encoders[feature].classes_ else 0
                )

        # Get the true labels
        y_true = test_df[target].copy()
        y_true = y_true.fillna('Corporate Employee')

        # Encode the true labels
        y_true_encoded = y_true.apply(
            lambda x: target_encoder.transform([x])[0]
            if x in target_encoder.classes_ else 0
        ).values

        # Make predictions
        y_pred = model.predict(X_eval)
        y_pred = np.array(y_pred).astype(int)

        # Calculate accuracy
        accuracy = accuracy_score(y_true_encoded, y_pred)

        # Create a DataFrame with actual vs predicted values
        results_df = pd.DataFrame({
            'Actual Career': [target_encoder.classes_[i] for i in y_true_encoded],
            'Predicted Career': [target_encoder.classes_[i] for i in y_pred]
        })

        # Count correct predictions
        results_df['Correct'] = results_df['Actual Career'] == results_df['Predicted Career']
        correct_count = results_df['Correct'].sum()
        total_count = len(results_df)

        # Create the confusion matrix plot
        plt.figure(figsize=(12, 10))
        cm = pd.crosstab(results_df['Actual Career'], results_df['Predicted Career'])
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.ylabel('Actual Career')
        plt.xlabel('Predicted Career')
        plt.tight_layout()
        # Save the confusion matrix and close the figure so repeated
        # evaluations don't leak open matplotlib figures
        cm_path = 'confusion_matrix.png'
        plt.savefig(cm_path)
        plt.close()

        # Prepare the results
        result_text = "Model Evaluation Results:\n\n"
        result_text += f"Total samples: {total_count}\n"
        result_text += f"Correct predictions: {correct_count}\n"
        result_text += f"Accuracy: {accuracy:.4f}\n\n"
        # Generate the classification report; pass explicit labels so
        # target_names stays aligned even if a class is absent from the test set
        report = classification_report(y_true_encoded, y_pred,
                                       labels=list(range(len(target_encoder.classes_))),
                                       target_names=target_encoder.classes_,
                                       output_dict=True,
                                       zero_division=0)

        # Add class-wise metrics
        result_text += "Class-wise Performance:\n"
        for class_name in target_encoder.classes_:
            if class_name in report:
                result_text += f"\n{class_name}:\n"
                result_text += f"  Precision: {report[class_name]['precision']:.4f}\n"
                result_text += f"  Recall: {report[class_name]['recall']:.4f}\n"
                result_text += f"  F1-score: {report[class_name]['f1-score']:.4f}\n"

        return [result_text, cm_path]
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in evaluation: {str(e)}\n{error_details}")

        # Create a simple error image
        plt.figure(figsize=(6, 4))
        plt.text(0.5, 0.5, f"Error: {str(e)}",
                 horizontalalignment='center', verticalalignment='center',
                 fontsize=12, color='red')
        plt.axis('off')
        error_path = 'error_image.png'
        plt.savefig(error_path)
        plt.close()
        return [f"Error: {str(e)}", error_path]


# Get unique values for dropdowns
work_env_options = list(label_encoders['Preferred Work Environment'].classes_)
motivation_options = list(label_encoders['Motivation for Career Choice '].classes_)
leadership_options = list(label_encoders['Leadership Experience'].classes_)
tech_savvy_options = list(label_encoders['Tech-Savviness'].classes_)

# Create the Gradio interface
iface = gr.Interface(
    fn=predict_career,
    inputs=[
        gr.Dropdown(work_env_options, label="Preferred Work Environment"),
        gr.Number(label="Academic Performance (CGPA/Percentage)", minimum=0, maximum=10),
        gr.Dropdown(motivation_options, label="Motivation for Career Choice"),
        gr.Dropdown(leadership_options, label="Leadership Experience"),
        gr.Dropdown(tech_savvy_options, label="Tech-Savviness")
    ],
    outputs="text",
    title="Career Prediction Model",
    description="Enter your details to predict your future career path",
    theme="huggingface"
)

# Create a separate interface for model evaluation
eval_iface = gr.Interface(
    fn=evaluate_model_with_csv,
    inputs=gr.File(label="Upload Test CSV File"),
    outputs=[
        gr.Textbox(label="Evaluation Results"),
        gr.Image(label="Confusion Matrix")
    ],
    title="Career Prediction Model Evaluation",
    description="Upload a CSV file with test data to evaluate the model's performance",
    theme="huggingface"
)

# Create a tabbed interface
demo = gr.TabbedInterface(
    [iface, eval_iface],
    ["Individual Prediction", "Batch Evaluation"]
)

# Launch the interface
demo.launch()
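
# Note: when hosted as a Hugging Face Space, this launch() call runs at import
# time; options such as share=True or server_name can be passed here when
# running locally.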