# Spaces: Sleeping
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import pickle | |
import gradio as gr | |
import os | |
# Load the persisted training bundle.
# NOTE: unpickling runs code embedded in the file, so the model file must
# come from a trusted source.
model_path = 'career_prediction_model.pkl'
with open(model_path, 'rb') as f:
    saved_data = pickle.load(f)

# Unpack the bundle into the module-level names the functions below rely on.
model, label_encoders, target_encoder, features = (
    saved_data[key]
    for key in ('model', 'label_encoders', 'target_encoder', 'features')
)

# Name of the label column expected in evaluation CSV files.
target = 'What would you like to become when you grow up'
# Function for individual prediction
def predict_career(work_env, academic_perf, motivation, leadership, tech_savvy):
    """Predict a career for one respondent and format the result as text.

    Args:
        work_env: Value for the 'Preferred Work Environment' column.
        academic_perf: Academic score; anything ``float()`` accepts.
        motivation: Value for the 'Motivation for Career Choice ' column.
        leadership: Value for the 'Leadership Experience' column.
        tech_savvy: Value for the 'Tech-Savviness' column.

    Returns:
        A string naming the predicted career, followed by the per-class
        probabilities (highest first) when the model exposes
        ``predict_proba``. When ``academic_perf`` is missing or not numeric,
        an error message string is returned instead of raising.
    """
    # An empty number field arrives as None; report it instead of crashing.
    try:
        academic_score = float(academic_perf)
    except (TypeError, ValueError):
        return "Error: Academic Performance must be a number."

    # Prepare input data
    input_data = pd.DataFrame({
        'Preferred Work Environment': [work_env],
        'Academic Performance (CGPA/Percentage)': [academic_score],
        # The trailing space is part of the column name used by the encoders.
        'Motivation for Career Choice ': [motivation],
        'Leadership Experience': [leadership],
        'Tech-Savviness': [tech_savvy],
    })

    # Encode categorical features. Checking "not numeric" instead of
    # "dtype == object" keeps this working when pandas stores text in a
    # dedicated string dtype.
    for feature in features:
        if feature not in label_encoders:
            continue
        if pd.api.types.is_numeric_dtype(input_data[feature]):
            continue
        try:
            input_data[feature] = label_encoders[feature].transform(input_data[feature])
        except ValueError:
            # Unseen category: fall back to encoded value 0 (the first class).
            print(f"Warning: Unknown category in {feature}. Using first known category.")
            input_data[feature] = 0

    # Feed the columns in the order stored with the model, as the batch
    # evaluation path does.
    model_input = input_data[list(features)]

    # Make prediction
    prediction = model.predict(model_input)[0]
    predicted_career = target_encoder.inverse_transform([int(prediction)])[0]

    if not hasattr(model, 'predict_proba'):
        return f"Predicted career: {predicted_career}"

    # predict_proba columns follow model.classes_, which need not be
    # 0..n-1 if some class was absent at fit time; fall back to positional
    # indices only when the model does not expose classes_.
    probabilities = model.predict_proba(model_input)[0]
    class_ids = getattr(model, 'classes_', range(len(probabilities)))
    career_names = target_encoder.inverse_transform([int(c) for c in class_ids])
    ranked = sorted(
        zip(career_names, probabilities),
        key=lambda pair: pair[1],
        reverse=True,
    )

    header = f"Predicted career: {predicted_career}\n\nProbabilities:\n"
    return header + "".join(f"{career}: {prob:.2f}\n" for career, prob in ranked)
# Function for batch evaluation
def _read_csv_with_fallback(csv_path):
    """Load a CSV file, trying progressively more permissive encodings.

    Returns:
        A ``(dataframe, error_message)`` pair; exactly one item is ``None``.
    """
    # 'utf-8-sig' goes first so a byte-order mark never ends up in the first
    # column name. 'latin1' goes last because it accepts every byte sequence
    # and would make any candidate listed after it unreachable.
    for encoding in ('utf-8-sig', 'cp1252', 'latin1'):
        try:
            return pd.read_csv(csv_path, encoding=encoding), None
        except UnicodeDecodeError:
            continue
        except Exception as exc:
            # Not an encoding problem, so retrying with another codec is moot.
            return None, f"Error reading CSV: {str(exc)}"
    return None, "Error: Could not decode the CSV file with any common encodings."


def _encode_with_fallback(values, encoder):
    """Map *values* to the encoder's integer codes; unknown values become 0."""
    # A label's code is its position in encoder.classes_, so one dict lookup
    # per row replaces a transform() call per row.
    code_by_label = {label: code for code, label in enumerate(encoder.classes_)}
    return values.map(code_by_label).fillna(0).astype(int)


def _save_error_image(message):
    """Render *message* into 'error_image.png' and return that path."""
    fig = plt.figure(figsize=(6, 4))
    try:
        plt.text(0.5, 0.5, f"Error: {message}",
                 horizontalalignment='center', verticalalignment='center',
                 fontsize=12, color='red')
        plt.axis('off')
        error_path = 'error_image.png'
        plt.savefig(error_path)
    finally:
        # Close the figure so repeated failures do not accumulate figures.
        plt.close(fig)
    return error_path


def evaluate_model_with_csv(csv_file):
    """Evaluate the loaded model against a labelled CSV file.

    Args:
        csv_file: The uploaded file, either a filesystem path or an object
            whose ``name`` attribute holds the path.

    Returns:
        A two-item list ``[report_text, image_path]``. ``image_path`` is the
        saved confusion-matrix PNG on success, an error PNG when an
        unexpected exception occurs, and ``None`` for input validation
        failures (no file, unreadable file, missing columns, no rows).
    """
    if csv_file is None:
        return ["Error: No CSV file was uploaded.", None]

    try:
        # Accept both a plain path string and a file-like wrapper with .name.
        csv_path = getattr(csv_file, 'name', csv_file)
        test_df, read_error = _read_csv_with_fallback(csv_path)
        if read_error is not None:
            return [read_error, None]

        # Check if required columns exist
        feature_cols = list(features)
        missing_cols = [col for col in feature_cols + [target] if col not in test_df.columns]
        if missing_cols:
            return [f"Error: The following required columns are missing in the CSV: {missing_cols}", None]
        if test_df.empty:
            return ["Error: The CSV file contains no rows.", None]

        # Preprocess the test data
        X_eval = test_df[feature_cols].copy()
        X_eval = X_eval.fillna('Unknown')

        # Convert Academic Performance to numeric; unparsable entries take the
        # column mean. Assign the result back instead of calling
        # fillna(inplace=True) on a selected column, which does not reliably
        # update the frame.
        perf_col = 'Academic Performance (CGPA/Percentage)'
        scores = pd.to_numeric(X_eval[perf_col], errors='coerce')
        X_eval[perf_col] = scores.fillna(scores.mean())

        # Encode categorical features; unknown categories map to 0.
        for feature in feature_cols:
            if feature in label_encoders and not pd.api.types.is_numeric_dtype(X_eval[feature]):
                X_eval[feature] = _encode_with_fallback(X_eval[feature], label_encoders[feature])

        # Get the true labels.
        # NOTE: missing labels are treated as 'Corporate Employee' and labels
        # unknown to the encoder are scored as class 0, as before; such rows
        # therefore influence the reported metrics.
        y_true = test_df[target].fillna('Corporate Employee')
        y_true_encoded = _encode_with_fallback(y_true, target_encoder).values

        # Make predictions
        y_pred = np.array(model.predict(X_eval)).astype(int)

        # Calculate accuracy
        accuracy = accuracy_score(y_true_encoded, y_pred)

        # Create a DataFrame with actual vs predicted values
        results_df = pd.DataFrame({
            'Actual Career': [target_encoder.classes_[i] for i in y_true_encoded],
            'Predicted Career': [target_encoder.classes_[i] for i in y_pred],
        })
        results_df['Correct'] = results_df['Actual Career'] == results_df['Predicted Career']
        correct_count = results_df['Correct'].sum()
        total_count = len(results_df)

        # Create and save the confusion matrix
        cm = pd.crosstab(results_df['Actual Career'], results_df['Predicted Career'])
        cm_path = 'confusion_matrix.png'
        fig, ax = plt.subplots(figsize=(12, 10))
        try:
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
            ax.set_title('Confusion Matrix')
            ax.set_ylabel('Actual Career')
            ax.set_xlabel('Predicted Career')
            fig.tight_layout()
            fig.savefig(cm_path)
        finally:
            # Close the figure so repeated evaluations do not accumulate figures.
            plt.close(fig)

        # Generate classification report. Passing labels= keeps the report
        # aligned with target_names even when the CSV does not contain every
        # class; zero_division=0 scores absent classes as 0 without warnings.
        report = classification_report(
            y_true_encoded,
            y_pred,
            labels=np.arange(len(target_encoder.classes_)),
            target_names=target_encoder.classes_,
            output_dict=True,
            zero_division=0,
        )

        # Prepare the results
        parts = [
            "Model Evaluation Results:\n\n",
            f"Total samples: {total_count}\n",
            f"Correct predictions: {correct_count}\n",
            f"Accuracy: {accuracy:.4f}\n\n",
            "Class-wise Performance:\n",
        ]
        for class_name in target_encoder.classes_:
            if class_name in report:
                metrics = report[class_name]
                parts.append(f"\n{class_name}:\n")
                parts.append(f" Precision: {metrics['precision']:.4f}\n")
                parts.append(f" Recall: {metrics['recall']:.4f}\n")
                parts.append(f" F1-score: {metrics['f1-score']:.4f}\n")
        return ["".join(parts), cm_path]
    except Exception as e:
        # Top-level boundary for the UI handler: log the traceback and show
        # the failure to the user rather than letting the request die.
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in evaluation: {str(e)}\n{error_details}")
        return [f"Error: {str(e)}", _save_error_image(str(e))]
# Dropdown choices are the categories known to each fitted label encoder.
work_env_options, motivation_options, leadership_options, tech_savvy_options = (
    list(label_encoders[column].classes_)
    for column in (
        'Preferred Work Environment',
        'Motivation for Career Choice ',  # trailing space is part of the key
        'Leadership Experience',
        'Tech-Savviness',
    )
)

# Single-prediction tab: one widget per predict_career argument, in order.
iface = gr.Interface(
    fn=predict_career,
    inputs=[
        gr.Dropdown(work_env_options, label="Preferred Work Environment"),
        gr.Number(
            label="Academic Performance (CGPA/Percentage)",
            minimum=0,
            maximum=10,
        ),
        gr.Dropdown(motivation_options, label="Motivation for Career Choice"),
        gr.Dropdown(leadership_options, label="Leadership Experience"),
        gr.Dropdown(tech_savvy_options, label="Tech-Savviness"),
    ],
    outputs="text",
    title="Career Prediction Model",
    description="Enter your details to predict your future career path",
    theme="huggingface",
)

# Batch-evaluation tab: upload a CSV, get a text report and a matrix image.
eval_iface = gr.Interface(
    fn=evaluate_model_with_csv,
    inputs=gr.File(label="Upload Test CSV File"),
    outputs=[
        gr.Textbox(label="Evaluation Results"),
        gr.Image(label="Confusion Matrix"),
    ],
    title="Career Prediction Model Evaluation",
    description="Upload a CSV file with test data to evaluate the model's performance",
    theme="huggingface",
)

# Combine both interfaces into tabs and start the app.
demo = gr.TabbedInterface(
    interface_list=[iface, eval_iface],
    tab_names=["Individual Prediction", "Batch Evaluation"],
)
demo.launch()