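"""Gradio app for a career prediction model.

Loads a pickled classifier together with its label encoders, serves single
predictions, and evaluates the model against an uploaded CSV of test data.
"""
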
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import gradio as gr
import os

# Load the model
model_path = 'career_prediction_model.pkl'
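# Fail fast with a clear message if the pickle is absent (training happens
# outside this script)
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Missing '{model_path}'. Train and save the model first.")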
with open(model_path, 'rb') as f:
    saved_data = pickle.load(f)

model = saved_data['model']
label_encoders = saved_data['label_encoders']
target_encoder = saved_data['target_encoder']
features = saved_data['features']
target = 'What would you like to become when you grow up'

# Function for individual prediction
def predict_career(work_env, academic_perf, motivation, leadership, tech_savvy):
    # Prepare input data
    input_data = pd.DataFrame({
        'Preferred Work Environment': [work_env],
        'Academic Performance (CGPA/Percentage)': [float(academic_perf)],
        'Motivation for Career Choice ': [motivation],  # Note the space at the end
        'Leadership Experience': [leadership],
        'Tech-Savviness': [tech_savvy]
    })
    
    # Encode categorical features
    for feature in features:
        if feature in label_encoders and input_data[feature].dtype == 'object':
            try:
                input_data[feature] = label_encoders[feature].transform(input_data[feature])
            except ValueError:
                # Unknown category: fall back to encoded value 0
                # (the first class seen during training)
                print(f"Warning: Unknown category in {feature}. Falling back to the first known category.")
                input_data[feature] = 0
    
    # Make prediction
    prediction = model.predict(input_data)[0]
    predicted_career = target_encoder.inverse_transform([int(prediction)])[0]
    
    # Get probabilities for all classes; predict_proba columns follow
    # model.classes_, so map through it rather than assuming 0..n-1 order
    if hasattr(model, 'predict_proba'):
        probabilities = model.predict_proba(input_data)[0]
        class_probs = {target_encoder.inverse_transform([int(cls)])[0]: prob
                       for cls, prob in zip(model.classes_, probabilities)}
        sorted_probs = dict(sorted(class_probs.items(), key=lambda x: x[1], reverse=True))
        
        result = f"Predicted career: {predicted_career}\n\nProbabilities:\n"
        for career, prob in sorted_probs.items():
            result += f"{career}: {prob:.2f}\n"
        return result
    else:
        return f"Predicted career: {predicted_career}"
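
# Quick local sanity check (hypothetical category values; the real options
# come from the label encoders loaded above):
#   print(predict_career('Collaborative', 8.5, 'Passion', 'Yes', 'High'))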

# Function for batch evaluation
def evaluate_model_with_csv(csv_file):
    try:
        # Gradio may pass a tempfile wrapper or a plain path, depending on version
        csv_path = getattr(csv_file, 'name', csv_file)

        # Try each of these common encodings until one works
        encodings = ['utf-8', 'latin1', 'ISO-8859-1', 'cp1252', 'utf-8-sig']
        for encoding in encodings:
            try:
                test_df = pd.read_csv(csv_path, encoding=encoding)
                break
            except UnicodeDecodeError:
                if encoding == encodings[-1]:
                    return ["Error: Could not decode the CSV file with any common encodings.", None]
                continue
            except Exception as e:
                if encoding == encodings[-1]:
                    return [f"Error reading CSV: {str(e)}", None]
                continue
        
        # Check if required columns exist
        missing_cols = [col for col in features + [target] if col not in test_df.columns]
        if missing_cols:
            return [f"Error: The following required columns are missing in the CSV: {missing_cols}", None]
        
        # Preprocess the test data
        X_eval = test_df[features].copy()
        
        # Handle missing values
        X_eval = X_eval.fillna('Unknown')
        
        # Convert Academic Performance to numeric
        X_eval['Academic Performance (CGPA/Percentage)'] = pd.to_numeric(
            X_eval['Academic Performance (CGPA/Percentage)'], errors='coerce')
        # Assign the result rather than calling inplace fillna on a column,
        # which newer pandas versions warn about
        X_eval['Academic Performance (CGPA/Percentage)'] = X_eval[
            'Academic Performance (CGPA/Percentage)'].fillna(
            X_eval['Academic Performance (CGPA/Percentage)'].mean())
        
        # Encode categorical features
        for feature in features:
            if feature in label_encoders and X_eval[feature].dtype == 'object':
                # Handle unknown categories by mapping them to 0
                X_eval[feature] = X_eval[feature].apply(
                    lambda x: label_encoders[feature].transform([x])[0] 
                    if x in label_encoders[feature].classes_ else 0
                )
        
        # Get the true labels
        y_true = test_df[target].copy()
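        # Rows with a missing label fall back to a hardcoded default class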
        y_true = y_true.fillna('Corporate Employee')
        
        # Encode the true labels; labels unseen during training fall back to 0
        y_true_encoded = y_true.apply(
            lambda x: target_encoder.transform([x])[0] 
            if x in target_encoder.classes_ else 0
        ).values
        
        # Make predictions
        y_pred = model.predict(X_eval)
        y_pred = np.array(y_pred).astype(int)
        
        # Calculate accuracy
        accuracy = accuracy_score(y_true_encoded, y_pred)
        
        # Create a DataFrame with actual vs predicted values
        results_df = pd.DataFrame({
            'Actual Career': [target_encoder.classes_[i] for i in y_true_encoded],
            'Predicted Career': [target_encoder.classes_[i] for i in y_pred]
        })
        
        # Count correct predictions
        results_df['Correct'] = results_df['Actual Career'] == results_df['Predicted Career']
        correct_count = results_df['Correct'].sum()
        total_count = len(results_df)
        
        # Create confusion matrix
        plt.figure(figsize=(12, 10))
        cm = pd.crosstab(results_df['Actual Career'], results_df['Predicted Career'])
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.ylabel('Actual Career')
        plt.xlabel('Predicted Career')
        plt.tight_layout()
        
        # Save the confusion matrix and close the figure so repeated
        # evaluations don't accumulate open figures
        cm_path = 'confusion_matrix.png'
        plt.savefig(cm_path)
        plt.close()
        
        # Prepare the results
        result_text = f"Model Evaluation Results:\n\n"
        result_text += f"Total samples: {total_count}\n"
        result_text += f"Correct predictions: {correct_count}\n"
        result_text += f"Accuracy: {accuracy:.4f}\n\n"
        
        # Generate classification report; pin `labels` so target_names stays
        # aligned even when a class is absent from this test set
        report = classification_report(y_true_encoded, y_pred,
                                       labels=np.arange(len(target_encoder.classes_)),
                                       target_names=target_encoder.classes_,
                                       output_dict=True, zero_division=0)
        
        # Add class-wise metrics
        result_text += "Class-wise Performance:\n"
        for class_name in target_encoder.classes_:
            if class_name in report:
                result_text += f"\n{class_name}:\n"
                result_text += f"  Precision: {report[class_name]['precision']:.4f}\n"
                result_text += f"  Recall: {report[class_name]['recall']:.4f}\n"
                result_text += f"  F1-score: {report[class_name]['f1-score']:.4f}\n"
        
        return [result_text, cm_path]
    
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in evaluation: {str(e)}\n{error_details}")
        
        # Create a simple error image
        plt.figure(figsize=(6, 4))
        plt.text(0.5, 0.5, f"Error: {str(e)}", 
                 horizontalalignment='center', verticalalignment='center', fontsize=12, color='red')
        plt.axis('off')
        error_path = 'error_image.png'
        plt.savefig(error_path)
        plt.close()
        
        return [f"Error: {str(e)}", error_path]

# Get unique values for dropdowns
work_env_options = list(label_encoders['Preferred Work Environment'].classes_)
motivation_options = list(label_encoders['Motivation for Career Choice '].classes_)  # trailing space matches the training column name
leadership_options = list(label_encoders['Leadership Experience'].classes_)
tech_savvy_options = list(label_encoders['Tech-Savviness'].classes_)

# Create the Gradio interface
iface = gr.Interface(
    fn=predict_career,
    inputs=[
        gr.Dropdown(work_env_options, label="Preferred Work Environment"),
        gr.Number(label="Academic Performance (CGPA/Percentage)", minimum=0, maximum=10),  # assumes a 10-point CGPA scale
        gr.Dropdown(motivation_options, label="Motivation for Career Choice"),
        gr.Dropdown(leadership_options, label="Leadership Experience"),
        gr.Dropdown(tech_savvy_options, label="Tech-Savviness")
    ],
    outputs="text",
    title="Career Prediction Model",
    description="Enter your details to predict your future career path",
    theme="huggingface"
)

# Create a separate interface for model evaluation
eval_iface = gr.Interface(
    fn=evaluate_model_with_csv,
    inputs=gr.File(label="Upload Test CSV File"),
    outputs=[
        gr.Textbox(label="Evaluation Results"),
        gr.Image(label="Confusion Matrix")
    ],
    title="Career Prediction Model Evaluation",
    description="Upload a CSV file with test data to evaluate the model's performance",
    theme="huggingface"
)

# Create a tabbed interface
demo = gr.TabbedInterface(
    [iface, eval_iface],
    ["Individual Prediction", "Batch Evaluation"]
)

# Launch the interface only when the script is run directly
if __name__ == "__main__":
    demo.launch()