|
import pandas as pd
|
|
import numpy as np
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
import joblib
|
|
import os
|
|
import sys
|
|
import pickle
|
|
|
|
|
|
# Absolute path of the directory two levels above the folder holding this file;
# the model artifacts are resolved relative to it further down.
_module_dir = os.path.dirname(__file__)
project_root = os.path.abspath(os.path.join(_module_dir, "../.."))

# Appended (not prepended), so entries already on sys.path keep priority.
sys.path.append(project_root)
|
|
|
|
class LiverDiseaseModel:
    """Liver-disease classifier wrapper with on-disk persistence.

    The wrapper holds an estimator (``self.model``) and an optional feature
    scaler (``self.scaler``).  On construction it tries to restore both from
    ``<project_root>/models``; ``train`` fits a fresh ``RandomForestClassifier``
    plus ``StandardScaler`` and writes them back to the same directory.
    """

    # Column order used to build the single-row frame in ``predict``.  The
    # spellings are kept exactly as the original code had them.
    FEATURE_NAMES = (
        'Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
        'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
        'Aspartate_Aminotransferase', 'Total_Protiens',
        'Albumin', 'Albumin_and_Globulin_Ratio',
    )

    def __init__(self):
        """Set up artifact paths and load a persisted model if one exists.

        Loading is best-effort: a missing or unreadable model file is reported
        on stdout and leaves ``self.model`` as ``None`` instead of raising, so
        the instance can still be used for ``train``.
        """
        self.model = None
        self.scaler = None

        models_dir = os.path.join(project_root, "models")
        self.model_path = os.path.join(models_dir, "liver_model.joblib")
        self.scaler_path = os.path.join(models_dir, "liver_scaler.joblib")
        self.pkl_model_path = os.path.join(models_dir, "liver_disease_model.pkl")

        # ``train`` writes into this directory, so make sure it exists.
        os.makedirs(models_dir, exist_ok=True)

        print(f"Looking for model at: {self.pkl_model_path}")

        if not os.path.exists(self.pkl_model_path):
            print(f"Model file not found at: {self.pkl_model_path}")
            return

        try:
            self._load_pickled_model()
        except Exception as e:
            # Deliberate best-effort boundary: report and continue unloaded.
            print(f"Error loading model from .pkl file: {str(e)}")
            import traceback
            print(traceback.format_exc())

    def _load_pickled_model(self):
        """Populate ``self.model`` / ``self.scaler`` from the ``.pkl`` artifact."""
        print(f"Loading model from {self.pkl_model_path}")

        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load artifacts produced by a trusted training run.
        with open(self.pkl_model_path, 'rb') as f:
            model_data = pickle.load(f)

        if isinstance(model_data, dict):
            # Bundle layout written by ``train``: {'model': ..., 'scaler': ...}.
            self.model = model_data.get('model')
            self.scaler = model_data.get('scaler')
            print("Successfully loaded model and scaler from .pkl file")
            return

        # Anything else is treated as a bare estimator without a scaler.
        self.model = model_data
        print("Loaded model from .pkl file, but no scaler found")

        if os.path.exists(self.scaler_path):
            self.scaler = joblib.load(self.scaler_path)
            print("Loaded scaler from .joblib file")
        else:
            # An unfitted StandardScaler cannot transform anything: every
            # predict() call would fail inside the scaler and fall back to raw
            # features anyway.  Leave the scaler unset so that fallback is
            # taken directly and reported accurately.
            self.scaler = None
            print("No scaler found; predictions will use raw, unscaled features")

    def train(self, X, y):
        """Fit scaler and classifier on ``X``/``y`` and persist both.

        The data is split 80/20 with a fixed seed; the scaler is fitted on the
        training part only.  Artifacts are written to ``model_path``,
        ``scaler_path`` (joblib) and ``pkl_model_path`` (pickled dict bundle).

        Args:
            X: Feature matrix accepted by ``train_test_split``.
            y: Target labels aligned with ``X``.

        Returns:
            The value of ``model.score`` on the held-out 20% split.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        self.scaler = StandardScaler()
        X_train_scaled = self.scaler.fit_transform(X_train)

        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42,
        )
        self.model.fit(X_train_scaled, y_train)

        # Persist in both formats: separate joblib files and one pickle bundle
        # (the bundle is what ``__init__`` reads back).
        joblib.dump(self.model, self.model_path)
        joblib.dump(self.scaler, self.scaler_path)
        with open(self.pkl_model_path, 'wb') as f:
            pickle.dump({'model': self.model, 'scaler': self.scaler}, f)

        X_test_scaled = self.scaler.transform(X_test)
        return self.model.score(X_test_scaled, y_test)

    def _scale_features(self, df):
        """Return scaled features, or the raw values if scaling is unavailable."""
        if self.scaler is None:
            return df.values
        try:
            return self.scaler.transform(df)
        except Exception as e:
            # Best-effort fallback kept from the original behaviour.
            print(f"Error scaling features: {str(e)}. Using raw features.")
            return df.values

    def predict(self, features):
        """Make a prediction for the given features.

        Args:
            features: One sample with a value for every entry of
                ``FEATURE_NAMES``, in that order.

        Returns:
            ``{"prediction": bool, "probability": float}`` where the
            probability is column 1 of ``predict_proba``.

        Raises:
            ValueError: If no model is loaded, the number of features is
                wrong, or the underlying estimator fails (original error is
                attached as ``__cause__``).
        """
        if self.model is None:
            raise ValueError(f"Model not loaded. Please ensure model file exists at {self.pkl_model_path} and is valid.")

        if self.scaler is None:
            print("Warning: No scaler found. Using raw features without scaling.")

        # Fail early with a clear message instead of an opaque pandas error.
        # Dict inputs are left to pandas, which selects columns by key.
        if not isinstance(features, dict):
            try:
                received = len(features)
            except TypeError:
                received = None
            expected = len(self.FEATURE_NAMES)
            if received is not None and received != expected:
                raise ValueError(
                    f"Expected {expected} features "
                    f"({', '.join(self.FEATURE_NAMES)}), got {received}."
                )

        df = pd.DataFrame([features], columns=list(self.FEATURE_NAMES))
        X_scaled = self._scale_features(df)

        try:
            # NOTE(review): bool() maps every non-zero label to True and
            # column 1 is taken as the positive class - this presumes a 0/1
            # label encoding; confirm against the training data.
            prediction = bool(self.model.predict(X_scaled)[0])
            probability = float(self.model.predict_proba(X_scaled)[0][1])
        except Exception as e:
            print(f"Error making prediction: {str(e)}")
            import traceback
            print(traceback.format_exc())
            raise ValueError(f"Error making prediction: {str(e)}") from e

        return {
            "prediction": prediction,
            "probability": probability,
        }

    def get_feature_importance(self):
        """Return feature importance if available.

        Returns:
            ``model.feature_importances_`` converted with ``.tolist()``, or
            ``None`` when no model is loaded, the attribute is missing, or the
            conversion fails.
        """
        if self.model is None:
            return None

        try:
            importances = getattr(self.model, 'feature_importances_', None)
            return None if importances is None else importances.tolist()
        except Exception:
            # Best-effort accessor; unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            return None