Spaces:

Bibek-Mukherjee
/

Synthack-SyntaxSquad

Sleeping

Synthack-SyntaxSquad / src /api /liver_model.py

Bibek Mukherjee

Upload 77 files

3efedb0 verified about 2 months ago

6.15 kB

	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestClassifier
	import joblib
	import os
	import sys
	import pickle

	# Make the directory two levels above this file importable as the project root.
	project_root = os.path.abspath(
	    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
	)
	sys.path.append(project_root)

	class LiverDiseaseModel:
	    """Liver-disease classifier wrapper that loads, trains and serves a model.

	    On construction the instance tries to restore a previously saved model
	    (and scaler) from ``<project_root>/models``: first the combined
	    ``liver_disease_model.pkl`` file, then the separate ``.joblib`` files
	    written by :meth:`train`. Loading problems are printed, never raised;
	    ``self.model`` stays ``None`` when nothing could be loaded, in which
	    case :meth:`predict` raises ``ValueError``.
	    """

	    # Column order used to build the single-row DataFrame in predict().
	    # The spellings are runtime column names and must not be "corrected".
	    FEATURE_NAMES = [
	        'Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
	        'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
	        'Aspartate_Aminotransferase', 'Total_Protiens',
	        'Albumin', 'Albumin_and_Globulin_Ratio',
	    ]

	    def __init__(self):
	        """Set up artifact paths and try to load a saved model and scaler.

	        Load order:
	          1. ``liver_disease_model.pkl`` (a dict with ``'model'``/``'scaler'``
	             entries, or a bare model object).
	          2. ``liver_model.joblib`` / ``liver_scaler.joblib`` when step 1 did
	             not produce a model.
	        """
	        self.model = None
	        self.scaler = None

	        models_dir = os.path.join(project_root, "models")
	        self.model_path = os.path.join(models_dir, "liver_model.joblib")
	        self.scaler_path = os.path.join(models_dir, "liver_scaler.joblib")
	        self.pkl_model_path = os.path.join(models_dir, "liver_disease_model.pkl")

	        # Create models directory if it doesn't exist
	        os.makedirs(models_dir, exist_ok=True)

	        print(f"Looking for model at: {self.pkl_model_path}")

	        if os.path.exists(self.pkl_model_path):
	            self._load_from_pickle()
	        else:
	            print(f"Model file not found at: {self.pkl_model_path}")

	        # Fall back to the artifacts that train() writes with joblib.
	        if self.model is None:
	            self._load_from_joblib()

	    def _load_from_pickle(self):
	        """Fill ``self.model``/``self.scaler`` from the .pkl file; report errors instead of raising."""
	        try:
	            print(f"Loading model from {self.pkl_model_path}")
	            # NOTE(security): pickle.load can execute arbitrary code; the file
	            # at pkl_model_path must come from a trusted source.
	            with open(self.pkl_model_path, 'rb') as f:
	                model_data = pickle.load(f)

	            if isinstance(model_data, dict):
	                # Combined format, as written by train().
	                self.model = model_data.get('model')
	                self.scaler = model_data.get('scaler')
	                if self.model is None:
	                    print("Loaded .pkl file, but it contains no 'model' entry")
	                elif self.scaler is None:
	                    print("Loaded model from .pkl file, but no scaler found")
	                else:
	                    print("Successfully loaded model and scaler from .pkl file")
	                return

	            # Anything that is not a dict is treated as the model itself.
	            self.model = model_data
	            print("Loaded model from .pkl file, but no scaler found")

	            if os.path.exists(self.scaler_path):
	                self.scaler = joblib.load(self.scaler_path)
	                print("Loaded scaler from .joblib file")
	            else:
	                # NOTE(review): this scaler is unfitted, so predict() will hit
	                # its scaling-error branch and use raw features. Kept so that
	                # callers still see a non-None scaler, as before.
	                print("No scaler found, creating a default StandardScaler")
	                self.scaler = StandardScaler()
	        except Exception as e:
	            print(f"Error loading model from .pkl file: {str(e)}")
	            import traceback
	            print(traceback.format_exc())

	    def _load_from_joblib(self):
	        """Fill ``self.model`` (and the scaler, if saved) from the .joblib files; report errors instead of raising."""
	        if not os.path.exists(self.model_path):
	            print(f"Model file not found at: {self.model_path}")
	            return

	        try:
	            print(f"Loading model from {self.model_path}")
	            self.model = joblib.load(self.model_path)
	            if os.path.exists(self.scaler_path):
	                # train() saves model and scaler together, so prefer this pair.
	                self.scaler = joblib.load(self.scaler_path)
	                print("Loaded scaler from .joblib file")
	            elif self.scaler is None:
	                print("Loaded model from .joblib file, but no scaler found")
	        except Exception as e:
	            print(f"Error loading model from .joblib file: {str(e)}")
	            import traceback
	            print(traceback.format_exc())

	    def train(self, X, y):
	        """Train the model on the provided data and persist it.

	        The data is split 80/20 (``random_state=42``); a ``StandardScaler``
	        is fitted on the training part and a ``RandomForestClassifier``
	        (100 trees, ``max_depth=10``, ``random_state=42``) is fitted on the
	        scaled training features. Model and scaler are then written as
	        separate ``.joblib`` files and as one combined ``.pkl`` dict.

	        Args:
	            X: Feature matrix accepted by ``train_test_split``.
	            y: Target labels aligned with ``X``.

	        Returns:
	            The value of ``model.score`` on the held-out 20% split.
	        """
	        # Split the data
	        X_train, X_test, y_train, y_test = train_test_split(
	            X, y, test_size=0.2, random_state=42
	        )

	        # Initialize and fit the scaler on the training part only
	        self.scaler = StandardScaler()
	        X_train_scaled = self.scaler.fit_transform(X_train)

	        # Initialize and train the model
	        self.model = RandomForestClassifier(
	            n_estimators=100,
	            max_depth=10,
	            random_state=42,
	        )
	        self.model.fit(X_train_scaled, y_train)

	        # The directory is created in __init__, but may have been removed since.
	        os.makedirs(os.path.dirname(self.model_path), exist_ok=True)

	        # Save the model and scaler in both formats
	        joblib.dump(self.model, self.model_path)
	        joblib.dump(self.scaler, self.scaler_path)

	        # Also save as .pkl for compatibility
	        with open(self.pkl_model_path, 'wb') as f:
	            pickle.dump({'model': self.model, 'scaler': self.scaler}, f)

	        # Evaluate on test set
	        X_test_scaled = self.scaler.transform(X_test)
	        return self.model.score(X_test_scaled, y_test)

	    def predict(self, features):
	        """Make a prediction for the given features.

	        Args:
	            features: One sample, in ``FEATURE_NAMES`` order, in any form
	                accepted by ``pd.DataFrame([features], columns=FEATURE_NAMES)``.

	        Returns:
	            ``{"prediction": bool, "probability": float}`` where
	            ``probability`` is the second column of ``predict_proba``.

	        Raises:
	            ValueError: If no model is loaded or the model call fails.
	        """
	        if self.model is None:
	            raise ValueError(f"Model not loaded. Please ensure model file exists at {self.pkl_model_path} and is valid.")

	        if self.scaler is None:
	            print("Warning: No scaler found. Using raw features without scaling.")

	        # Create a DataFrame with the features in the correct order
	        df = pd.DataFrame([features], columns=self.FEATURE_NAMES)

	        # Scale the features if a scaler is available; scaling is best-effort
	        # and falls back to the raw values on any failure.
	        X_scaled = df.values
	        if self.scaler is not None:
	            try:
	                X_scaled = self.scaler.transform(df)
	            except Exception as e:
	                print(f"Error scaling features: {str(e)}. Using raw features.")

	        # Make prediction
	        try:
	            # NOTE(review): bool() maps every non-zero label to True and
	            # column 1 of predict_proba is taken as the positive class;
	            # this presumes 0/1 labels - confirm against the training data.
	            prediction = bool(self.model.predict(X_scaled)[0])
	            probability = float(self.model.predict_proba(X_scaled)[0][1])
	        except Exception as e:
	            print(f"Error making prediction: {str(e)}")
	            import traceback
	            print(traceback.format_exc())
	            raise ValueError(f"Error making prediction: {str(e)}") from e

	        return {
	            "prediction": prediction,
	            "probability": probability
	        }

	    def get_feature_importance(self):
	        """Return the model's ``feature_importances_`` as a list, or ``None``.

	        ``None`` is returned when no model is loaded, when the model has no
	        ``feature_importances_`` attribute, or when reading it fails.
	        """
	        if self.model is None:
	            return None

	        try:
	            if hasattr(self.model, 'feature_importances_'):
	                return self.model.feature_importances_.tolist()
	        except Exception:
	            # Best-effort accessor: an unreadable attribute means "not available".
	            return None
	        return None