Spaces:

Moditha24
/

ML_project

Sleeping

App Files Files Community

ML_project / app.py

Moditha24

Update app.py

e8182c9 verified 27 days ago

raw

history blame contribute delete

6.03 kB

	import gradio as gr
	import numpy as np
	from joblib import load
	from tensorflow.keras.models import load_model
	import tensorflow as tf
	import pickle
	import pandas as pd # Ensure pandas is imported for DataFrame operations
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.compose import ColumnTransformer
	# ---------------------------------------------------------------------------
	# Start-up artifacts: dataset, Age label encoder and feature preprocessor.
	# Everything here runs once, when the module is imported.
	# ---------------------------------------------------------------------------

	# The dataset is used twice: to fit the ColumnTransformer below and to
	# populate the choice lists of the UI widgets.
	df = pd.read_csv('processed_data.csv') # Replace with the correct path to your dataset

	# Pickled LabelEncoder; predict_dosage uses it to encode the Age input.
	with open('label_encoder.pkl', 'rb') as f:
	    label_encoder = pickle.load(f)

	# Feature columns in the positional order the transformer indexes into.
	_feature_columns = [
	    'Gender',
	    'Race (Reported)',
	    'Age',
	    'Height (cm)',
	    'Weight (kg)',
	    'Diabetes',
	    'Simvastatin (Zocor)',
	    'Amiodarone (Cordarone)',
	    'INR on Reported Therapeutic Dose of Warfarin',
	    'Cyp2C9 genotypes',
	    'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T',
	]

	# Positions within _feature_columns of the columns that get one-hot encoded
	# (Gender, Race, Cyp2C9 genotypes, VKORC1 genotype).
	categorical_features = [0, 1, 9, 10] # Update if column positions change

	# One-hot encode the categorical columns and pass every other column through
	# unchanged.
	_one_hot_encoder = OneHotEncoder(sparse_output=False, drop="first")
	ct = ColumnTransformer(
	    transformers=[('encoder', _one_hot_encoder, categorical_features)],
	    remainder="passthrough",
	)

	# The transformer is re-fitted here from the CSV rather than loaded from a
	# saved artifact.
	# NOTE(review): presumably this must reproduce the preprocessing used when the
	# models were trained -- confirm processed_data.csv matches the training data.
	ct.fit(df[_feature_columns])
	# UI components for user input.
	# Choice lists for the categorical widgets are taken from the values present
	# in the dataset, in the order value_counts() returns them.

	_VKORC1_COLUMN = 'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'


	def _observed_values(column):
	    """Return the distinct values of df[column] in value_counts() order."""
	    return list(df[column].value_counts().index)


	# Demographics
	input_Gender = gr.Radio(["male", "female"], label="Gender")
	input_Race = gr.Dropdown(_observed_values('Race (Reported)'), label="Race")
	input_Age = gr.Dropdown(_observed_values('Age'), label='Age')
	input_Height = gr.Number(label='Height (cm)')
	input_Weight = gr.Number(label='Weight (kg)')

	# Binary clinical flags, offered as the two float values 0.0 and 1.0
	input_Diabetes = gr.Radio([0.0, 1.0], label='Diabetes')
	input_Simvastatin = gr.Radio([0.0, 1.0], label='Simvastatin (Zocor)')
	input_Amiodarone = gr.Radio([0.0, 1.0], label='Amiodarone (Cordarone)')

	# INR measurement and genotypes
	input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
	input_Cyp2C9_genotypes = gr.Dropdown(_observed_values('Cyp2C9 genotypes'), label='Cyp2C9 genotypes')
	input_VKORC1_genotypes = gr.Radio(_observed_values(_VKORC1_COLUMN), label='VKORC1 genotypes')

	# Which trained model performs the prediction
	input_model = gr.Dropdown(
	    [
	        'Decision Tree Regression',
	        'Support Vector Regression',
	        'Random Forest Regression',
	        'Deep Learning',
	    ],
	    label='Model Selection',
	)

	# Output textbox to display predicted dose
	output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')

	# Serialized model file for each choice offered by the model dropdown.
	_MODEL_FILES = {
	    'Deep Learning': 'best_DeepLearning_model (2).h5',
	    'Support Vector Regression': 'SVR_optimized.joblib',
	    'Random Forest Regression': 'RandomForestRegressor_optimized.joblib',
	    'Decision Tree Regression': 'DecisionTreeRegressor_optimized.joblib',
	}

	# Model used when the selection is missing or not one of the known names.
	_DEFAULT_MODEL_NAME = 'Decision Tree Regression'

	# Models already loaded from disk, keyed by model name, so each file is
	# deserialized at most once per process instead of on every request.
	_MODEL_CACHE = {}


	def _get_model(selected_model):
	    """Return the model for *selected_model*, loading it on first use."""
	    name = selected_model if selected_model in _MODEL_FILES else _DEFAULT_MODEL_NAME
	    if name not in _MODEL_CACHE:
	        path = _MODEL_FILES[name]
	        # The Keras model is stored as .h5; the others are joblib dumps.
	        loader = load_model if name == 'Deep Learning' else load
	        _MODEL_CACHE[name] = loader(path)
	    return _MODEL_CACHE[name]


	def _encode_label(encoder, label):
	    """Encode *label*; fall back to the first known class if it is unseen.

	    The fallback is the *encoded* value of ``encoder.classes_[0]``, never the
	    raw class label, so the result can always be written into a numeric
	    feature row.
	    """
	    try:
	        return encoder.transform([label])[0]
	    except ValueError:
	        # LabelEncoder.transform raises ValueError for labels it was not
	        # fitted on.
	        return encoder.transform([encoder.classes_[0]])[0]


	def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin, amiodarone, inr, cyp2c9, vkorc1, selected_model):
	    """Predict the therapeutic warfarin dose for one patient.

	    Relies on the module-level ``label_encoder`` and ``ct`` created at start-up.

	    Args:
	        gender, race, cyp2c9, vkorc1: Categorical inputs; converted with
	            ``str()`` and one-hot encoded by ``ct``.
	        age: Age category; encoded with ``label_encoder``. An unseen value
	            falls back to the first class known to the encoder.
	        height, weight, inr: Numeric inputs; ``None`` is treated as 0.0.
	        diabetes, simvastatin, amiodarone: Flags converted with ``float()``.
	        selected_model: Name of the model to use; anything other than the
	            three non-default names selects the Decision Tree model.

	    Returns:
	        The predicted dose as a ``float``, or, if anything fails, a string
	        starting with ``"Error in prediction: "`` describing the failure.
	    """
	    try:
	        model = _get_model(selected_model)

	        # Encode Age using the LabelEncoder (tolerating unseen labels).
	        encoded_age = _encode_label(label_encoder, age)

	        # Ensure numerical inputs are valid floats.
	        height = float(height) if height is not None else 0.0
	        weight = float(weight) if weight is not None else 0.0
	        inr = float(inr) if inr is not None else 0.0

	        # One row, in the same column order the ColumnTransformer was fitted on.
	        raw_inputs = [
	            str(gender),
	            str(race),
	            str(age),
	            height,
	            weight,
	            float(diabetes),
	            float(simvastatin),
	            float(amiodarone),
	            inr,
	            str(cyp2c9),
	            str(vkorc1),
	        ]

	        transformed_input = ct.transform([raw_inputs])
	        # The seven passthrough columns (Age .. INR) come last in the
	        # transformed row, so index -7 is Age; replace the raw age text with
	        # its label-encoded value.
	        transformed_input[0][-7] = encoded_age

	        # Convert to a float32 NumPy array for model input.
	        input_array = np.array(transformed_input, dtype=np.float32)

	        # Debug output to inspect the data before prediction.
	        print("Transformed input shape:", transformed_input.shape)
	        print("Final input shape:", input_array.shape)
	        print("Input data type:", input_array.dtype)

	        if selected_model == 'Deep Learning':
	            # The Keras model returns a 2-D array: take row 0, output 0.
	            tensor_input = tf.convert_to_tensor(input_array)
	            prediction = model.predict(tensor_input, verbose=0)
	            return float(prediction[0][0])

	        prediction = model.predict(input_array)
	        return float(prediction[0])

	    except Exception as e:
	        # Top-level boundary for the UI callback: report the failure in the
	        # output textbox instead of raising.
	        message = f"Error in prediction: {e}"
	        print(message)
	        return message

	# Build the Gradio interface. The order of `inputs` must match the positional
	# parameters of predict_dosage.
	demo = gr.Interface(
	    fn=predict_dosage,
	    inputs=[
	        input_Gender,
	        input_Race,
	        input_Age,
	        input_Height,
	        input_Weight,
	        input_Diabetes,
	        input_Simvastatin,
	        input_Amiodarone,
	        input_INR_reported,
	        input_Cyp2C9_genotypes,
	        input_VKORC1_genotypes,
	        input_model,
	    ],
	    outputs=[output_warfarin_dosage],
	)

	# Launch Gradio app
	demo.launch(debug=True)