# ML_project / app.py
# Moditha24's picture
# Update app.py
# e8182c9 verified
import gradio as gr
import numpy as np
from joblib import load
from tensorflow.keras.models import load_model
import tensorflow as tf
import pickle
import pandas as pd # Ensure pandas is imported for DataFrame operations
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# --- Training data and preprocessing objects ---------------------------------
# The processed dataset is read once at start-up; it is used to fit the
# one-hot encoder below (adjust the path if the file lives elsewhere).
df = pd.read_csv('processed_data.csv')

# The LabelEncoder has to be available before any prediction is made.
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Feature columns, in the order the ColumnTransformer is fitted on them.
_FEATURE_COLUMNS = [
    'Gender',
    'Race (Reported)',
    'Age',
    'Height (cm)',
    'Weight (kg)',
    'Diabetes',
    'Simvastatin (Zocor)',
    'Amiodarone (Cordarone)',
    'INR on Reported Therapeutic Dose of Warfarin',
    'Cyp2C9 genotypes',
    'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T',
]

# Positions inside _FEATURE_COLUMNS that are one-hot encoded (Gender, Race,
# Cyp2C9 genotypes, VKORC1 genotype). Update if the column order changes.
categorical_features = [0, 1, 9, 10]

# Every column not listed in categorical_features is passed through unchanged.
ct = ColumnTransformer(
    transformers=[
        (
            'encoder',
            OneHotEncoder(sparse_output=False, drop="first"),
            categorical_features,
        ),
    ],
    remainder="passthrough",
)

# NOTE(review): the transformer is re-fitted here at start-up instead of being
# loaded from disk; presumably this reproduces the preprocessing the saved
# models were trained with -- confirm against the training code.
ct.fit(df[_FEATURE_COLUMNS])
# --- Gradio widgets -----------------------------------------------------------
def _choices_from(column):
    """Return the distinct values of ``df[column]`` in ``value_counts()`` order."""
    return list(df[column].value_counts().index)


def _binary_radio(label):
    """Build a 0.0 / 1.0 radio button with the given label."""
    return gr.Radio([0.0, 1.0], label=label)


# Input widgets; categorical choices are taken from the values seen in the dataset.
input_Gender = gr.Radio(["male", "female"], label="Gender")
input_Race = gr.Dropdown(_choices_from('Race (Reported)'), label="Race")
input_Age = gr.Dropdown(_choices_from('Age'), label='Age')
input_Height = gr.Number(label='Height (cm)')
input_Weight = gr.Number(label='Weight (kg)')
input_Diabetes = _binary_radio('Diabetes')
input_Simvastatin = _binary_radio('Simvastatin (Zocor)')
input_Amiodarone = _binary_radio('Amiodarone (Cordarone)')
input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
input_Cyp2C9_genotypes = gr.Dropdown(
    _choices_from('Cyp2C9 genotypes'),
    label='Cyp2C9 genotypes',
)
input_VKORC1_genotypes = gr.Radio(
    _choices_from('VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'),
    label='VKORC1 genotypes',
)
input_model = gr.Dropdown(
    [
        'Decision Tree Regression',
        'Support Vector Regression',
        'Random Forest Regression',
        'Deep Learning',
    ],
    label='Model Selection',
)

# Output widget that shows the predicted dose (or an error message).
output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')
# --- Prediction ---------------------------------------------------------------
# Models already loaded from disk, keyed by file path, so that each model file
# is read only once instead of on every request.
_MODEL_CACHE = {}


def _get_model(selected_model):
    """Return the model for *selected_model*, loading it on first use.

    Any name other than the three handled explicitly falls back to the
    decision-tree model.
    """
    if selected_model == 'Deep Learning':
        path, loader = 'best_DeepLearning_model (2).h5', load_model
    elif selected_model == 'Support Vector Regression':
        path, loader = 'SVR_optimized.joblib', load
    elif selected_model == 'Random Forest Regression':
        path, loader = 'RandomForestRegressor_optimized.joblib', load
    else:
        path, loader = "DecisionTreeRegressor_optimized.joblib", load

    if path not in _MODEL_CACHE:
        _MODEL_CACHE[path] = loader(path)
    return _MODEL_CACHE[path]


def _encode_label(encoder, label):
    """Encode *label* with *encoder*, tolerating labels it has never seen.

    An unseen label makes ``encoder.transform`` raise ``ValueError``; in that
    case the encoded value of the encoder's first known class is returned, so
    the result is always an encoded value and never a raw label.
    """
    try:
        return encoder.transform([label])[0]
    except ValueError:
        return encoder.transform([encoder.classes_[0]])[0]


def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin, amiodarone, inr, cyp2c9, vkorc1, selected_model):
    """Predict the warfarin dose for one patient with the selected model.

    Args:
        gender, race, cyp2c9, vkorc1: Categorical values; converted to ``str``
            and encoded by the module-level ColumnTransformer ``ct``.
        age: Age category; encoded with the module-level ``label_encoder``.
            An unseen category falls back to the encoder's first class.
        height, weight, inr: Numeric values; ``None`` is treated as ``0.0``.
        diabetes, simvastatin, amiodarone: Flags converted with ``float()``.
        selected_model: Model name from the UI; unknown names select the
            decision-tree model.

    Returns:
        The prediction as a ``float``. If anything fails, the error is printed
        and a string starting with ``"Error in prediction: "`` is returned
        instead of raising, so the UI can display it.
    """
    try:
        model = _get_model(selected_model)

        encoded_age = _encode_label(label_encoder, age)

        # Missing numeric fields are treated as 0.0
        height = float(height) if height is not None else 0.0
        weight = float(weight) if weight is not None else 0.0
        inr = float(inr) if inr is not None else 0.0

        # One row in the column order `ct` was fitted on. Age is a passthrough
        # column, so its encoded value is placed directly in the Age slot
        # rather than being patched into the transformed row afterwards.
        raw_inputs = [
            str(gender),
            str(race),
            encoded_age,
            height,
            weight,
            float(diabetes),
            float(simvastatin),
            float(amiodarone),
            inr,
            str(cyp2c9),
            str(vkorc1),
        ]

        transformed_input = ct.transform([raw_inputs])
        input_array = np.array(transformed_input, dtype=np.float32)

        # Debug output to inspect the data right before prediction
        print("Transformed input shape:", transformed_input.shape)
        print("Final input shape:", input_array.shape)
        print("Input data type:", input_array.dtype)

        if selected_model == 'Deep Learning':
            # The Keras model is fed a tensor and its result is indexed [0][0]
            tensor_input = tf.convert_to_tensor(input_array)
            prediction = model.predict(tensor_input, verbose=0)
            return float(prediction[0][0])

        prediction = model.predict(input_array)
        return float(prediction[0])
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        message = f"Error in prediction: {str(e)}"
        print(message)
        return message
# Wire the widgets to the prediction function, then start the Gradio app.
demo = gr.Interface(
    fn=predict_dosage,
    inputs=[
        input_Gender,
        input_Race,
        input_Age,
        input_Height,
        input_Weight,
        input_Diabetes,
        input_Simvastatin,
        input_Amiodarone,
        input_INR_reported,
        input_Cyp2C9_genotypes,
        input_VKORC1_genotypes,
        input_model,
    ],
    outputs=[output_warfarin_dosage],
)
demo.launch(debug=True)