# Module setup: imports, the reference dataset, the pickled age LabelEncoder,
# and the ColumnTransformer that one-hot encodes the categorical features.
import pickle

import gradio as gr
import numpy as np
import pandas as pd  # Ensure pandas is imported for DataFrame operations
import tensorflow as tf
from joblib import load
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import load_model

# Load dataset
df = pd.read_csv('processed_data.csv')  # Replace with the correct path to your dataset

# Load the LabelEncoder before prediction.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only ship a 'label_encoder.pkl' that comes from a trusted source.
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Columns fed to the preprocessor, in the order the prediction code assembles
# its input row.
FEATURE_COLUMNS = [
    'Gender',
    'Race (Reported)',
    'Age',
    'Height (cm)',
    'Weight (kg)',
    'Diabetes',
    'Simvastatin (Zocor)',
    'Amiodarone (Cordarone)',
    'INR on Reported Therapeutic Dose of Warfarin',
    'Cyp2C9 genotypes',
    'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T',
]

# Columns that are one-hot encoded; every other column is passed through.
CATEGORICAL_COLUMNS = [
    'Gender',
    'Race (Reported)',
    'Cyp2C9 genotypes',
    'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T',
]

# Positional indices ([0, 1, 9, 10]) are derived from the names so they cannot
# drift if FEATURE_COLUMNS is reordered. Positions (not names) are used because
# the transformer is later applied to a plain list, which has no column names.
categorical_features = [FEATURE_COLUMNS.index(name) for name in CATEGORICAL_COLUMNS]

ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(sparse_output=False, drop="first"), categorical_features),
    ],
    remainder="passthrough",
)

# Fit the transformer on the dataset so the one-hot categories are known
# before any prediction is made.
ct.fit(df[FEATURE_COLUMNS])
# UI components for user input.
def _column_choices(column):
    """Return the distinct non-null values of ``df[column]``, most frequent first."""
    return df[column].value_counts().index.tolist()


input_Gender = gr.Radio(["male", "female"], label="Gender")
input_Race = gr.Dropdown(_column_choices('Race (Reported)'), label="Race")
input_Age = gr.Dropdown(_column_choices('Age'), label='Age')
input_Height = gr.Number(label='Height (cm)')
input_Weight = gr.Number(label='Weight (kg)')
input_Diabetes = gr.Radio([0.0, 1.0], label='Diabetes')
input_Simvastatin = gr.Radio([0.0, 1.0], label='Simvastatin (Zocor)')
input_Amiodarone = gr.Radio([0.0, 1.0], label='Amiodarone (Cordarone)')
input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
input_Cyp2C9_genotypes = gr.Dropdown(_column_choices('Cyp2C9 genotypes'), label='Cyp2C9 genotypes')
input_VKORC1_genotypes = gr.Radio(
    _column_choices('VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'),
    label='VKORC1 genotypes',
)
input_model = gr.Dropdown(
    [
        'Decision Tree Regression',
        'Support Vector Regression',
        'Random Forest Regression',
        'Deep Learning',
    ],
    label='Model Selection',
)

# Output textbox to display predicted dose
output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')

# Model files, keyed by the label shown in the "Model Selection" dropdown.
# Any other selection (including no selection) falls back to the decision tree.
_DEEP_LEARNING = 'Deep Learning'
_KERAS_MODEL_FILE = 'best_DeepLearning_model (2).h5'
_JOBLIB_MODEL_FILES = {
    'Support Vector Regression': 'SVR_optimized.joblib',
    'Random Forest Regression': 'RandomForestRegressor_optimized.joblib',
}
_DEFAULT_MODEL_FILE = "DecisionTreeRegressor_optimized.joblib"

# Loaded models, keyed by file path, so each file is deserialised at most once
# per process instead of on every prediction request.
_model_cache = {}

# The preprocessor one-hot encodes columns 0, 1, 9 and 10 and appends the seven
# passthrough columns (Age, Height, Weight, Diabetes, Simvastatin, Amiodarone,
# INR) after them, so Age is the seventh value from the end of the row.
_AGE_POSITION = -7


def _get_model(selected_model):
    """Return the model for *selected_model*, loading it on first use."""
    is_keras = selected_model == _DEEP_LEARNING
    if is_keras:
        path = _KERAS_MODEL_FILE
    else:
        path = _JOBLIB_MODEL_FILES.get(selected_model, _DEFAULT_MODEL_FILE)
    if path not in _model_cache:
        _model_cache[path] = load_model(path) if is_keras else load(path)
    return _model_cache[path]


def _encode_age(age):
    """Return the LabelEncoder code for *age*.

    An age the encoder has never seen falls back to the code of the encoder's
    first known class. The fallback must be the *encoded* value: a raw class
    label would be written into the feature row as-is and break (or corrupt)
    the float32 conversion.
    """
    try:
        return label_encoder.transform([age])[0]
    except ValueError:
        fallback = label_encoder.classes_[0]
        print(f"Unseen age label {age!r}; falling back to {fallback!r}")
        return label_encoder.transform([fallback])[0]


def _print_input_debug(transformed_input, final_array):
    """Print the shapes and dtype of the model input for debugging."""
    print("Transformed input shape:", transformed_input.shape)
    print("Final input shape:", final_array.shape)
    print("Input data type:", final_array.dtype)


def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin,
                   amiodarone, inr, cyp2c9, vkorc1, selected_model):
    """Predict the therapeutic warfarin dose for one patient.

    The arguments arrive from the Gradio inputs in the same order as the
    columns the preprocessor ``ct`` was fitted on; ``selected_model`` is the
    label chosen in the "Model Selection" dropdown.

    Returns:
        The prediction as a ``float``, or an ``"Error in prediction: ..."``
        string if anything fails. Errors are reported in the UI instead of
        being raised.
    """
    try:
        model = _get_model(selected_model)
        encoded_age = _encode_age(age)

        # NOTE(review): a missing height, weight or INR is replaced by 0.0,
        # which is not a plausible measurement; consider rejecting the request.
        height = float(height) if height is not None else 0.0
        weight = float(weight) if weight is not None else 0.0
        inr = float(inr) if inr is not None else 0.0

        # Assemble the row in the column order the preprocessor was fitted on.
        raw_inputs = [
            str(gender),
            str(race),
            str(age),
            height,
            weight,
            float(diabetes),
            float(simvastatin),
            float(amiodarone),
            inr,
            str(cyp2c9),
            str(vkorc1),
        ]

        transformed_input = ct.transform([raw_inputs])
        # Age is passed through untouched by ct; replace it with its encoded
        # value before the row is converted to floats.
        transformed_input[0][_AGE_POSITION] = encoded_age

        input_array = np.array(transformed_input, dtype=np.float32)
        _print_input_debug(transformed_input, input_array)

        if selected_model == _DEEP_LEARNING:
            tensor_input = tf.convert_to_tensor(input_array)
            prediction = model.predict(tensor_input, verbose=0)
            return float(prediction[0][0])

        prediction = model.predict(input_array)
        return float(prediction[0])
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crashing.
        message = f"Error in prediction: {str(e)}"
        print(message)
        return message


# Launch Gradio app
demo = gr.Interface(
    fn=predict_dosage,
    inputs=[
        input_Gender,
        input_Race,
        input_Age,
        input_Height,
        input_Weight,
        input_Diabetes,
        input_Simvastatin,
        input_Amiodarone,
        input_INR_reported,
        input_Cyp2C9_genotypes,
        input_VKORC1_genotypes,
        input_model,
    ],
    outputs=[output_warfarin_dosage],
)
demo.launch(debug=True)