File size: 5,355 Bytes
027053f
 
 
 
 
 
980e735
 
 
26632fb
027053f
dc6947b
027053f
 
 
b662922
 
ca4bef1
027053f
 
b662922
 
027053f
 
 
 
 
 
b662922
 
027053f
 
 
 
 
 
 
b662922
 
 
 
 
 
 
 
 
 
 
027053f
 
 
5200e1f
027053f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b662922
027053f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980e735
 
 
027053f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
import numpy as np
from joblib import load
from tensorflow.keras.models import load_model
import tensorflow as tf
import pickle
import pandas as pd  # Ensure pandas is imported for DataFrame operations

# --- Data and preprocessing artefacts ---------------------------------------
# The dataset is used below only to populate the choice lists of the UI.
# NOTE(review): the file carries an .xls extension but is parsed as CSV.
df = pd.read_csv('dataset.xls')

# Fitted LabelEncoder, used by predict_dosage to encode the Age input.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# NOTE(review): predict_dosage also relies on a global preprocessor named `ct`;
# nothing in the visible part of this file loads it.


def _choices(column):
    """Return the distinct non-null values of ``df[column]`` in value_counts order."""
    return list(df[column].value_counts().index)


# --- Input widgets -----------------------------------------------------------
input_Gender = gr.Radio(["male", "female"], label="Gender")
input_Race = gr.Dropdown(_choices('Race (Reported)'), label="Race")
input_Age = gr.Dropdown(_choices('Age'), label='Age')
input_Height = gr.Number(label='Height (cm)')
input_Weight = gr.Number(label='Weight (kg)')
input_Diabetes = gr.Radio([0.0, 1.0], label='Diabetes')
input_Simvastatin = gr.Radio([0.0, 1.0], label='Simvastatin (Zocor)')
input_Amiodarone = gr.Radio([0.0, 1.0], label='Amiodarone (Cordarone)')
input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
input_Cyp2C9_genotypes = gr.Dropdown(_choices('Cyp2C9 genotypes'), label='Cyp2C9 genotypes')
input_VKORC1_genotypes = gr.Radio(
    _choices('VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'),
    label='VKORC1 genotypes',
)
input_model = gr.Dropdown(
    [
        'Decision Tree Regression',
        'Support Vector Regression',
        'Random Forest Regression',
        'Deep Learning',
    ],
    label='Model Selection',
)

# --- Output widget -----------------------------------------------------------
# Textbox that shows the predicted dose (or an error message).
output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')

# Model selection -> serialized model file.  Any selection that is neither
# 'Deep Learning' nor a key of _SKLEARN_MODEL_FILES falls back to the
# decision tree.
_DEEP_LEARNING_MODEL = 'Deep Learning'
_DEEP_LEARNING_MODEL_FILE = 'best_DeepLearning_model (2).h5'
_DEFAULT_MODEL_FILE = 'DecisionTreeRegressor_optimized.joblib'
_SKLEARN_MODEL_FILES = {
    'Support Vector Regression': 'SVR_optimized.joblib',
    'Random Forest Regression': 'RandomForestRegressor_optimized.joblib',
}

# Models already loaded in this process, keyed by file name, so that a
# request does not have to deserialize the model from disk again.
_MODEL_CACHE = {}


def _load_model(selected_model):
    """Return the model for *selected_model*, loading it from disk at most once."""
    is_deep_learning = selected_model == _DEEP_LEARNING_MODEL
    if is_deep_learning:
        path = _DEEP_LEARNING_MODEL_FILE
    else:
        path = _SKLEARN_MODEL_FILES.get(selected_model, _DEFAULT_MODEL_FILE)

    if path not in _MODEL_CACHE:
        if is_deep_learning:
            from tensorflow.keras.models import load_model
            _MODEL_CACHE[path] = load_model(path)
        else:
            from joblib import load
            _MODEL_CACHE[path] = load(path)
    return _MODEL_CACHE[path]


def _encode_label(encoder, label):
    """Return the integer code of *label*, or of the first known class if unseen."""
    try:
        return encoder.transform([label])[0]
    except ValueError:
        # Fall back to the *encoded* first class; returning the raw class
        # label here would put a string into the numeric feature vector.
        return encoder.transform([encoder.classes_[0]])[0]


def _required_flag(value, field_name):
    """Return *value* as a float, raising ValueError if nothing was selected."""
    if value is None:
        raise ValueError(f"Please select a value for {field_name}")
    return float(value)


def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin, amiodarone, inr, cyp2c9, vkorc1, selected_model):
    """Predict the therapeutic warfarin dose for one patient.

    The arguments arrive in the order of the Gradio input components.
    ``height``, ``weight`` and ``inr`` default to 0.0 when left empty;
    ``diabetes``, ``simvastatin`` and ``amiodarone`` must be selected.
    ``selected_model`` picks the regressor; unknown values use the decision
    tree.

    Relies on two module-level objects: ``label_encoder`` (encodes ``age``)
    and ``ct`` (the fitted preprocessor applied to the raw inputs).

    Returns:
        The prediction as a ``float`` on success.  Any failure is caught,
        printed, and returned as the string ``"Error in prediction: ..."``
        so that it is shown in the output textbox instead of raising.
    """
    import numpy as np

    try:
        # Validate user input first so that obvious mistakes are reported
        # before any model is loaded.
        diabetes = _required_flag(diabetes, 'Diabetes')
        simvastatin = _required_flag(simvastatin, 'Simvastatin (Zocor)')
        amiodarone = _required_flag(amiodarone, 'Amiodarone (Cordarone)')

        # Empty numeric fields are treated as 0.0.
        height = float(height) if height is not None else 0.0
        weight = float(weight) if weight is not None else 0.0
        inr = float(inr) if inr is not None else 0.0

        # Report a missing preprocessor explicitly rather than as a bare
        # "name 'ct' is not defined".
        try:
            preprocessor = ct
        except NameError:
            raise RuntimeError(
                "Preprocessor 'ct' is not loaded; load the fitted "
                "ColumnTransformer before predicting"
            ) from None

        model = _load_model(selected_model)
        encoded_age = _encode_label(label_encoder, age)

        # Raw feature row handed to the preprocessor.
        raw_inputs = [
            str(gender),
            str(race),
            str(age),
            height,
            weight,
            diabetes,
            simvastatin,
            amiodarone,
            inr,
            str(cyp2c9),
            str(vkorc1),
        ]

        transformed_input = preprocessor.transform([raw_inputs])
        if hasattr(transformed_input, "toarray"):
            # Sparse output does not support the item assignment below.
            transformed_input = transformed_input.toarray()

        # Overwrite the age column with its label-encoded value *before* the
        # float conversion.
        # NOTE(review): index -7 presumably is the position of Age in the
        # preprocessor output -- confirm against the fitted transformer.
        transformed_input[0][-7] = encoded_age

        input_array = np.array(transformed_input, dtype=np.float32)

        # Debug output describing the data passed to the model.
        print("Transformed input shape:", transformed_input.shape)
        print("Final input shape:", input_array.shape)
        print("Input data type:", input_array.dtype)

        if selected_model == _DEEP_LEARNING_MODEL:
            import tensorflow as tf
            tensor_input = tf.convert_to_tensor(input_array)
            prediction = model.predict(tensor_input, verbose=0)
            # Keras returns a 2-D array: one row per sample, one output unit.
            return float(prediction[0][0])

        prediction = model.predict(input_array)
        return float(prediction[0])

    except Exception as e:
        # UI boundary: surface every failure as text in the output box.
        print(f"Error in prediction: {e}")
        return f"Error in prediction: {e}"

# Build the Gradio interface and start serving it.
# The input order must match the positional parameters of predict_dosage.
interface_inputs = [
    input_Gender,
    input_Race,
    input_Age,
    input_Height,
    input_Weight,
    input_Diabetes,
    input_Simvastatin,
    input_Amiodarone,
    input_INR_reported,
    input_Cyp2C9_genotypes,
    input_VKORC1_genotypes,
    input_model,
]
interface_outputs = [output_warfarin_dosage]

demo = gr.Interface(
    fn=predict_dosage,
    inputs=interface_inputs,
    outputs=interface_outputs,
)
demo.launch(debug=True)