File size: 3,421 Bytes
b9ba714
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""
@author: Caglar Aytekin
contact: caglar@deepcause.ai 
"""
# %% IMPORT
from LEURN import LEURN
import torch
from DATA import split_and_processing
from TRAINER import Trainer
import numpy as np 
import openml 



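#DEMO FOR OPENML DATASET ID : 1590 (the id assigned to open_ml_dataset_id below), run as binary classification
#MORE INFO: https://www.openml.org/search?type=data&sort=runs&id=1590&status=active
#NOTE: THE CREDIT SCORING DATASET IS OPENML ID : 31; set open_ml_dataset_id=31 to run this demo on it instead
#MORE INFO: https://www.openml.org/search?type=data&sort=runs&id=31&status=active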
#%% Set Neural Network Hyperparameters
# Architecture / regularisation values, passed to the LEURN constructor.
depth = 2
droprate = 0.
# Optimisation values, passed to the Trainer.
lr = 5e-3
batch_size = 1024
epochs = 300
# Task selector:
#   0 -> regression
#   1 -> binary classification
#   2 -> multi-class classification
output_type = 1

#%%  Check if CUDA is available and set the device accordingly
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


#%%  Load the dataset
# Fetch the dataset from OpenML by its id and separate features from the
# dataset's default target attribute.
open_ml_dataset_id = 1590
dataset = openml.datasets.get_dataset(open_ml_dataset_id)
X, y, categoricals, attribute_names = dataset.get_data(
    target=dataset.default_target_attribute
)
# A dataset from any other source (excel, csv, ...) can be used instead, as long as:
#   - X and y are dataframes,
#   - categoricals is a boolean list indicating the categorical features,
#   - attribute_names is a list of feature names.

# %% Process data, save useful statistics
# Returns the train / validation / test splits together with the preprocessor
# object that is later handed to LEURN and used for inverse_transform_X.
(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
    preprocessor,
) = split_and_processing(X, y, categoricals, output_type, attribute_names)

#%% Initialize model
# Build LEURN from the preprocessor and the architecture hyperparameters, then
# move it to the selected device.
# NOTE(review): no loss function, optimizer or learning rate scheduler is
# created in this script -- presumably the Trainer sets those up; confirm
# against TRAINER.py.
model = LEURN(preprocessor, depth=depth, droprate=droprate).to(device)


#%%Train model
# The Trainer gets the model, the train/validation splits, the optimisation
# hyperparameters and the task type.
model_trainer = Trainer(
    model,
    X_train,
    X_val,
    y_train,
    y_val,
    lr=lr,
    batch_size=batch_size,
    epochs=epochs,
    problem_type=output_type,
)
model_trainer.train()
#Load best weights
# map_location pins the checkpoint tensors to the current device, so this cell
# also works when the file was written in a session that used another device
# (e.g. trained on GPU, reloaded on a CPU-only machine).
# NOTE(review): 'best_model_weights.pth' is presumably written by
# model_trainer.train() -- confirm against TRAINER.py.
model.load_state_dict(torch.load('best_model_weights.pth', map_location=device))

#%%Evaluate performance
# Evaluate on each split in the order train, test, validation and keep every
# result under its own name so that none of them is overwritten.
perf_train = model_trainer.evaluate(X_train, y_train)
perf_test = model_trainer.evaluate(X_test, y_test)
perf_val = model_trainer.evaluate(X_val, y_val)
# `perf` ends up holding the validation result, the last split evaluated.
perf = perf_val

#%%TESTS
# Switch the model to evaluation mode before running the checks below.
model.eval()

#%%Check sample in original format:
# Send the first test row back through the preprocessor and display it.
print(preprocessor.inverse_transform_X(X_test[0:1]))
#%% Explain single example
# explain() gives back three values: the explanation table, the result, and
# the result in original format.
(
    Exp_df_test_sample,
    result,
    result_original_format,
) = model.explain(X_test[0:1])
#%%  Check results
print(result, result_original_format)
#%% Check explanation
print(Exp_df_test_sample)
#%% Influences
effects = model.influence_matrix()
# Feature names reordered: those flagged categorical first, then all others.
# NOTE(review): presumably this mirrors the feature order used inside the
# model, so it can label the rows/columns of `effects` -- confirm.
new_list = [a for c, a in zip(categoricals, attribute_names) if c] + [
    a for c, a in zip(categoricals, attribute_names) if not c
]
# Position of the largest entry along dim 1 for each row of the influence
# matrix. Bound to a name so the result can be inspected rather than discarded.
strongest_effect_index = torch.argmax(effects, dim=1)
global_importances = model.global_importance()
#%% tests
#model output and sum of contributions should be the same
print(
    result,
    model.output,
    model(X_test[0:1]),
    Exp_df_test_sample['Contribution'].values.sum(),
)


#%% GENERATION FROM SAME CATEGORY
# Ask the model for a new sample derived from the first test row; it comes
# back in network-friendly form, in original input format, and with an output.
(
    generated_sample_nn_friendly,
    generated_sample_original_input_format,
    output,
) = model.generate_from_same_category(X_test[0:1])
#%%Check sample in original format:
# Original test row first, generated sample second, for side-by-side reading.
print(preprocessor.inverse_transform_X(X_test[0:1]))
print(generated_sample_original_input_format)
#%% Explain single example
# Note: `result` and `result_original_format` are rebound here.
(
    Exp_df_generated_sample,
    result,
    result_original_format,
) = model.explain(generated_sample_nn_friendly)
print(Exp_df_generated_sample)
# The explanation of the generated sample is compared with the one obtained
# for the test row it was generated from.
print(Exp_df_test_sample.equals(Exp_df_generated_sample))  #this should be true


#%% GENERATE FROM SCRATCH
# Generate a sample without providing any input row, then explain it.
(
    generated_sample_nn_friendly,
    generated_sample_original_input_format,
    output,
) = model.generate()
(
    Exp_df_generated_sample,
    result,
    result_original_format,
) = model.explain(generated_sample_nn_friendly)
# Show the explanation table, then the result in both formats.
print(Exp_df_generated_sample)
print(result, result_original_format)