CaglarAytekin committed
Commit b9ba714 · 1 Parent(s): 1e7a8d6

first commit

Files changed (11)
  1. Causality_Example.png +0 -0
  2. DATA.py +198 -0
  3. DEMO.py +97 -0
  4. LEURN.py +695 -0
  5. LICENSE +201 -0
  6. Presentation_Product.pdf +0 -0
  7. Presentation_Technical.pdf +0 -0
  8. README.md +27 -13
  9. TRAINER.py +186 -0
  10. app.py +176 -0
  11. requirements.txt +7 -0
Causality_Example.png ADDED
DATA.py ADDED
@@ -0,0 +1,198 @@
+ """
+ @author: Caglar Aytekin
+ contact: caglar@deepcause.ai
+ """
+ 
+ import numpy as np
+ from sklearn.preprocessing import LabelEncoder, MinMaxScaler
+ import warnings
+ from sklearn.model_selection import train_test_split
+ import torch
+ import pandas as pd
+ 
+ pd.set_option('display.max_rows', None)      # None means show all rows
+ pd.set_option('display.max_columns', None)   # None means show all columns
+ pd.set_option('display.width', None)         # Use appropriate width to display columns
+ pd.set_option('display.max_colwidth', None)  # Show full content of each column
+ 
+ warnings.filterwarnings("ignore")
+ 
+ 
+ def split_and_processing(X, y, categoricals, output_type, attribute_names):
+     # If every entry in a column of the dataframe is None, drop that column
+     columns_to_keep_mask = ~X.isna().all()
+     X = X.dropna(axis=1, how='all')
+     # Update the categoricals and attribute-name lists to reflect the columns that were kept
+     categoricals = [cat for cat, keep in zip(categoricals, columns_to_keep_mask) if keep]
+     attribute_names = [name for name, keep in zip(attribute_names, columns_to_keep_mask) if keep]
+ 
+     # Split into train and remaining
+     X_train, X_remaining, y_train, y_remaining = train_test_split(X, y, test_size=0.2, random_state=42)
+ 
+     # Split remaining into validation and test
+     X_val, X_test, y_val, y_test = train_test_split(X_remaining, y_remaining, test_size=0.5, random_state=42)
+ 
+     # Initialize preprocessor
+     preprocessor = DataProcessor(categoricals, output_type)
+ 
+     # Fit and transform the training set
+     X_train = torch.from_numpy(preprocessor.fit_transform_X(X_train).values).float()
+     y_train = torch.from_numpy(preprocessor.fit_transform_y(y_train)).float()
+     if output_type < 2:
+         y_train = y_train.unsqueeze(dim=-1)
+     else:
+         y_train = y_train.long()
+ 
+     # Transform the validation and test sets
+     X_val = torch.from_numpy(preprocessor.transform_X(X_val).values).float()
+     y_val = torch.from_numpy(preprocessor.transform_y(y_val)).float()
+     if output_type < 2:
+         y_val = y_val.unsqueeze(dim=-1)
+     else:
+         y_val = y_val.long()
+ 
+     X_test = torch.from_numpy(preprocessor.transform_X(X_test).values).float()
+     y_test = torch.from_numpy(preprocessor.transform_y(y_test)).float()
+     if output_type < 2:
+         y_test = y_test.unsqueeze(dim=-1)
+     else:
+         y_test = y_test.long()
+ 
+     preprocessor.attribute_names = attribute_names
+     preprocessor.output_type = output_type
+ 
+     # Determine the output dimension
+     if output_type == 0:    # regression
+         output_dim = y_train.shape[1]
+     elif output_type == 1:  # binary classification
+         output_dim = 1
+     else:                   # multi-class classification
+         output_dim = len(np.unique(y_train))
+ 
+     preprocessor.output_dim = output_dim
+     return X_train, X_val, X_test, y_train, y_val, y_test, preprocessor
+ 
+ 
+ class DataProcessor:
+     def __init__(self, categoricals, output_type):
+         self.categoricals = categoricals
+         self.output_type = output_type
+         self.label_encoders = {}
+         self.scaler = MinMaxScaler(feature_range=(-1, 1))
+         self.target_scaler = MinMaxScaler(feature_range=(-1, 1))
+         self.most_common_categories = {}
+         self.target_encoder = None   # For binary and multiclass targets
+         self.unique_targets = None   # Stores the unique targets for binary classification
+         self.category_details = []
+         self.suggested_embeddings = None
+         self.encoders_for_nn = {}
+ 
+     def fit_transform_X(self, X):
+         # Cast numerical columns to float and categorical columns to string
+         X.iloc[:, ~np.array(self.categoricals)] = X.iloc[:, ~np.array(self.categoricals)].astype(float)
+         X.iloc[:, np.array(self.categoricals)] = X.iloc[:, np.array(self.categoricals)].astype(str)
+ 
+         X_transformed = X.copy()
+         for i, is_categorical in enumerate(self.categoricals):
+             if is_categorical:
+                 encoder = LabelEncoder()
+                 X_transformed.iloc[:, i] = encoder.fit_transform(X.iloc[:, i])
+                 self.label_encoders[i] = encoder
+                 self.encoders_for_nn[X_transformed.columns[i]] = dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))
+                 self.most_common_categories[i] = X.iloc[:, i].mode()[0]
+                 self.category_details.append((i, len(encoder.classes_)))
+             else:
+                 # Fill missing values with the median for numerical columns
+                 X_transformed.iloc[:, i] = X.iloc[:, i].fillna(X.iloc[:, i].median())
+ 
+         # Scale numerical features
+         numerical_features = X_transformed.iloc[:, ~np.array(self.categoricals)]
+         if numerical_features.shape[-1] > 0:
+             self.scaler.fit(numerical_features)
+             X_transformed.iloc[:, ~np.array(self.categoricals)] = self.scaler.transform(numerical_features)
+         self.suggested_embeddings = [max(2, int(np.log2(x[1]))) for x in self.category_details]
+ 
+         return X_transformed.astype(float)
+ 
+     def transform_X(self, X):
+         X.iloc[:, np.array(self.categoricals)] = X.iloc[:, np.array(self.categoricals)].astype(str)
+         X_transformed = X.copy()
+         for i, is_categorical in enumerate(self.categoricals):
+             if is_categorical:
+                 encoder = self.label_encoders[i]
+                 # Transform categories, replacing unseen ones with the most common category
+                 X_transformed.iloc[:, i] = X.iloc[:, i].map(lambda x: x if x in encoder.classes_ else self.most_common_categories[i])
+                 X_transformed.iloc[:, i] = encoder.transform(X_transformed.iloc[:, i])
+             else:
+                 X_transformed.iloc[:, i] = X.iloc[:, i].fillna(X.iloc[:, i].mean())
+ 
+         # Scale numerical features
+         numerical_features = X_transformed.iloc[:, ~np.array(self.categoricals)]
+         if numerical_features.shape[-1] > 0:
+             X_transformed.iloc[:, ~np.array(self.categoricals)] = self.scaler.transform(numerical_features)
+ 
+         return X_transformed.astype(float)
+ 
+     def inverse_transform_X(self, sample):
+         # Inverse transform from a PyTorch tensor back to the original feature space
+         sample = sample.detach().numpy()
+         sample_inverse_transformed = pd.DataFrame(sample.copy())
+ 
+         # Handle numerical features
+         numerical_features_indices = np.where(~np.array(self.categoricals))[0]
+         if len(numerical_features_indices) > 0:
+             sample_inverse_transformed.iloc[:, numerical_features_indices] = self.scaler.inverse_transform(sample[:, numerical_features_indices])
+ 
+         for i, is_categorical in enumerate(self.categoricals):
+             if is_categorical:
+                 encoder = self.label_encoders[i]
+                 sample_inverse_transformed.iloc[:, i] = encoder.inverse_transform(sample[:, i].astype('int'))
+         sample_inverse_transformed.columns = self.attribute_names
+         return sample_inverse_transformed
+ 
+     def fit_transform_y(self, y):
+         if self.output_type == 0:    # Regression
+             y_transformed = self.target_scaler.fit_transform(y.values.reshape(-1, 1)).flatten()
+         elif self.output_type == 1:  # Binary classification
+             self.unique_targets = y.unique()
+             mapping = {category: idx for idx, category in enumerate(self.unique_targets)}
+             y_transformed = y.map(mapping).astype(int).values
+         elif self.output_type == 2:  # Multiclass classification
+             self.target_encoder = LabelEncoder()
+             y_transformed = self.target_encoder.fit_transform(y)
+         else:
+             raise ValueError("Invalid output type")
+         return y_transformed
+ 
+     def transform_y(self, y):
+         if self.output_type == 0:    # Regression
+             y_transformed = self.target_scaler.transform(y.values.reshape(-1, 1)).flatten()
+         elif self.output_type == 1:  # Binary classification
+             mapping = {category: idx for idx, category in enumerate(self.unique_targets)}
+             y_transformed = y.map(mapping).astype(int).values
+         elif self.output_type == 2:  # Multiclass classification
+             y_transformed = self.target_encoder.transform(y)
+         else:
+             raise ValueError("Invalid output type")
+         return y_transformed
+ 
+     def inverse_transform_y(self, nn_output):
+         if self.output_type == 0:    # Regression
+             y_transformed = nn_output.squeeze().detach().numpy()
+             return self.target_scaler.inverse_transform(y_transformed.reshape(-1, 1)).flatten()
+         elif self.output_type == 1:  # Binary classification
+             y_transformed = int(np.round(torch.sigmoid(nn_output).squeeze().detach().numpy()))
+             inverse_mapping = {idx: category for idx, category in enumerate(self.unique_targets)}
+             return inverse_mapping[y_transformed]
+         elif self.output_type == 2:  # Multiclass classification
+             y_transformed = int(np.round(torch.argmax(nn_output).squeeze().detach().numpy()))
+             return self.target_encoder.inverse_transform([y_transformed])
+         else:
+             raise ValueError("Invalid output type")
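For orientation, `split_and_processing` expects `X` as a pandas DataFrame, `y` as a pandas Series, a boolean mask marking the categorical columns, and the attribute names — the same convention DEMO.py below uses with OpenML data. A minimal sketch of the calling convention with a hypothetical toy frame (column names and values here are illustrative only):

```python
import pandas as pd
from DATA import split_and_processing

# Hypothetical toy data: one categorical and one numerical feature, binary target
X = pd.DataFrame({
    "color": ["red", "blue", "green", "blue", "red"] * 4,
    "age":   [23.0, 31.0, 58.0, 44.0, 19.0] * 4,
})
y = pd.Series(["yes", "no", "yes", "no", "yes"] * 4)
categoricals = [True, False]        # one boolean per column of X
attribute_names = list(X.columns)

# output_type: 0 = regression, 1 = binary, 2 = multi-class
X_tr, X_val, X_te, y_tr, y_val, y_te, prep = split_and_processing(
    X, y, categoricals, 1, attribute_names)
print(X_tr.shape, prep.output_dim)  # float tensors ready for LEURN
```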
DEMO.py ADDED
@@ -0,0 +1,97 @@
+ """
+ @author: Caglar Aytekin
+ contact: caglar@deepcause.ai
+ """
+ # %% IMPORT
+ from LEURN import LEURN
+ import torch
+ from DATA import split_and_processing
+ from TRAINER import Trainer
+ import numpy as np
+ import openml
+ 
+ # DEMO FOR CREDIT SCORING DATASET: OPENML ID: 31
+ # MORE INFO: https://www.openml.org/search?type=data&sort=runs&id=31&status=active
+ # %% Set neural network hyperparameters
+ depth = 2
+ batch_size = 1024
+ lr = 5e-3
+ epochs = 300
+ droprate = 0.
+ output_type = 1  # 0: regression, 1: binary classification, 2: multi-class classification
+ 
+ # %% Check if CUDA is available and set the device accordingly
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print("Using device:", device)
+ 
+ # %% Load the dataset
+ # Read dataset from OpenML
+ open_ml_dataset_id = 1590  # note: as shipped this loads OpenML dataset 1590; set it to 31 for the credit-scoring dataset described above
+ dataset = openml.datasets.get_dataset(open_ml_dataset_id)
+ X, y, categoricals, attribute_names = dataset.get_data(target=dataset.default_target_attribute)
+ # Alternatively, load your own dataset from another source (Excel, CSV, etc.).
+ # Be mindful that X and y should be dataframes, categoricals is a boolean list indicating categorical features,
+ # and attribute_names is a list of feature names.
+ 
+ # %% Process data, save useful statistics
+ X_train, X_val, X_test, y_train, y_val, y_test, preprocessor = split_and_processing(X, y, categoricals, output_type, attribute_names)
+ 
+ # %% Initialize the model (the Trainer below sets up the loss function, optimizer, and learning rate scheduler)
+ model = LEURN(preprocessor, depth=depth, droprate=droprate).to(device)
+ 
+ # %% Train model
+ model_trainer = Trainer(model, X_train, X_val, y_train, y_val, lr=lr, batch_size=batch_size, epochs=epochs, problem_type=output_type)
+ model_trainer.train()
+ # Load best weights
+ model.load_state_dict(torch.load('best_model_weights.pth'))
+ 
+ # %% Evaluate performance
+ perf = model_trainer.evaluate(X_train, y_train)
+ perf = model_trainer.evaluate(X_test, y_test)
+ perf = model_trainer.evaluate(X_val, y_val)
+ 
+ # %% TESTS
+ model.eval()
+ 
+ # %% Check a sample in its original format:
+ print(preprocessor.inverse_transform_X(X_test[0:1]))
+ # %% Explain a single example
+ Exp_df_test_sample, result, result_original_format = model.explain(X_test[0:1])
+ # %% Check results
+ print(result, result_original_format)
+ # %% Check explanation
+ print(Exp_df_test_sample)
+ # %% Influences
+ effects = model.influence_matrix()
+ # Attribute names reordered to the network's internal layout (categoricals first, then numericals)
+ new_list = [a for c, a in zip(categoricals, attribute_names) if c] + [a for c, a in zip(categoricals, attribute_names) if not c]
+ torch.argmax(effects, dim=1)
+ global_importances = model.global_importance()
+ # %% Tests
+ # The model output and the sum of contributions should be the same
+ print(result, model.output, model(X_test[0:1]), Exp_df_test_sample['Contribution'].values.sum())
+ 
+ # %% GENERATION FROM THE SAME CATEGORY
+ generated_sample_nn_friendly, generated_sample_original_input_format, output = model.generate_from_same_category(X_test[0:1])
+ # %% Compare the original and generated samples in original format:
+ print(preprocessor.inverse_transform_X(X_test[0:1]))
+ print(generated_sample_original_input_format)
+ # %% Explain the generated sample
+ Exp_df_generated_sample, result, result_original_format = model.explain(generated_sample_nn_friendly)
+ print(Exp_df_generated_sample)
+ print(Exp_df_test_sample.equals(Exp_df_generated_sample))  # this should be True
+ 
+ # %% GENERATE FROM SCRATCH
+ generated_sample_nn_friendly, generated_sample_original_input_format, output = model.generate()
+ Exp_df_generated_sample, result, result_original_format = model.explain(generated_sample_nn_friendly)
+ print(Exp_df_generated_sample)
+ print(result, result_original_format)
LEURN.py ADDED
@@ -0,0 +1,695 @@
+ """
+ @author: Caglar Aytekin
+ contact: caglar@deepcause.ai
+ """
+ import torch
+ import torch.nn as nn
+ import random
+ import numpy as np
+ import pandas as pd
+ import copy
+ 
+ 
+ class CustomEncodingFunction(torch.autograd.Function):
+     @staticmethod
+     def forward(ctx, x, tau, alpha):
+         ctx.save_for_backward(x, tau)
+         # Perform the tanh operation on (x + tau)
+         y = torch.tanh(x + tau)
+         # The actual forward output: binarized output (alpha blends between the binarized and the soft tanh output)
+         forward_output = alpha * (2 * torch.round((y + 1) / 2) - 1) + (1 - alpha) * y
+         return forward_output
+ 
+     @staticmethod
+     def backward(ctx, grad_output):
+         x, tau = ctx.saved_tensors
+         # Use the derivative of tanh for the backward pass: 1 - tanh^2(x + tau)
+         grad_input = grad_output * (1 - torch.tanh(x + tau) ** 2)
+         # The same gradient flows to x and tau (y depends only on their sum); alpha gets no gradient
+         return grad_input, grad_input, None
+ 
+ 
+ # Wrapping the custom function in an nn.Module for easier use
+ class EncodingLayer(nn.Module):
+     def __init__(self):
+         super(EncodingLayer, self).__init__()
+ 
+     def forward(self, x, tau, alpha):
+         return CustomEncodingFunction.apply(x, tau, alpha)
+ 
+ 
+ class LEURN(nn.Module):
+     def __init__(self, preprocessor, depth, droprate):
+         """
+         Initializes the model.
+ 
+         Parameters:
+         - preprocessor: A class containing useful info about the dataset,
+           including attribute names, categorical feature details, suggested embedding size for each category,
+           output type, output dimension, and transformation information
+         - depth: Depth of the network
+         - droprate: Dropout rate
+         """
+         super(LEURN, self).__init__()
+ 
+         # Find categorical indices and the number of categories for each
+         self.alpha = 1.0
+         self.preprocessor = preprocessor
+         self.attribute_names = preprocessor.attribute_names
+         self.label_encoders = preprocessor.encoders_for_nn
+         self.categorical_indices = [info[0] for info in preprocessor.category_details]
+         self.num_categories = [info[1] for info in preprocessor.category_details]
+ 
+         # If the embedding size is an integer, broadcast it to all categorical features
+         if isinstance(preprocessor.suggested_embeddings, int):
+             embedding_sizes = [preprocessor.suggested_embeddings] * len(self.categorical_indices)
+         else:
+             assert len(preprocessor.suggested_embeddings) == len(self.categorical_indices), "Length of embedding_size must match number of categorical features"
+             embedding_sizes = preprocessor.suggested_embeddings
+ 
+         self.embedding_sizes = embedding_sizes
+ 
+         # Embedding layers for categorical features
+         self.embeddings = nn.ModuleList([
+             nn.Embedding(num_categories, embedding_dim)
+             for num_categories, embedding_dim in zip(self.num_categories, embedding_sizes)
+         ])
+ 
+         for embedding_now in self.embeddings:
+             nn.init.uniform_(embedding_now.weight, -1.0, 1.0)
+ 
+         self.total_embedding_size = sum(embedding_sizes)                                     # width of the embedded categorical features
+         self.non_cat_input_dim = len(self.attribute_names) - len(self.categorical_indices)  # number of numerical features
+         self.nn_input_dim = self.total_embedding_size + self.non_cat_input_dim              # total input width of the network
+ 
+         # LAYERS
+         self.tau_initial = nn.Parameter(torch.zeros(1, self.nn_input_dim))  # Initial tau as a learnable parameter
+         self.layers = nn.ModuleList()
+         self.depth = depth
+         self.output_type = preprocessor.output_type
+ 
+         for d_now in range(depth):
+             # Each iteration adds an encoding layer followed by a dropout and then a linear layer
+             self.layers.append(EncodingLayer())
+             self.layers.append(nn.Dropout1d(droprate))
+             linear_layer = nn.Linear((d_now + 1) * self.nn_input_dim, self.nn_input_dim)
+             self._init_weights(linear_layer, d_now + 1)  # special layer initialization
+             self.layers.append(linear_layer)
+ 
+         # Final stage: dropout and linear layer
+         self.final_dropout = nn.Dropout1d(droprate)
+         self.final_linear = nn.Linear(depth * self.nn_input_dim, self.preprocessor.output_dim)
+         self._init_weights(self.final_linear, depth)
+ 
+     def set_alpha(self, alpha):
+         """Updates the dynamic binarization parameter."""
+         self.alpha = alpha
+ 
+     def _init_weights(self, layer, depth_now):
+         # Custom initialization.
+         # Considering the binary (-1, 1) nature of the input,
+         # initializing the layer in the (-1/dim, 1/dim) range bounds the output in (-1, 1).
+         # Knowing our input is roughly in the (-1, 1) range, this serves as a good initialization for tau.
+         if self.embedding_sizes:
+             init_tensor = torch.tensor([1 / size for size in self.embedding_sizes for _ in range(size)])
+             if init_tensor.shape[0] < self.nn_input_dim:  # means we have numericals too
+                 init_tensor = torch.cat((init_tensor, torch.ones(self.non_cat_input_dim)), dim=0)
+         else:
+             init_tensor = torch.ones(self.non_cat_input_dim)
+ 
+         init_tensor = init_tensor / ((depth_now + 1) * torch.tensor(len(self.attribute_names)))
+         init_tensor = init_tensor.unsqueeze(0).repeat_interleave(repeats=layer.weight.shape[0], dim=0).repeat_interleave(repeats=depth_now, dim=1)
+         layer.weight.data.uniform_(-1, 1)
+         layer.weight = torch.nn.Parameter(layer.weight * init_tensor)
+ 
+     def forward(self, x):
+         # Forward pass for a preprocessed input: embeds categoricals, combines them with
+         # the (already normalized) numericals, and feeds the result to the network.
+ 
+         # Separate categorical and numerical features for easier handling
+         cat_features = [x[:, i].long() for i in self.categorical_indices]
+         non_cat_features = [x[:, i] for i in range(x.size(1)) if i not in self.categorical_indices]
+         non_cat_features = torch.stack(non_cat_features, dim=1) if non_cat_features else x.new_empty(x.size(0), 0)
+ 
+         # Embed categoricals
+         embedded_features = [embedding(cat_feature) for embedding, cat_feature in zip(self.embeddings, cat_features)]
+         # Combine categoricals and numericals (if there are no categoricals, use the numericals alone)
+         if embedded_features:
+             embedded_features = torch.cat(embedded_features, dim=1)
+             nninput = torch.cat([embedded_features, non_cat_features], dim=1)
+         else:
+             nninput = non_cat_features
+ 
+         self.nninput = nninput
+ 
+         # Forward pass through the network
+         output = self.forward_from_embeddings(self.nninput)
+         self.output = output
+         return output
+ 
+     def forward_from_embeddings(self, x):
+         # Forward function for normalized numericals and embedded categoricals
+         tau = self.tau_initial
+         tau = torch.repeat_interleave(tau, x.shape[0], 0)  # tau is 1xF; cast it over the batch
+         # For each depth
+         for i in range(0, self.depth * 3, 3):
+             # encode, drop, and find the next tau
+             encoding_layer = self.layers[i]
+             dropout_layer = self.layers[i + 1]
+             linear_layer = self.layers[i + 2]
+             # encode and drop
+             encoded_x = dropout_layer(encoding_layer(x, tau, self.alpha))
+             # save encodings and thresholds
+             # notice that the threshold is -tau, not tau, since we binarize x + tau
+             if i == 0:
+                 encodings = encoded_x
+                 taus = -tau
+             else:
+                 encodings = torch.cat((encodings, encoded_x), dim=-1)
+                 taus = torch.cat((taus, -tau), dim=-1)
+             # find the next thresholds
+             tau = linear_layer(encodings)  # redundant for the last layer, not used there
+ 
+         self.encodings = encodings
+         self.taus = taus
+         # Final stage: drop and linear
+         output = self.final_linear(self.final_dropout(encodings))
+ 
+         return output
+ 
+     def find_boundaries(self, x):
+         """
+         Given an input, finds boundaries for numerical features and valid categories for categorical features.
+         Expects a preprocessed sample; performs a forward pass to populate self.encodings and self.taus.
+         """
+         # Ensure x has the shape [1, input_dim]
+         if x.ndim == 1:
+             x = x.unsqueeze(0)  # Add batch dimension if not present
+ 
+         # Perform a forward pass to update self.encodings and self.taus
+         self(x)
+ 
+         # self.taus has the shape [1, depth * input_dim];
+         # reshape to [depth, input_dim] for easier boundary finding
+         taus_reshaped = self.taus.view(self.depth, self.nn_input_dim)
+ 
+         # embedded and normalized input
+         embedded_x = self.nninput
+ 
+         # Initialize boundaries. Numericals are in the (-1, 1) range and categoricals come from embeddings,
+         # so -100, 100 are safe min and max. -inf, +inf is not chosen since it is problematic for later sampling.
+         upper_boundaries = torch.full((embedded_x.size(1),), 100.0)
+         lower_boundaries = torch.full((embedded_x.size(1),), -100.0)
+ 
+         # Compare each threshold in self.taus with the corresponding input value
+         for feature_index in range(self.nn_input_dim):
+             for depth_index in range(self.depth):
+                 threshold = taus_reshaped[depth_index, feature_index]
+                 input_value = embedded_x[0, feature_index]
+ 
+                 # If the threshold is greater than the input value and less than the current upper boundary, update the upper boundary
+                 if threshold > input_value and threshold < upper_boundaries[feature_index]:
+                     upper_boundaries[feature_index] = threshold
+ 
+                 # If the threshold is less than the input value and greater than the current lower boundary, update the lower boundary
+                 if threshold < input_value and threshold > lower_boundaries[feature_index]:
+                     lower_boundaries[feature_index] = threshold
+ 
+         # Convert boundaries to a list of tuples [(lower, upper), ...] for each feature
+         boundaries = list(zip(lower_boundaries.tolist(), upper_boundaries.tolist()))
+ 
+         self.upper_boundaries = upper_boundaries
+         self.lower_boundaries = lower_boundaries
+ 
+         return boundaries
+ 
+     def categories_within_boundaries(self):
+         """
+         For each categorical feature, checks if embedding weights fall within the specified upper and lower boundaries.
+         Returns a dictionary with categorical feature indices as keys and lists of category indices that fall within the boundaries.
+         """
+         categories_within_bounds = {}
+         emb_st = 0
+         for cat_index, emb_layer in zip(range(len(self.categorical_indices)), self.embeddings):
+             # Extract upper and lower boundaries for this categorical feature
+             lower_bound = self.lower_boundaries[emb_st:emb_st + self.embedding_sizes[cat_index]]
+             upper_bound = self.upper_boundaries[emb_st:emb_st + self.embedding_sizes[cat_index]]
+             emb_st = emb_st + self.embedding_sizes[cat_index]
+             # Initialize list to hold categories that fall within the boundaries
+             categories_within = []
+ 
+             # Iterate over each embedding vector in the layer
+             for i, weight in enumerate(emb_layer.weight):
+                 # Check if the embedding weight falls within the boundaries
+                 if torch.all(weight >= lower_bound) and torch.all(weight <= upper_bound):
+                     categories_within.append(i)  # Using index i as the category identifier
+ 
+             # Store the categories that fall within the boundaries for this feature
+             categories_within_bounds[cat_index] = categories_within
+ 
+         return categories_within_bounds
+ 
+     def global_importance(self):
+         final_layer_weight = torch.clone(self.final_linear.weight).detach().numpy()
+         importances = np.sum(np.abs(final_layer_weight), 0)
+         importances = importances.reshape(importances.shape[0] // self.nn_input_dim, self.nn_input_dim)
+         importances = np.sum(importances, 0)
+         importances_features = []
+         st = 0
+         for i in range(len(self.attribute_names)):
+             try:
+                 # Categorical feature: sum the importances over its embedding dimensions
+                 importances_features.append(np.sum(importances[st:st + self.embedding_sizes[i]]))
+                 st = st + self.embedding_sizes[i]
+             except IndexError:
+                 # Numerical feature (no embedding-size entry): take its importance directly
+                 importances_features.append(importances[st])
+                 st = st + 1
+         return np.argsort(importances_features)[::-1], np.sort(importances_features)[::-1]
+ 
+     def influence_matrix(self):
+         """
+         Derives a directed influence graph from how each feature affects the others' thresholds via the weight matrices.
+         """
+         def create_block_sum_matrix(sizes, matrix):
+             L = len(sizes)
+             # Initialize the output matrix with zeros, using PyTorch
+             block_sum_matrix = torch.zeros((L, L))
+ 
+             # Define the starting row and column indices for slicing
+             start_row = 0
+             for i, row_size in enumerate(sizes):
+                 start_col = 0
+                 for j, col_size in enumerate(sizes):
+                     # Calculate the sum of the current block using PyTorch
+                     block_sum = torch.sum(matrix[start_row:start_row + row_size, start_col:start_col + col_size])
+                     block_sum_matrix[i, j] = block_sum
+                     # Update the starting column index for the next block in the row
+                     start_col += col_size
+                 # Update the starting row index for the next block in the column
+                 start_row += row_size
+ 
+             return block_sum_matrix
+ 
+         def add_ones_until_target(initial_list, target_sum):
+             # Continue adding 1s until the sum of the list equals the target sum
+             while sum(initial_list) < target_sum:
+                 initial_list.append(1)
+             return initial_list
+ 
+         for i in range(0, self.depth * 3, 3):
+             # each linear layer maps the encodings so far to the next thresholds
+             weight_now = self.layers[i + 2].weight.detach()
+             weight_now_reshaped = weight_now.reshape((weight_now.shape[0], weight_now.shape[1] // self.nn_input_dim, self.nn_input_dim))  # shape: output x depth x input
+             if i == 0:
+                 effects = torch.sum(torch.abs(weight_now_reshaped), dim=1) / self.depth
+             else:
+                 effects = effects + torch.sum(torch.abs(weight_now_reshaped), dim=1) / self.depth
+ 
+         effects = effects.t()  # shape: input x output
+ 
+         # Pad the embedding sizes with 1s so every numerical feature gets its own block
+         modified_list = add_ones_until_target(copy.deepcopy(self.embedding_sizes), effects.shape[0])
+ 
+         effects = create_block_sum_matrix(modified_list, effects)
+ 
+         return effects
+ 
+     def explain_without_causal_effects(self, x):
+         """
+         Explains the network's decision for an input sample.
+         For numericals, extracts the upper and lower boundaries around the sample;
+         for categoricals, displays the possible categories.
+         Also calculates the contribution of each feature to the final result.
+         """
+         self.find_boundaries(x)  # find upper and lower boundaries for all network inputs
+ 
+         # find valid categories for categorical features
+         valid_categories = self.categories_within_boundaries()
+ 
+         # numerical boundaries
+         upper_numerical = self.upper_boundaries[sum(self.embedding_sizes):].detach().numpy()
+         lower_numerical = self.lower_boundaries[sum(self.embedding_sizes):].detach().numpy()
+ 
+         # Find the contribution of each feature in the final linear layer, distributing the bias evenly
+         contributions = self.encodings * self.final_linear.weight + self.final_linear.bias.unsqueeze(dim=-1) / self.final_linear.weight.shape[1]
+         contributions = contributions.detach().reshape((contributions.shape[0], contributions.shape[1] // self.nn_input_dim, self.nn_input_dim))
+         contributions = torch.sum(contributions, dim=1)
+ 
+         # Initialize an empty list to store the summed contributions
+         summed_contributions = []
+ 
+         # Initialize start index for slicing
+         start_idx = 0
+ 
+         # Sum the contribution of each categorical feature within its respective embedding
+         for size in self.embedding_sizes:
+             # Calculate the end index for the current chunk
+             end_idx = start_idx + size
+ 
+             # Sum the contributions in the current chunk
+             chunk_sum = contributions[:, start_idx:end_idx].sum(dim=1, keepdim=True)
+ 
+             # Append the summed chunk to the list
+             summed_contributions.append(chunk_sum)
+ 
+             # Update the start index for the next chunk
+             start_idx = end_idx
+ 
+         # If there are remaining elements not covered by embedding_sizes, add them as is (numerical features)
+         if start_idx < contributions.shape[1]:
+             remaining = contributions[:, start_idx:]
+             summed_contributions.append(remaining)
+ 
+         # Concatenate the summed contributions back into a tensor
+         summed_contributions = torch.cat(summed_contributions, dim=1)
+         # This handles multi-class explanations; for binary classification this index is 0 automatically.
+         # Note: multi-output regression is not supported yet; this would just return the largest regressed value's explanation.
+         highest_index = torch.argmax(summed_contributions.sum(dim=1))
+         # This is the contribution from each feature
+         result = summed_contributions[highest_index]
+         self.result = result
+ 
+         # Explanations and contributions are in the network's ordered format (categoricals first, numericals later).
+         # Bring them back to the original order of the user input,
+         # combining the categorical and numerical explanations and contributions.
+         Explanation = [None] * (len(self.categorical_indices) + len(upper_numerical))
+         Contribution = np.zeros((len(self.categorical_indices) + len(upper_numerical),))
+ 
+         # Fill in the categorical entries
+         for j, cat_index in enumerate(self.categorical_indices):
+             Explanation[cat_index] = valid_categories[j]
+             Contribution[cat_index] = result[j].numpy()
+ 
+         # INVERSE TRANSFORM, PART 1 -------------------------------------------------------------------------------------
+         # Inverse transform the upper and lower numerical boundaries
+         len_num = len(upper_numerical)
+         if len_num > 0:
+             upper_numerical = self.preprocessor.scaler.inverse_transform(upper_numerical.reshape(1, -1))
+             lower_numerical = self.preprocessor.scaler.inverse_transform(lower_numerical.reshape(1, -1))
+             if len_num > 1:
+                 upper_numerical = np.squeeze(upper_numerical)
+                 lower_numerical = np.squeeze(lower_numerical)
+         upper_iter = iter(upper_numerical)
+         lower_iter = iter(lower_numerical)
+ 
+         cnt = 0
+         for i in range(len(Explanation)):
+             if Explanation[i] is None:
+                 # Note the denormalization here
+                 Explanation[i] = next(lower_iter), next(upper_iter)
+                 if len(self.categorical_indices) > 0:
+                     Contribution[i] = result[j + cnt + 1].numpy()
+                 else:
+                     Contribution[i] = result[cnt].numpy()
+                 cnt = cnt + 1
+ 
+         attribute_names_list = []
+         revised_explanations_list = []
+         contributions_list = []
+ 
+         # Process each feature to fill the lists
+         for idx, attr_name in enumerate(self.attribute_names):
+             if isinstance(Explanation[idx], list):  # Categorical
+                 # INVERSE TRANSFORM, PART 2 ---------------------------------------------------------------------------
+                 # Inverse transform the categoricals
+                 category_names = [key for key, value in self.label_encoders[attr_name].items() if value in Explanation[idx]]
+                 revised_explanation = " ,OR, ".join(category_names)
+             elif isinstance(Explanation[idx], tuple):  # Numerical
+                 revised_explanation = f"{Explanation[idx][0].item()} to {Explanation[idx][1].item()}"
+             else:
+                 revised_explanation = "Unknown"  # shouldn't really happen
+ 
+             # Append to the lists
+             attribute_names_list.append(attr_name)
+             revised_explanations_list.append(revised_explanation)
+             contributions_list.append(Contribution[idx] if idx < len(Contribution) else None)
+ 
+         # Construct the DataFrame
+         Explanation_df = pd.DataFrame({
+             'Name': attribute_names_list,
+             'Category': revised_explanations_list,
+             'Contribution': contributions_list
+         })
+ 
+         result = self.preprocessor.inverse_transform_y(self.output)
+ 
+         return copy.deepcopy(Explanation_df), self.output.clone(), copy.deepcopy(result), copy.deepcopy(Explanation)
+ 
+     def explain(self, x, include_causal_analysis=False):
+         """
+         Explains a single sample. Optionally, for the causal analysis, fixes all features but one,
+         sweeps that feature across its own categories (or intervals), and reports the average change
+         from the other categories to the current one.
+         """
+         def update_intervals(available_intervals, incoming_interval):
+             updated_intervals = []
+             for interval in available_intervals:
+                 if incoming_interval[1] <= interval[0] or incoming_interval[0] >= interval[1]:
+                     # The incoming interval does not overlap; keep the interval as is
+                     updated_intervals.append(interval)
+                 else:
+                     # There is some overlap; possibly split the interval
+                     if incoming_interval[0] > interval[0]:
+                         # Add the left part that doesn't overlap
+                         updated_intervals.append((interval[0], incoming_interval[0]))
+                     if incoming_interval[1] < interval[1]:
+                         # Add the right part that doesn't overlap
+                         updated_intervals.append((incoming_interval[1], interval[1]))
+             return updated_intervals
+ 
+         def sample_from_intervals(available_intervals):
+             if not available_intervals:
+                 return None
+             # Choose a random interval
+             chosen_interval = random.choice(available_intervals)
+             # Sample a random point within this interval
+             return random.uniform(chosen_interval[0], chosen_interval[1])
+ 
+         Explanation_df, output, result, Explanation = self.explain_without_causal_effects(x)
+         if include_causal_analysis:
+             # Causal analysis
+             causal_effect = np.zeros((x.shape[-1],))
+             numerical_cnt = 0
+             for idx, attr_name in enumerate(self.attribute_names):
+                 if isinstance(Explanation[idx], list):  # Categorical
+                     all_category_names = [value for key, value in self.label_encoders[attr_name].items()]
+                     sweeped_category_names = [value for key, value in self.label_encoders[attr_name].items() if value in Explanation[idx]]
+ 
+                     is_category_empty = list(set(all_category_names) - set(sweeped_category_names)) == []
+ 
+                     cnt = 0
+                     while not is_category_empty:
+                         new_x = x.clone()
+                         next_category = list(set(all_category_names) - set(sweeped_category_names))[0]
+                         new_x[0, idx] = float(next_category)
+                         Explanation_df_new, output_new, result_new, Explanation_new = self.explain_without_causal_effects(new_x)
+                         sweeped_category_names = sweeped_category_names + [value for key, value in self.label_encoders[attr_name].items() if value in Explanation_new[idx]]
+ 
+                         is_category_empty = list(set(all_category_names) - set(sweeped_category_names)) == []
+ 
+                         causal_effect[idx] = causal_effect[idx] + (output - output_new).detach().numpy()[0, 0]
+                         cnt = cnt + 1
+                     if cnt > 0:
+                         causal_effect[idx] = causal_effect[idx] / cnt
+ 
+                 else:  # Numerical
+                     search_complete = False
+                     # Initial available interval: we know (-100, 100) from the initial lower and upper bounds
+                     available_intervals = [(-100, 100)]
+ 
+                     # Remove the interval the sample already falls into
+                     self.explain_without_causal_effects(x)
+                     upper_numerical = self.upper_boundaries[sum(self.embedding_sizes):].detach().numpy()
+                     lower_numerical = self.lower_boundaries[sum(self.embedding_sizes):].detach().numpy()
+                     incoming_interval = (lower_numerical[numerical_cnt], upper_numerical[numerical_cnt])
+                     available_intervals = update_intervals(available_intervals, incoming_interval)
+                     cnt = 0
+                     while not search_complete:
+                         new_sample = sample_from_intervals(available_intervals)
+                         new_x = x.clone()
+                         new_x[0, idx] = new_sample
+                         Explanation_df_new, output_new, result_new, Explanation_new = self.explain_without_causal_effects(new_x)
+                         causal_effect[idx] = causal_effect[idx] + (output - output_new).detach().numpy()[0, 0]
+                         cnt = cnt + 1
+                         upper_numerical = self.upper_boundaries[sum(self.embedding_sizes):].detach().numpy()
+                         lower_numerical = self.lower_boundaries[sum(self.embedding_sizes):].detach().numpy()
+                         incoming_interval = (lower_numerical[numerical_cnt], upper_numerical[numerical_cnt])
+                         available_intervals = update_intervals(available_intervals, incoming_interval)
+                         if available_intervals == []:
+                             search_complete = True
+                     if cnt > 0:
+                         causal_effect[idx] = causal_effect[idx] / cnt
+                     numerical_cnt = numerical_cnt + 1
+ 
+             Explanation_df['Causal Effects'] = causal_effect
+         return Explanation_df, output, result
+ 
+     def sample_from_boundaries(self):
+         """
+         Assumes the upper and lower boundaries have already been extracted (e.g., self.explain was run on an input).
+         Samples a value for each feature within the stored upper and lower boundaries:
+         for numericals a value is sampled; for categoricals a category is sampled from the possible categories.
+         Returns:
+         - A tensor containing sampled values within the given boundaries for each feature.
+         """
+         # First sample from the categories
+         categories_within_bounds = self.categories_within_boundaries()
+         try:
+             sampled_indices = [random.choice(categories) for categories in categories_within_bounds.values()]
+         except IndexError:
+             # A category list may be empty in rare edge cases; recompute the bounds and retry
+             categories_within_bounds = self.categories_within_boundaries()
+             sampled_indices = [random.choice(categories) for categories in categories_within_bounds.values()]
+ 
+         # Then from the numericals
+         samples = []
+         cnt = 0
+         for lower, upper in zip(self.lower_boundaries[sum(self.embedding_sizes):], self.upper_boundaries[sum(self.embedding_sizes):]):
+             # Sample from a uniform distribution between the lower and upper boundaries
+             sample = lower + (upper - lower) * torch.rand(1)
+             samples.append(sample)
+             cnt = cnt + 1
+ 
+         # Combine categoricals and numericals.
+         # Initialize an empty list to hold the combined samples
+         combined_samples = [None] * (len(self.categorical_indices) + len(samples))
+ 
+         # Fill in the categorical samples
+         for i, cat_index in enumerate(self.categorical_indices):
+             combined_samples[cat_index] = torch.tensor([sampled_indices[i]], dtype=torch.float)
+ 
+         # Fill in the numerical samples
+         num_samples_iter = iter(samples)
+         for i in range(len(combined_samples)):
+             if combined_samples[i] is None:
+                 combined_samples[i] = next(num_samples_iter)
+ 
+         # Combine into a single tensor
+         combined_tensor = torch.cat(combined_samples, dim=-1)
+         return combined_tensor.unsqueeze(dim=0)
+ 
+     def generate(self):
+         """
+         Generates a data sample from the learned network.
+         """
+         def sample_with_tau(tau, max_bound, min_bound):
+             # Sample an encoding according to tau and the current lower and upper bounds
+             sampled = torch.zeros((self.nn_input_dim))
+             st = 0
+             # Randomly pick from the valid categories
+             for embedding in self.embeddings:
+                 categories_within = []
+ 
+                 # Iterate over each embedding vector in the layer
+                 for i, weight in enumerate(embedding.weight):
+                     # Check if the embedding weight falls within the boundaries
+                     if torch.all(weight >= min_bound[st:st + embedding.weight.shape[-1]]) and torch.all(weight <= max_bound[st:st + embedding.weight.shape[-1]]):
+                         categories_within.append(i)  # Using index i as the category identifier
+                 feature_now = embedding.weight[np.random.choice(categories_within), :]
+                 cnt = 0
+                 for j in range(st, st + embedding.weight.shape[-1]):
+                     if feature_now[cnt] > -tau[0, j]:
+                         sampled[j] = 1.0
+                     elif feature_now[cnt] <= -tau[0, j]:
+                         sampled[j] = -1.0
+                     cnt = cnt + 1
+                 st = st + embedding.weight.shape[-1]
+ 
+             # Randomly sample the numericals
+             for i in range(st, self.nn_input_dim):
+                 if -tau[0, i] > max_bound[i]:     # in this case you have to pick -1
+                     sampled[i] = -1.0
+                 elif -tau[0, i] <= min_bound[i]:  # in this case you have to pick 1
+                     sampled[i] = 1.0
+                 else:
+                     sampled[i] = (torch.randint(low=0, high=2, size=(1,)) * 2 - 1).float()
+             return sampled
+ 
+         def bound_update(tau, max_bound, min_bound, sampled):
+             for i in range(self.nn_input_dim):
+                 if sampled[i] > 0:     # the input is larger than -tau, so -tau may set a lower bound
+                     if -tau[0, i] > min_bound[i]:
+                         min_bound[i] = -tau[0, i]
+                 elif sampled[i] <= 0:  # the input is smaller than -tau, so -tau may set an upper bound
+                     if -tau[0, i] < max_bound[i]:
+                         max_bound[i] = -tau[0, i]
+             return max_bound, min_bound
+ 
+         # Read the first tau
+         tau = self.tau_initial
+ 
+         # Set initial maximum and minimum bounds
+         max_bound = torch.zeros((self.nn_input_dim)) + 100.0
+         min_bound = torch.zeros((self.nn_input_dim)) - 100.0
+ 
+         for i in range(0, self.depth * 3, 3):
+             encoding_layer = self.layers[i]  # NOT USED HERE; WE ENCODE RANDOMLY, MANUALLY
+             dropout_layer = self.layers[i + 1]
+             linear_layer = self.layers[i + 2]
+             # Sample with the current tau
+             sample_now = sample_with_tau(tau, max_bound, min_bound)
+             # Update the bounds with the new sample
+             max_bound, min_bound = bound_update(tau, max_bound, min_bound, sample_now)
+             encoded_x = dropout_layer(sample_now.unsqueeze(dim=0))
+             if i == 0:
+                 encodings = encoded_x
+                 taus = -tau
+             else:
+                 encodings = torch.cat((encodings, encoded_x), dim=-1)
+                 taus = torch.cat((taus, -tau), dim=-1)
+ 
+             tau = linear_layer(encodings)  # not used for the last layer
+ 
+         self.encodings = encodings
+         self.taus = taus
+         self.upper_boundaries = torch.clone(max_bound)
+         self.lower_boundaries = torch.clone(min_bound)
+ 
+         generated_sample = self.sample_from_boundaries()
+         # Sanity check: the manually tracked and the network-derived boundaries should match
+         # if torch.equal(self.upper_boundaries, max_bound) and torch.equal(self.lower_boundaries, min_bound):
+         #     print(True)
+ 
+         self.explain_without_causal_effects(generated_sample)
+         generated_sample_original_format = self.preprocessor.inverse_transform_X(generated_sample)
+         result = self.preprocessor.inverse_transform_y(self.output)
+ 
+         return generated_sample, generated_sample_original_format, result
+ 
+     def generate_from_same_category(self, x):
+         # Explain x to extract its boundaries, then sample a new point from the same decision region
+         self.explain_without_causal_effects(x)
+         generated_sample = self.sample_from_boundaries()
+         generated_sample_original_format = self.preprocessor.inverse_transform_X(generated_sample)
+         result = self.preprocessor.inverse_transform_y(self.output)
+         return generated_sample, generated_sample_original_format, result
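LEURN's explanation API above returns a table (feature name, the rule category or interval the sample falls into, and its additive contribution), plus the raw network output and the prediction in the original label space; `generate()` samples a synthetic point from the learned rules. A brief usage sketch, assuming `model` is a trained LEURN instance and `x` is a preprocessed [1, n_features] tensor as produced by DATA.py:

```python
model.eval()

# Local explanation; include_causal_analysis adds a 'Causal Effects' column
exp_df, raw_output, prediction = model.explain(x, include_causal_analysis=True)
print(exp_df)

# Global diagnostics: feature ranking and feature-to-feature influence matrix
ranking, sorted_importances = model.global_importance()
effects = model.influence_matrix()

# Data generation: from scratch, or from the same decision region as x
new_x, new_x_original_format, new_pred = model.generate()
same_region_x, same_region_original, pred = model.generate_from_same_category(x)
```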
LICENSE ADDED
@@ -0,0 +1,201 @@
+                                  Apache License
+                            Version 2.0, January 2004
+                         http://www.apache.org/licenses/
+ 
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+ 
+    1. Definitions.
+ 
+       "License" shall mean the terms and conditions for use, reproduction,
+       and distribution as defined by Sections 1 through 9 of this document.
+ 
+       "Licensor" shall mean the copyright owner or entity authorized by
+       the copyright owner that is granting the License.
+ 
+       "Legal Entity" shall mean the union of the acting entity and all
+       other entities that control, are controlled by, or are under common
+       control with that entity. For the purposes of this definition,
+       "control" means (i) the power, direct or indirect, to cause the
+       direction or management of such entity, whether by contract or
+       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+       outstanding shares, or (iii) beneficial ownership of such entity.
+ 
+       "You" (or "Your") shall mean an individual or Legal Entity
+       exercising permissions granted by this License.
+ 
+       "Source" form shall mean the preferred form for making modifications,
+       including but not limited to software source code, documentation
+       source, and configuration files.
+ 
+       "Object" form shall mean any form resulting from mechanical
+       transformation or translation of a Source form, including but
+       not limited to compiled object code, generated documentation,
+       and conversions to other media types.
+ 
+       "Work" shall mean the work of authorship, whether in Source or
+       Object form, made available under the License, as indicated by a
+       copyright notice that is included in or attached to the work
+       (an example is provided in the Appendix below).
+ 
+       "Derivative Works" shall mean any work, whether in Source or Object
+       form, that is based on (or derived from) the Work and for which the
+       editorial revisions, annotations, elaborations, or other modifications
+       represent, as a whole, an original work of authorship. For the purposes
+       of this License, Derivative Works shall not include works that remain
+       separable from, or merely link (or bind by name) to the interfaces of,
+       the Work and Derivative Works thereof.
+ 
+       "Contribution" shall mean any work of authorship, including
+       the original version of the Work and any modifications or additions
+       to that Work or Derivative Works thereof, that is intentionally
+       submitted to Licensor for inclusion in the Work by the copyright owner
+       or by an individual or Legal Entity authorized to submit on behalf of
+       the copyright owner. For the purposes of this definition, "submitted"
+       means any form of electronic, verbal, or written communication sent
+       to the Licensor or its representatives, including but not limited to
+       communication on electronic mailing lists, source code control systems,
+       and issue tracking systems that are managed by, or on behalf of, the
+       Licensor for the purpose of discussing and improving the Work, but
+       excluding communication that is conspicuously marked or otherwise
+       designated in writing by the copyright owner as "Not a Contribution."
+ 
+       "Contributor" shall mean Licensor and any individual or Legal Entity
+       on behalf of whom a Contribution has been received by Licensor and
+       subsequently incorporated within the Work.
+ 
+    2. Grant of Copyright License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       copyright license to reproduce, prepare Derivative Works of,
+       publicly display, publicly perform, sublicense, and distribute the
+       Work and such Derivative Works in Source or Object form.
+ 
+    3. Grant of Patent License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       (except as stated in this section) patent license to make, have made,
+       use, offer to sell, sell, import, and otherwise transfer the Work,
+       where such license applies only to those patent claims licensable
+       by such Contributor that are necessarily infringed by their
+       Contribution(s) alone or by combination of their Contribution(s)
+       with the Work to which such Contribution(s) was submitted. If You
+       institute patent litigation against any entity (including a
+       cross-claim or counterclaim in a lawsuit) alleging that the Work
+       or a Contribution incorporated within the Work constitutes direct
+       or contributory patent infringement, then any patent licenses
+       granted to You under this License for that Work shall terminate
+       as of the date such litigation is filed.
+ 
+    4. Redistribution. You may reproduce and distribute copies of the
+       Work or Derivative Works thereof in any medium, with or without
+       modifications, and in Source or Object form, provided that You
+       meet the following conditions:
+ 
+       (a) You must give any other recipients of the Work or
+           Derivative Works a copy of this License; and
+ 
+       (b) You must cause any modified files to carry prominent notices
+           stating that You changed the files; and
+ 
+       (c) You must retain, in the Source form of any Derivative Works
+           that You distribute, all copyright, patent, trademark, and
+           attribution notices from the Source form of the Work,
+           excluding those notices that do not pertain to any part of
+           the Derivative Works; and
+ 
+       (d) If the Work includes a "NOTICE" text file as part of its
+           distribution, then any Derivative Works that You distribute must
+           include a readable copy of the attribution notices contained
+           within such NOTICE file, excluding those notices that do not
+           pertain to any part of the Derivative Works, in at least one
+           of the following places: within a NOTICE text file distributed
+           as part of the Derivative Works; within the Source form or
+           documentation, if provided along with the Derivative Works; or,
+           within a display generated by the Derivative Works, if and
+           wherever such third-party notices normally appear. The contents
+           of the NOTICE file are for informational purposes only and
+           do not modify the License. You may add Your own attribution
+           notices within Derivative Works that You distribute, alongside
+           or as an addendum to the NOTICE text from the Work, provided
+           that such additional attribution notices cannot be construed
+           as modifying the License.
+ 
+       You may add Your own copyright statement to Your modifications and
+       may provide additional or different license terms and conditions
+       for use, reproduction, or distribution of Your modifications, or
+       for any such Derivative Works as a whole, provided Your use,
+       reproduction, and distribution of the Work otherwise complies with
+       the conditions stated in this License.
+ 
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+       any Contribution intentionally submitted for inclusion in the Work
+       by You to the Licensor shall be under the terms and conditions of
+       this License, without any additional terms or conditions.
+       Notwithstanding the above, nothing herein shall supersede or modify
+       the terms of any separate license agreement you may have executed
+       with Licensor regarding such Contributions.
+ 
+    6. Trademarks. This License does not grant permission to use the trade
+       names, trademarks, service marks, or product names of the Licensor,
+       except as required for reasonable and customary use in describing the
+       origin of the Work and reproducing the content of the NOTICE file.
+ 
+    7. Disclaimer of Warranty. Unless required by applicable law or
+       agreed to in writing, Licensor provides the Work (and each
+       Contributor provides its Contributions) on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+       implied, including, without limitation, any warranties or conditions
+       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+       PARTICULAR PURPOSE. You are solely responsible for determining the
+       appropriateness of using or redistributing the Work and assume any
+       risks associated with Your exercise of permissions under this License.
+ 
+    8. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall any Contributor be
+       liable to You for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as a
+       result of this License or out of the use or inability to use the
+       Work (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if such Contributor
+       has been advised of the possibility of such damages.
+ 
+    9. Accepting Warranty or Additional Liability. While redistributing
+       the Work or Derivative Works thereof, You may choose to offer,
+       and charge a fee for, acceptance of support, warranty, indemnity,
+       or other liability obligations and/or rights consistent with this
+       License. However, in accepting such obligations, You may act only
+       on Your own behalf and on Your sole responsibility, not on behalf
+       of any other Contributor, and only if You agree to indemnify,
+       defend, and hold each Contributor harmless for any liability
+       incurred by, or claims asserted against, such Contributor by reason
+       of your accepting any such warranty or additional liability.
+ 
+    END OF TERMS AND CONDITIONS
+ 
+    APPENDIX: How to apply the Apache License to your work.
+ 
+       To apply the Apache License to your work, attach the following
+       boilerplate notice, with the fields enclosed by brackets "[]"
+       replaced with your own identifying information. (Don't include
+       the brackets!) The text should be enclosed in the appropriate
+       comment syntax for the file format. We also recommend that a
+       file or class name and description of purpose be included on the
+       same "printed page" as the copyright notice for easier
+       identification within third-party archives.
+ 
+    Copyright [yyyy] [name of copyright owner]
+ 
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
Presentation_Product.pdf ADDED
Binary file (648 kB).
 
Presentation_Technical.pdf ADDED
Binary file (297 kB).
 
README.md CHANGED
@@ -1,13 +1,27 @@
- ---
- title: LEURN
- emoji: 🚀
- colorFrom: blue
- colorTo: green
- sdk: streamlit
- sdk_version: 1.32.2
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # LEURN
+ Official repository for LEURN: Learning Explainable Univariate Rules with Neural Networks
+ https://arxiv.org/abs/2303.14937
+
+ Detailed information about LEURN is given in the presentations.
+ A demo covering training, local explanations, and data generation is provided in DEMO.py.
+
+ NEW! A Streamlit demo is now available.
+ Activate the environment and run the following from your command line:
+ streamlit run app.py
+ Make sure you check the explanation video at:
+ https://www.linkedin.com/posts/caglaraytekin_ai-machinelearning-dataanalysis-activity-7172866316691869697-5-nB?utm_source=share&utm_medium=member_desktop
+
+ NEW! LEURN now includes causal effects.
+ Thanks to its unique design, LEURN can run controlled experiments at lightning speed, discovering average causal effects (see the usage sketch below).
+ ![plot](./Causality_Example.png)
+
+ Main difference of this implementation from the paper:
+ - LEURN is now much simpler and uses a binarized tanh (k=1 always) with no degradation in performance.
+
+ Notes:
+ - For top performance, a thorough hyperparameter search, as described in the paper, is needed.
+ - Human-in-the-loop continuous training is not implemented in this repository.
+ - Deepcause provides consultancy services to help you get the most out of LEURN.
+
+ Contact:
+ caglar@deepcause.ai
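
For programmatic use outside the Streamlit app, the following is a minimal sketch of the training and causal-explanation flow. It mirrors the calls made in app.py (`split_and_processing`, `LEURN`, `Trainer`, and `model.explain(..., include_causal_analysis=True)`); the file name, target column, and hyperparameter values are illustrative placeholders, not part of the repository.

    # Sketch of the app.py flow; "my_data.csv" and "target" are placeholders.
    import pandas as pd
    from DATA import split_and_processing
    from LEURN import LEURN
    from TRAINER import Trainer

    df = pd.read_csv("my_data.csv")
    X, y = df.drop("target", axis=1), df["target"]
    categoricals = [X[col].dtype == object for col in X.columns]  # True for categorical columns

    # output_type: 0 = regression, 1 = binary, 2 = multi-class (same convention as app.py)
    X_train, X_val, X_test, y_train, y_val, y_test, preprocessor = split_and_processing(
        X, y, categoricals, output_type=1, attribute_names=list(X.columns))

    model = LEURN(preprocessor, depth=2, droprate=0.0)
    trainer = Trainer(model, X_train, X_val, y_train, y_val,
                      lr=1e-3, batch_size=1024, epochs=300, problem_type=1, verbose=False)
    trainer.train()  # restores the best validation checkpoint internally

    # Explain one test sample, including the average-causal-effect analysis
    explanation_df, result, result_original = model.explain(
        X_test[:1], include_causal_analysis=True)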
TRAINER.py ADDED
@@ -0,0 +1,186 @@
+ """
+ @author: Caglar Aytekin
+ contact: caglar@deepcause.ai
+ """
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import DataLoader, TensorDataset
+ from sklearn.metrics import accuracy_score as accuracy
+ from sklearn.metrics import roc_auc_score
+ from torch.optim.lr_scheduler import StepLR
+ import numpy as np
+ import copy
+
+ class Trainer:
+     def __init__(self, model, X_train, X_val, y_train, y_val, lr, batch_size, epochs, problem_type, verbose=True):
+         self.model = model
+         self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+         self.problem_type = problem_type
+         self.verbose = verbose
+         # problem_type: 0 = regression, 1 = binary classification, 2 = multi-class classification
+         if self.problem_type == 0:
+             self.criterion = nn.MSELoss()
+         elif self.problem_type == 1:
+             self.criterion = nn.BCEWithLogitsLoss()
+         elif self.problem_type == 2:
+             self.criterion = nn.CrossEntropyLoss()
+             y_train = y_train.squeeze().long()
+             y_val = y_val.squeeze().long()
+
+         train_dataset = TensorDataset(X_train, y_train)
+         val_dataset = TensorDataset(X_val, y_val)
+         self.train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+         self.val_loader = DataLoader(dataset=val_dataset, batch_size=len(val_dataset), shuffle=False)
+         self.batch_size = batch_size
+         self.epochs = epochs
+         # Lower is better for regression loss; higher is better for the classification metrics
+         self.best_metric = float('inf') if problem_type == 0 else float('-inf')
+         # Fallback checkpoint in case no epoch passes the save criteria below
+         self.best_model = copy.deepcopy(self.model.state_dict())
+         self.scheduler = StepLR(self.optimizer, step_size=epochs // 3, gamma=0.2)
+
+     def train_epoch(self):
+         self.model.train()
+         total_loss = 0
+         total = 0
+         correct = 0
+         for inputs, labels in self.train_loader:
+             self.optimizer.zero_grad()
+             outputs = self.model(inputs)
+             loss = self.criterion(outputs, labels)  # + torch.sum(torch.abs(self.model.causal_discovery()))*1
+             loss.backward()
+             self.optimizer.step()
+             total_loss += loss.item()
+             total += labels.size(0)
+             if self.problem_type == 1:
+                 correct += (torch.round(torch.sigmoid(outputs.data)).squeeze() == labels.squeeze()).sum().item()
+             elif self.problem_type == 2:
+                 correct += (torch.max(outputs.data, 1)[1] == labels.squeeze()).sum().item()
+         return total_loss / len(self.train_loader), correct / total
+
+     def validate(self):
+         self.model.eval()
+         val_loss = 0
+         total = 0
+         val_predictions = []
+         val_targets = []
+         with torch.no_grad():
+             for inputs, labels in self.val_loader:
+                 outputs = self.model(inputs)
+                 val_loss += self.criterion(outputs, labels).item()
+                 total += labels.size(0)
+                 if self.problem_type == 1:
+                     val_predictions.extend(torch.sigmoid(outputs).view(-1).cpu().numpy())
+                 elif self.problem_type == 2:
+                     val_predictions.extend(torch.max(outputs.data, 1)[1].view(-1).cpu().numpy())
+                 val_targets.extend(labels.view(-1).cpu().numpy())
+
+         if self.problem_type == 1:
+             val_roc_auc = roc_auc_score(val_targets, val_predictions)
+             val_acc = accuracy(val_targets, np.round(val_predictions))
+         elif self.problem_type == 2:
+             val_acc = accuracy(val_targets, val_predictions)
+             val_roc_auc = 0
+         else:
+             val_roc_auc = 0
+             val_acc = 0
+         return val_loss / len(self.val_loader), val_acc, val_roc_auc
+
+     def train(self):
+         for epoch in range(self.epochs):
+             # Increase alpha linearly, reaching 1 after one tenth of the epochs
+             alpha_now = np.minimum(1.0, float(epoch) / float(self.epochs / 10))
+             self.model.set_alpha(alpha_now)
+             # Only allow checkpointing once the alpha warm-up period is over
+             save_permit = epoch > self.epochs // 10
+             tr_loss, tr_acc = self.train_epoch()
+             val_loss, val_acc, val_roc_auc = self.validate()
+
+             # Problem type 0: select on validation loss
+             if self.problem_type == 0:
+                 if self.verbose:
+                     print(f'Epoch {epoch}: Train Loss {tr_loss:.4f}, Val Loss {val_loss:.4f}')
+                 if (val_loss < self.best_metric) and save_permit:
+                     self.best_metric = val_loss
+                     # Drop data remaining from training before checkpointing
+                     self.model.nninput = None
+                     self.model.encodings = None
+                     self.model.taus = None
+                     self.best_model = copy.deepcopy(self.model.state_dict())
+
+             # Problem type 1: select on validation ROC AUC
+             elif self.problem_type == 1:
+                 if self.verbose:
+                     print(f'Epoch {epoch}: Train Loss {tr_loss:.4f}, Train Acc {tr_acc:.4f}, Val Loss {val_loss:.4f}, Val Acc {val_acc:.4f}, Val ROC AUC {val_roc_auc:.4f}')
+                 if (val_roc_auc > self.best_metric) and save_permit:
+                     self.best_metric = val_roc_auc
+                     # Drop data remaining from training before checkpointing
+                     self.model.nninput = None
+                     self.model.encodings = None
+                     self.model.taus = None
+                     self.best_model = copy.deepcopy(self.model.state_dict())
+
+             # Problem type 2: select on validation accuracy
+             elif self.problem_type == 2:
+                 if self.verbose:
+                     print(f'Epoch {epoch}: Train Loss {tr_loss:.4f}, Train Acc {tr_acc:.4f}, Val Loss {val_loss:.4f}, Val Acc {val_acc:.4f}')
+                 if (val_acc > self.best_metric) and save_permit:
+                     self.best_metric = val_acc
+                     # Drop data remaining from training before checkpointing
+                     self.model.nninput = None
+                     self.model.encodings = None
+                     self.model.taus = None
+                     self.best_model = copy.deepcopy(self.model.state_dict())
+             self.scheduler.step()
+         # Load the model with the best validation metric
+         self.model.load_state_dict(self.best_model)
+
+     def evaluate(self, X_test, y_test, verbose=True):
+         test_loader = DataLoader(dataset=TensorDataset(X_test, y_test), batch_size=len(y_test), shuffle=False)
+         self.model.eval()
+         test_loss = 0
+         total = 0
+         test_predictions = []
+         test_targets = []
+         with torch.no_grad():
+             for inputs, labels in test_loader:
+                 outputs = self.model(inputs)
+                 test_loss += self.criterion(outputs, labels).item()
+                 total += labels.size(0)
+                 if self.problem_type == 1:
+                     test_predictions.extend(torch.sigmoid(outputs).view(-1).cpu().numpy())
+                 elif self.problem_type == 2:
+                     test_predictions.extend(torch.max(outputs.data, 1)[1].view(-1).cpu().numpy())
+                 test_targets.extend(labels.view(-1).cpu().numpy())
+
+         if self.problem_type == 1:
+             test_roc_auc = roc_auc_score(test_targets, test_predictions)
+             test_acc = accuracy(test_targets, np.round(test_predictions))
+             if verbose:
+                 print('ROC-AUC: ', test_roc_auc)
+             return test_roc_auc
+         elif self.problem_type == 2:
+             test_acc = accuracy(test_targets, test_predictions)
+             if verbose:
+                 print('ACC: ', test_acc)
+             return test_acc
+         else:
+             if verbose:
+                 print('MSE: ', test_loss / len(test_loader))
+             return test_loss / len(test_loader)
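
A minimal usage sketch for the class above, matching how app.py drives it; the hyperparameter values are placeholders and the tensors are assumed to come from `split_and_processing`.

    # Sketch only: assumes model and the processed tensor splits already exist.
    trainer = Trainer(model, X_train, X_val, y_train, y_val,
                      lr=1e-3, batch_size=1024, epochs=300, problem_type=1, verbose=True)
    trainer.train()                            # keeps the best validation checkpoint
    score = trainer.evaluate(X_test, y_test)   # ROC-AUC for problem_type 1, ACC for 2, MSE for 0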
app.py ADDED
@@ -0,0 +1,176 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import torch
+ from LEURN import LEURN
+ from DATA import split_and_processing
+ from TRAINER import Trainer
+
+ # Streamlit application layout
+ st.title("LEURN")
+
+ # Initialize or reset session states if necessary
+ if 'init' not in st.session_state:
+     st.session_state['training_completed'] = False
+     st.session_state['data_chosen'] = False
+     st.session_state['init'] = True
+     st.session_state['selected_row'] = False
+     st.session_state['explanation_made'] = False
+     st.session_state['result'] = False
+
+ # Upload CSV or Excel
+ st.subheader("File Uploader")
+ uploaded_file = st.file_uploader("Upload your Excel/CSV file", type=["csv", "xlsx"])
+ if uploaded_file is not None:
+     # Read the uploaded file
+     df = pd.read_csv(uploaded_file) if uploaded_file.type == "text/csv" else pd.read_excel(uploaded_file)
+     st.write("Data Preview:")
+     st.write(df.head())
+
+     st.subheader("Categorical Feature and Target Selection")
+     # Select the target variable
+     target = st.selectbox("Select the target variable", options=df.columns)
+
+     # Define features and target
+     X = df.drop(target, axis=1)
+     y = df[target]
+     attribute_names = X.columns
+
+     # Select categorical variables
+     st.write("Select categorical variables:")
+     categoricals = [st.checkbox(f"{col} is categorical", key=col) for col in X.columns]
+
+     # User input for model parameters
+     st.subheader("Model Training Parameters")
+     depth = st.selectbox("Select Model Depth", options=[1, 2, 3, 4, 5], index=2)
+     batch_size = st.selectbox("Select Batch Size", options=[64, 128, 256, 512, 1024, 2048, 4096], index=4)
+     lr = st.selectbox("Select Learning Rate", options=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2], index=3)
+     epochs = st.number_input("Enter Number of Epochs", min_value=1, max_value=1000, value=300)
+     droprate = st.slider("Select Dropout Rate", min_value=0.0, max_value=1.0, value=0.0, step=0.05)
+     output_type = st.radio("Select Output Type (0: regression, 1: binary classification, 2: multi-class classification)", options=[0, 1, 2], index=0)
+
+     if st.button("Train Neural Network"):
+         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         # Split and process
+         X_train, X_val, X_test, y_train, y_val, y_test, preprocessor = split_and_processing(X, y, categoricals, output_type, attribute_names)
+         # Initialize model
+         model = LEURN(preprocessor, depth=depth, droprate=droprate).to(device)
+         # Train model
+         model_trainer = Trainer(model, X_train, X_val, y_train, y_val, lr=lr, batch_size=batch_size, epochs=epochs, problem_type=output_type, verbose=False)
+         model_trainer.train()
+         # Load best model
+         model.load_state_dict(model_trainer.best_model)
+         # Get performances
+         perf_train = model_trainer.evaluate(X_train, y_train)
+         perf_val = model_trainer.evaluate(X_val, y_val)
+         perf_test = model_trainer.evaluate(X_test, y_test)
+         st.session_state['perf_train'] = perf_train
+         st.session_state['perf_val'] = perf_val
+         st.session_state['perf_test'] = perf_test
+
+         # Save the test dataset and model to explain/generate later
+         X_test_inverse = preprocessor.inverse_transform_X(X_test)
+         X_test_inverse.to_csv('test.csv', index=False)
+         st.session_state['training_completed'] = True
+         st.session_state['model'] = model
+
+     if st.session_state['training_completed'] == True:
+
+         # Print performances
+         st.write("Here are the performances; try different hyperparameters if not satisfied.")
+         if output_type == 0:
+             st.subheader("Training Results (MSE)")
+         elif output_type == 1:
+             st.subheader("Training Results (ROC-AUC)")
+         else:
+             st.subheader("Training Results (ACC)")
+
+         st.write(f"Training Score: {st.session_state['perf_train']:.4f}")
+         st.write(f"Validation Score: {st.session_state['perf_val']:.4f}")
+         st.write(f"Test Score: {st.session_state['perf_test']:.4f}")
+
+         # File uploader for explanation
+         st.subheader("Explain New Inputs")
+         uploaded_file_to_explain = st.file_uploader("Upload your Excel/CSV file to explain. The uploaded file should not contain the target variable.", type=["csv", "xlsx"])
+         if uploaded_file_to_explain is not None:
+             # Read the uploaded file
+             X_test_inverse = pd.read_csv(uploaded_file_to_explain) if uploaded_file_to_explain.type == "text/csv" else pd.read_excel(uploaded_file_to_explain)
+
+             # Save DataFrame
+             st.session_state['X_test_inverse_df'] = X_test_inverse.to_json()
+             st.session_state['data_chosen'] = True  # Flag to indicate data is chosen
+
+         if st.session_state['data_chosen'] == True:
+             # Load DataFrame from session state
+             X_test_inverse = pd.read_json(st.session_state['X_test_inverse_df'])
+
+             # Always display the DataFrame to ensure it's visible for selection
+             st.write("Test DataFrame:")
+             st.write(X_test_inverse)
+
+             # Let users select a row; selection is dynamic and updates session state
+             selected_index = st.selectbox("Select a row:", options=X_test_inverse.index, key="selected_index")
+
+             selected_row = X_test_inverse.loc[[st.session_state['selected_index']]]
+             st.write("Selected Data for Explanation:")
+             st.write(selected_row)
+             st.session_state['selected_row'] = selected_row
+
+             # Explain selected row
+             if st.button("Explain"):
+                 model = st.session_state['model']
+                 Exp_df_test_sample, result, result_original_format = model.explain(torch.from_numpy(model.preprocessor.transform_X(st.session_state['selected_row']).values.astype('float32')), include_causal_analysis=True)
+                 st.session_state['explanation_made'] = True
+                 st.session_state['Exp_df_test_sample'] = Exp_df_test_sample
+                 st.session_state['result_original_format'] = result_original_format
+                 st.session_state['result'] = result
+
+             # Print explanations
+             if st.session_state['explanation_made'] == True:
+                 st.write("Explanation DataFrame:")
+                 st.write(st.session_state['Exp_df_test_sample'])
+                 st.write("Predicted Output: (network format)")
+                 st.write(st.session_state['result'].detach().numpy().astype('str'))
+                 if output_type == 1:
+                     if np.sign(st.session_state['result'].detach().numpy()) > 0:
+                         st.write("The result here is positive; this means the output class below is represented by a positive sign. In the explanation dataframe, positive contributions increase the class likelihood.")
+                     else:
+                         st.write("The result here is negative; this means the output class below is represented by a negative sign. In the explanation dataframe, negative contributions increase the class likelihood.")
+
+                 st.write("Predicted Output: (original format)")
+                 st.write(st.session_state['result_original_format'])
+
+         # Data generation part
+         st.subheader("Generate Data From Scratch")
+         if st.button("Generate"):
+             model = st.session_state['model']
+             generated_sample_nn_friendly, generated_sample_original_input_format, output = model.generate()
+             Exp_df_generated_sample, result, result_original_format = model.explain(generated_sample_nn_friendly, include_causal_analysis=True)
+             st.write("Explanation DataFrame:")
+             st.write(Exp_df_generated_sample)
+             st.write("Predicted Output: (network format)")
+             st.write(result.detach().numpy().astype('str'))
+             if output_type == 1:
+                 if np.sign(result.detach().numpy()) > 0:
+                     st.write("The result here is positive; this means the output class below is represented by a positive sign. In the explanation dataframe, positive contributions increase the class likelihood.")
+                 else:
+                     st.write("The result here is negative; this means the output class below is represented by a negative sign. In the explanation dataframe, negative contributions increase the class likelihood.")
+
+             st.write("Predicted Output: (original format)")
+             st.write(result_original_format)
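
A note on the design: Streamlit re-executes the whole script on every widget interaction, which is why everything that must survive a rerun (the trained model, scores, explanations) is stashed in st.session_state. A minimal sketch of that pattern (names here are illustrative, not from the repository):

    import streamlit as st
    if 'model' not in st.session_state:   # runs only on the first execution
        st.session_state['model'] = None  # later replaced by the trained LEURN model
    model = st.session_state['model']     # survives subsequent reruns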
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ torch
+ pandas
+ openpyxl
+ openml
+ numpy
+ scikit-learn
+ streamlit==1.29.0
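
To reproduce the Space locally, the usual setup (assuming a working Python 3 environment with pip) would be:

    pip install -r requirements.txt
    streamlit run app.py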