In [4]:
import sys
sys.path.insert(0,'..')

In [10]:
import torch
from torch import nn


from train import train
import priors
import encoders
import positional_encodings
import utils
import bar_distribution
import transformer

from samlib.utils import chunker

In [12]:
# Keyword arguments shared by the `train(...)` calls in this notebook
# (they are splatted in via `**kwargs`).
kwargs = dict(nlayers=6, dropout=0.0, steps_per_epoch=100)
    
    
def train_and_compare_fast_gp_mix(*args, **kwargs):
    """Evaluate the `priors.fast_gp_mix` baseline, print it, then run `train`.

    Every positional and keyword argument is forwarded unchanged to `train`.
    The baseline is evaluated on a batch drawn via `args[0].get_batch_method`
    with 10000 as its first argument, `kwargs['bptt']` as its second and the
    number of features taken from `kwargs['extra_prior_kwargs_dict']`.

    Returns:
        (res, baseline_res): the value returned by `train` and the value
        returned by `priors.fast_gp_mix.evaluate`.

    Raises:
        KeyError: if `kwargs` lacks `'extra_prior_kwargs_dict'` (with
            `'hyperparameters'` and `'num_features'`) or `'bptt'`.
    """
    # `Losses` is not imported by the notebook's import cell, so without this
    # the function raises NameError on a fresh kernel.
    # NOTE(review): assumes the `train` module exposes `Losses` -- confirm.
    from train import Losses

    prior_kwargs = kwargs['extra_prior_kwargs_dict']
    hps = prior_kwargs['hyperparameters']
    num_features = prior_kwargs['num_features']
    # NOTE(review): the criterion is looked up at args[2]; the direct
    # `train(...)` call elsewhere in this notebook passes the criterion as the
    # second positional argument -- confirm the index matches the caller.
    baseline_res = priors.fast_gp_mix.evaluate(
        *args[0].get_batch_method(10000,kwargs['bptt'],num_features, hyperparameters=hps),
        hyperparameters=hps,
        use_mse=Losses.mse == args[2])
    print(baseline_res, 'with fast_gp_mix')

    res = train(*args, **kwargs)
    return res, baseline_res

def train_and_compare_fast_gp(*args, num_evals=1000, **kwargs):
    """Evaluate the `priors.fast_gp` baseline on CPU, print it, then run `train`.

    Args:
        *args: forwarded unchanged to `train`; `args[0]` must expose
            `get_batch_method` and `args[2]` is compared against `Losses.mse`.
        num_evals: first argument passed to `get_batch_method` for the
            baseline batch. It is consumed here and not forwarded to `train`.
        **kwargs: forwarded unchanged to `train`; must contain `'bptt'` and
            `'extra_prior_kwargs_dict'` (with `'hyperparameters'` and
            `'num_features'`).

    Returns:
        (res, baseline_res): the value returned by `train` and the value
        returned by `priors.fast_gp.evaluate`.

    Raises:
        KeyError: if one of the required `kwargs` entries is missing.
    """
    # `Losses` is not imported by the notebook's import cell, so without this
    # the function raises NameError on a fresh kernel.
    # NOTE(review): assumes the `train` module exposes `Losses` -- confirm.
    from train import Losses

    prior_kwargs = kwargs['extra_prior_kwargs_dict']
    hps = prior_kwargs['hyperparameters']
    num_features = prior_kwargs['num_features']
    # NOTE(review): the criterion is looked up at args[2]; the direct
    # `train(...)` call elsewhere in this notebook passes the criterion as the
    # second positional argument -- confirm the index matches the caller.
    baseline_res = priors.fast_gp.evaluate(
        *args[0].get_batch_method(num_evals,kwargs['bptt'],num_features, hyperparameters=hps, device='cpu'),
        hyperparameters=hps,
        use_mse=Losses.mse == args[2], device='cpu')
    print(baseline_res, 'with fast_gp')

    res = train(*args, **kwargs)
    return res, baseline_res

def train_and_compare_gp(*args, num_evals=10000, **kwargs):
    """Evaluate the `priors.gp` baseline, print it, then run `train`.

    Args:
        *args: forwarded unchanged to `train`; `args[0]` must expose
            `get_batch_method` and `args[2]` is compared against `Losses.mse`.
        num_evals: first argument passed to `get_batch_method` for the
            baseline batch. It is consumed here and not forwarded to `train`.
        **kwargs: forwarded unchanged to `train`; must contain `'bptt'` and
            `'extra_prior_kwargs_dict'` (with `'num_features'`).

    Returns:
        (res, baseline_res): the value returned by `train` and the value
        returned by `priors.gp.evaluate`.

    Raises:
        KeyError: if one of the required `kwargs` entries is missing.
    """
    # `Losses` is not imported by the notebook's import cell, so without this
    # the function raises NameError on a fresh kernel.
    # NOTE(review): assumes the `train` module exposes `Losses` -- confirm.
    from train import Losses

    num_features = kwargs['extra_prior_kwargs_dict']['num_features']
    # NOTE(review): the criterion is looked up at args[2]; the direct
    # `train(...)` call elsewhere in this notebook passes the criterion as the
    # second positional argument -- confirm the index matches the caller.
    baseline_res = priors.gp.evaluate(
        *args[0].get_batch_method(num_evals,kwargs['bptt'],num_features),
        use_mse=Losses.mse == args[2])
    # The baseline here is `priors.gp`; the label previously said 'fast_gp'
    # (copied from the sibling helper), which mislabelled the printed result.
    print(baseline_res, 'with gp')

    res = train(*args, **kwargs)
    return res, baseline_res



In [13]:
import gpytorch
# Hyperparameter dict handed to the priors through `hyperparameters=hps`.
hps = dict(
    noise=1e-4,
    outputscale=1.,
    lengthscale=.6,
    fast_computations=(False, False, False),
)

import numpy as np, scipy.stats as st

def compute_mean_and_conf_interval(accuracies, confidence=.95):
    """Mean and Student-t confidence half-width along the last axis.

    Args:
        accuracies: array-like of samples; the samples being averaged lie
            along the last axis (a 1-D input is a single set of samples).
        confidence: two-sided confidence level, e.g. ``.95``.

    Returns:
        (m, h): the mean over the last axis and the half-width of the
        confidence interval, so the interval is ``m - h`` to ``m + h``.
        Both have the shape of the input without its last axis.
    """
    accuracies = np.array(accuracies)
    # Mean and SEM reduce over the last axis, so the sample count must be the
    # size of that axis. `len()` returns the size of the *first* axis, which
    # gave the wrong degrees of freedom for 2-D input.
    n = accuracies.shape[-1]
    m, se = np.mean(accuracies, -1), st.sem(accuracies, -1)
    h = se * st.t.ppf((1 + confidence) / 2., n-1)
    return m, h


def bl(hps,bptt, num_evals=100, num_features=1, step_size=1, evals_per_batch=None, speedups=(False,False,False,False)):
    """Run the `priors.fast_gp` baseline on freshly sampled batches.

    Args:
        hps: passed as `hyperparameters=` to both `get_batch` and `evaluate`.
        bptt: second positional argument of `priors.fast_gp.get_batch`
            (presumably the sequence length -- TODO confirm).
        num_evals: total number of evaluations, split into chunks.
        num_features: third positional argument of `get_batch`.
        step_size: forwarded to `priors.fast_gp.evaluate`.
        evals_per_batch: chunk size; defaults to `num_evals` (one chunk) and
            must divide `num_evals` when given.
        speedups: four flags; the first goes to
            `gpytorch.settings.fast_pred_var`, the remaining three to
            `gpytorch.settings.fast_computations` during evaluation.

    Returns:
        Whatever `compute_mean_and_conf_interval` returns for the per-chunk
        results concatenated along dim 1.
    """
    if evals_per_batch is not None:
        assert num_evals%evals_per_batch == 0
    else:
        evals_per_batch = num_evals

    n_chunks = num_evals//evals_per_batch
    per_chunk_results = []
    for _ in range(n_chunks):
        # Sampling always runs with the fast-computation shortcuts disabled.
        with gpytorch.settings.fast_computations(False, False, False):
            sampled = priors.fast_gp.get_batch(
                evals_per_batch, bptt, num_features, hyperparameters=hps)

        # Evaluation uses the caller-selected speed-up flags.
        with gpytorch.settings.fast_pred_var(speedups[0]):
            with gpytorch.settings.fast_computations(*speedups[1:]):
                outcome = priors.fast_gp.evaluate(
                    *sampled, hyperparameters=hps, step_size=step_size)
        all_res, baseline_res, _ = outcome

        print(baseline_res, 'with fast_gp')
        per_chunk_results.append(all_res)

    # seq x batch_size (per the original author's note)
    stacked = torch.cat(per_chunk_results, 1)
    return compute_mean_and_conf_interval(stacked)
    
    
#settings = [{'num_evals':n,} for n in [100,1000]]
    
#js = [ex.submit(bl, hps, 2000, step_size=100, evals_per_batch=2, num_features=5, **kwargs) for kwargs in settings]




In [None]:
# Below, you can simply replace the prior with priors.fast_gp_mix to run experiments over mixtures of GPs.

In [None]:
# Five-feature experiment: sample targets from the prior once, derive the
# bar-distribution bucket limits from them, then train one model per
# hyperparameter combination.
num_features = 5
hps = {'noise': 1e-4, 'outputscale': 1., 'lengthscale': .6, 'fast_computations': (False,False,False)}
# Element [1] of the returned batch is used as `ys` for the bucket limits.
# Presumably the positional arguments are (batch_size, seq_len, num_features)
# -- TODO confirm against priors.fast_gp.get_batch.
ys = priors.fast_gp.get_batch(100000,20,num_features, hyperparameters=hps)[1]
# NOTE: `train(...)` is called eagerly inside the comprehension, so this list
# holds the return values of completed training runs, one after another.
# Only `epochs` (4 values), `lr` (3 values) and `num_borders` (2 values) vary,
# giving 24 runs; every other `for` clause binds a single value.
# NOTE(review): `nhid` is bound below but never passed to `train` -- confirm
# whether it was meant to be forwarded.
fivefeature_jobs = [
    train(priors.fast_gp.DataLoader, bar_distribution.FullSupportBarDistribution(bar_distribution.get_bucket_limits(num_borders, ys=ys)), enc, emsize=emsize, nhead=nhead, warmup_epochs=warmup_epochs, y_encoder_generator=y_enc, pos_encoder_generator=pos_enc,
              batch_size=batch_size, scheduler=decay, extra_prior_kwargs_dict={'num_features': num_features, 'fuse_x_y': False, 'hyperparameters': hps},
              epochs=epochs, lr=lr, input_normalization=input_norm, bptt=2010, single_eval_pos_gen=single_eval_pos,aggregate_k_gradients=step_every, **kwargs) 
    for enc in [encoders.Linear] for y_enc in [encoders.Linear] for emsize in [512] for nhead in [4] for nhid in [emsize*2] for epochs in [50*25,100*25,200*25,400*25] 
    # warm-up length is a quarter of the epochs
    for warmup_epochs in [epochs//4] for input_norm in [False]
    # step_every = 100 // batch_size, passed to train as aggregate_k_gradients
    for batch_size in [4] for step_every in [100//batch_size] for lr in [.0001,.0003,.001] for decay in [utils.get_cosine_schedule_with_warmup] for num_borders in [1000,10000] 
    for single_eval_pos in [utils.get_weighted_single_eval_pos_sampler(2000)]
    # positional encoding is only selected when no eval-position sampler is set
    for pos_enc in [positional_encodings.PositionalEncoding if single_eval_pos is None else positional_encodings.NoPositionalEncoding] 
    # single repetition per configuration
    for redo in range(1)
]





In [14]:
import numpy as np, scipy.stats as st

def compute_mean_and_conf_interval(accuracies, confidence=.95):
    """Mean of the samples and the half-width of their t confidence interval.

    Args:
        accuracies: array-like of samples (a 1-D sequence in this notebook).
        confidence: two-sided confidence level, e.g. ``.95``.

    Returns:
        (mean, half_width): the interval is ``mean - half_width`` to
        ``mean + half_width``.
    """
    samples = np.array(accuracies)
    degrees_of_freedom = len(samples) - 1
    # Two-sided interval, so use the upper (1 + confidence) / 2 quantile.
    t_quantile = st.t.ppf((1 + confidence) / 2., degrees_of_freedom)
    return np.mean(samples), st.sem(samples) * t_quantile
# Module-level hyperparameters; `run_test` reads this global when it samples
# evaluation batches.
hps = dict(
    noise=1e-4,
    outputscale=1.,
    lengthscale=.6,
    fast_computations=(False, False, False),
)

@torch.inference_mode()
def run_test(model,device='cuda:0',step_size=100, start_pos=1, batch_size=1000, sub_batch_size=10, seq_len=2000):
    """Evaluate `model` on fresh `priors.fast_gp` samples at several positions.

    For every `eval_pos` in `range(start_pos, seq_len, step_size)`, the model
    is scored on `batch_size` sampled datasets, processed in chunks of
    `sub_batch_size`. Each dataset has `eval_pos + 1` points and the model is
    called with `single_eval_pos=eval_pos`. Sampling uses the module-level
    `hps` and a fixed `num_features=5`.

    Args:
        model: callable as `model((x, y), single_eval_pos=...)`, exposing
            `.criterion`. Unless the criterion is an `nn.GaussianNLLLoss`, it
            must also provide `.mean(logits)` and `.borders`.
        device: device the model is moved to and batches are sampled on.
        step_size: stride between evaluated positions.
        start_pos: first evaluated position.
        batch_size: datasets per position; must be a multiple of
            `sub_batch_size`.
        sub_batch_size: datasets per forward pass.
        seq_len: exclusive upper bound on the evaluated positions.

    Returns:
        (eval_positions, mses, max_mses, nlls, nll_confidences): the list of
        positions, followed by four CPU tensors with one entry per position.
        These hold the MSE of the criterion's mean prediction, the MSE of the
        arg-max bucket midpoint, the mean NLL, and the confidence value from
        `compute_mean_and_conf_interval(...)[1]`. Both MSE entries are 0 for a
        Gaussian criterion.
    """
    assert batch_size % sub_batch_size == 0
    model.to(device)
    model.eval()

    def score_chunk(position, n_datasets):
        # Sample one chunk and return (nll, mse of mean, mse of arg-max).
        x, y, target_y = priors.fast_gp.get_batch(
            batch_size=n_datasets, seq_len=position+1, num_features=5,
            hyperparameters=hps, device=device)
        logits = model((x, y), single_eval_pos=position)
        criterion = model.criterion
        target = target_y[position]

        if isinstance(criterion, nn.GaussianNLLLoss):
            # Channel 0 is used as the mean; |channel 1| as the variance.
            head = logits[0]
            return criterion(head[..., 0], target, var=head[..., 1].abs()), 0., 0.

        means = criterion.mean(logits) # num_evals x batch_size
        # Midpoint of the bucket with the highest logit.
        best_bucket = logits.argmax(-1)
        borders = criterion.borders
        maxs = (borders[best_bucket] + borders[best_bucket + 1]) / 2
        squared_error = nn.MSELoss()
        nll = criterion(logits[0], target)
        return nll, squared_error(means[0], target), squared_error(maxs[0], target)

    n_chunks = batch_size // sub_batch_size
    eval_positions, mses, max_mses, nlls, nll_confidences = [], [], [], [], []

    for eval_pos in range(start_pos, seq_len, step_size):
        eval_positions.append(eval_pos)
        print(eval_pos)

        chunk_metrics = [score_chunk(eval_pos, sub_batch_size) for _ in range(n_chunks)]

        nll = torch.cat([metrics[0] for metrics in chunk_metrics])
        mean_mse = torch.tensor([metrics[1] for metrics in chunk_metrics]).mean()
        max_mse = torch.tensor([metrics[2] for metrics in chunk_metrics]).mean()

        mses.append(mean_mse)
        max_mses.append(max_mse)
        nlls.append(nll.mean())
        nll_confidences.append(compute_mean_and_conf_interval(nll.to('cpu'))[1])

    return (
        eval_positions,
        torch.stack(mses).to('cpu'),
        torch.stack(max_mses).to('cpu'),
        torch.stack(nlls).to('cpu'),
        torch.tensor(nll_confidences).to('cpu'),
    )



