import pyro
import torch
import torch.nn as nn
import torch.nn.functional as F

from typing import Dict
from pyro.distributions.conditional import (
    ConditionalTransformModule,
    ConditionalTransformedDistribution,
    TransformedDistribution,
)
from pyro.distributions.torch_distribution import TorchDistributionMixin

from torch.distributions import constraints
from torch.distributions.utils import _sum_rightmost
from torch.distributions.transforms import Transform


class ConditionalAffineTransform(ConditionalTransformModule):
    def __init__(self, context_nn, event_dim=0, **kwargs):
        super().__init__(**kwargs)
        self.event_dim = event_dim
        self.context_nn = context_nn

    def condition(self, context):
        loc, log_scale = self.context_nn(context)
        return torch.distributions.transforms.AffineTransform(
            loc, log_scale.exp(), event_dim=self.event_dim
        )
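

# Illustrative usage sketch (added commentary, not part of the original API): any
# context network that returns a (loc, log_scale) pair can parameterise this transform;
# `two_headed_nn` below is a hypothetical example. The conditioned transform applies
# x -> loc + exp(log_scale) * x.
#
#   affine_t = ConditionalAffineTransform(context_nn=two_headed_nn, event_dim=0)
#   affine = affine_t.condition(context)  # torch AffineTransform with scale = exp(log_scale)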


class MLP(nn.Module):
    def __init__(self, num_inputs=1, width=32, num_outputs=1):
        super().__init__()
        activation = nn.LeakyReLU()
        self.mlp = nn.Sequential(
            nn.Linear(num_inputs, width, bias=False),
            nn.BatchNorm1d(width),
            activation,
            nn.Linear(width, width, bias=False),
            nn.BatchNorm1d(width),
            activation,
            nn.Linear(width, num_outputs),
        )

    def forward(self, x):
        return self.mlp(x)


class CNN(nn.Module):
    def __init__(self, in_shape=(1, 192, 192), width=16, num_outputs=1, context_dim=0):
        super().__init__()
        in_channels = in_shape[0]
        res = in_shape[1]
        s = 2 if res > 64 else 1
        activation = nn.LeakyReLU()
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels, width, 7, s, 3, bias=False),
            nn.BatchNorm2d(width),
            activation,
            (nn.MaxPool2d(2, 2) if res > 32 else nn.Identity()),
            nn.Conv2d(width, 2 * width, 3, 2, 1, bias=False),
            nn.BatchNorm2d(2 * width),
            activation,
            nn.Conv2d(2 * width, 2 * width, 3, 1, 1, bias=False),
            nn.BatchNorm2d(2 * width),
            activation,
            nn.Conv2d(2 * width, 4 * width, 3, 2, 1, bias=False),
            nn.BatchNorm2d(4 * width),
            activation,
            nn.Conv2d(4 * width, 4 * width, 3, 1, 1, bias=False),
            nn.BatchNorm2d(4 * width),
            activation,
            nn.Conv2d(4 * width, 8 * width, 3, 2, 1, bias=False),
            nn.BatchNorm2d(8 * width),
            activation,
        )
        self.fc = nn.Sequential(
            nn.Linear(8 * width + context_dim, 8 * width, bias=False),
            nn.BatchNorm1d(8 * width),
            activation,
            nn.Linear(8 * width, num_outputs),
        )

    def forward(self, x, y=None):
        x = self.cnn(x)
        x = x.mean(dim=(-2, -1))
        if y is not None:
            x = torch.cat([x, y], dim=-1)
        return self.fc(x)
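
# Shape note (derived from the layers above, stated here as a sketch): with the default
# in_shape=(1, 192, 192) the convolutional stack downsamples spatially, and the mean over
# the last two dims yields features of shape (N, 8 * width). When a context tensor `y`
# of shape (N, context_dim) is given it is concatenated before the fully connected head,
# which is why the first nn.Linear expects 8 * width + context_dim inputs.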


class ArgMaxGumbelMax(Transform):
    r"""ArgMax as a Transform, with the inverse conditioned on the logits."""

    def __init__(self, logits, event_dim=0, cache_size=0):
        super().__init__(cache_size=cache_size)
        self.logits = logits
        self._event_dim = event_dim
        self._categorical = pyro.distributions.torch.Categorical(
            logits=self.logits
        ).to_event(0)

    @property
    def event_dim(self):
        return self._event_dim

    def __call__(self, gumbels):
        """Compute the forward transform."""
        assert self.logits is not None, "Logits not defined."
        if self._cache_size == 0:
            return self._call(gumbels)
        y = self._call(gumbels)
        return y

    def _call(self, gumbels):
        """Compute the forward transformation: argmax over gumbels + logits."""
        assert self.logits is not None, "Logits not defined."
        y = gumbels + self.logits
        return y.argmax(-1, keepdim=True)

    @property
    def domain(self):
        """Domain of the input (Gumbel variables): real-valued."""
        if self.event_dim == 0:
            return constraints.real
        return constraints.independent(constraints.real, self.event_dim)

    @property
    def codomain(self):
        """Codomain of the output (categorical variables); should be the natural
        numbers, but is set to real for now."""
        if self.event_dim == 0:
            return constraints.real
        return constraints.independent(constraints.real, self.event_dim)

    def inv(self, k):
        """Infer the Gumbel noises given the observed class k and the logits."""
        assert self.logits is not None, "Logits not defined."

        def sample_gumbel(shape, eps=1e-20):
            # Standard Gumbel(0, 1) noise via -log(-log(U)), on the logits' device.
            U = torch.rand(shape, device=self.logits.device)
            return -torch.log(-torch.log(U + eps) + eps)

        def gumbel_softmax_sample(logits, temperature):
            y = logits + sample_gumbel(logits.shape)
            return F.softmax(y / temperature, dim=-1)

        def gumbel_softmax(logits, temperature, k, hard=False):
            """
            Straight-through (ST) Gumbel-Softmax.
            input: [*, n_class]
            return: flattened [*, n_class] one-hot (or soft) vector
            """
            y = gumbel_softmax_sample(logits, temperature)
            if not hard:
                return y.view(-1, logits.shape[-1])

            shape = y.size()
            _, ind = k.max(dim=-1)
            y_hard = torch.zeros_like(y).view(-1, shape[-1])
            y_hard.scatter_(1, ind.view(-1, 1), 1)
            y_hard = y_hard.view(*shape)
            # Straight-through estimator: forward pass uses the hard one-hot,
            # backward pass uses the gradient of the soft sample y.
            y_hard = (y_hard - y).detach() + y
            return y_hard.view(-1, logits.shape[-1])

        epsilons = gumbel_softmax(self.logits, 1e-3, k)
        return epsilons
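
    # Design note (added commentary): inv() does not invert argmax exactly; it draws
    # fresh Gumbel noise and returns a low-temperature (1e-3) Gumbel-Softmax sample of
    # the logits, which keeps the inferred noise differentiable with respect to the
    # logits. The observed class k is only used to build the hard one-hot in the
    # straight-through (hard=True) branch.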

    def log_abs_det_jacobian(self, x, y):
        """Use the log_abs_det_jacobian to account for the categorical probability.
        x: Gumbels; y: argmax(x + logits); P(y) = softmax(logits).
        """
        return -self._categorical.log_prob(y.squeeze(-1)).unsqueeze(-1)


class ConditionalGumbelMax(ConditionalTransformModule):
    r"""Given gumbels + logits, output the one-hot categorical."""

    def __init__(self, context_nn, event_dim=0, **kwargs):
        super().__init__(**kwargs)
        self.context_nn = context_nn
        self.event_dim = event_dim

    def condition(self, context):
        """Given the context (e.g. age), return an ArgMaxGumbelMax transform over the
        predicted logits."""
        logits = self.context_nn(context)
        return ArgMaxGumbelMax(logits)

    def _logits(self, context):
        """Return the logits given the context."""
        return self.context_nn(context)

    @property
    def domain(self):
        """Domain of the input (Gumbel variables): real-valued."""
        if self.event_dim == 0:
            return constraints.real
        return constraints.independent(constraints.real, self.event_dim)

    @property
    def codomain(self):
        """Codomain of the output (categorical variables); should be the natural
        numbers, but is set to real for now."""
        if self.event_dim == 0:
            return constraints.real
        return constraints.independent(constraints.real, self.event_dim)


class TransformedDistributionGumbelMax(TransformedDistribution, TorchDistributionMixin):
    r"""A TransformedDistribution for Gumbel-Max sampling."""
    arg_constraints: Dict[str, constraints.Constraint] = {}

    def log_prob(self, value):
        """
        The log_prob() of the base Gumbel distribution is not used here, because the
        likelihood of each class under Gumbel-Max sampling is determined by the logits
        alone (via the transforms' log_abs_det_jacobian).
        """
        if self._validate_args:
            self._validate_sample(value)
        event_dim = len(self.event_shape)
        log_prob = 0.0
        y = value
        for transform in reversed(self.transforms):
            x = transform.inv(y)
            event_dim += transform.domain.event_dim - transform.codomain.event_dim
            log_prob = log_prob - _sum_rightmost(
                transform.log_abs_det_jacobian(x, y),
                event_dim - transform.domain.event_dim,
            )
            y = x

        return log_prob
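
    # Worked consequence (added commentary): with a single ArgMaxGumbelMax transform,
    # log_abs_det_jacobian returns -Categorical(logits).log_prob(k), so the loop above
    # reduces to log_prob(k) = log softmax(logits)[k], i.e. the exact categorical
    # log-likelihood, independent of the particular Gumbel noise inferred by inv().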


class ConditionalTransformedDistributionGumbelMax(ConditionalTransformedDistribution):
    def condition(self, context):
        base_dist = self.base_dist.condition(context)
        transforms = [t.condition(context) for t in self.transforms]
        return TransformedDistributionGumbelMax(base_dist, transforms)

    def clear_cache(self):
        pass
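

# Illustrative end-to-end sketch (added commentary; `num_classes` and `context` are
# hypothetical names, and the exact base-distribution setup is an assumption following
# the standard Gumbel-Max construction, not a statement of how this module is used
# elsewhere):
#
#   base = pyro.distributions.Gumbel(
#       torch.zeros(num_classes), torch.ones(num_classes)
#   ).to_event(1)
#   transform = ConditionalGumbelMax(
#       context_nn=MLP(num_inputs=1, num_outputs=num_classes)
#   )
#   cond = ConditionalTransformedDistributionGumbelMax(base, [transform])
#   dist = cond.condition(context)   # context: (N, 1) tensor, e.g. normalised age
#   k = dist.sample()                # class index from argmax(gumbels + logits)
#   logp = dist.log_prob(k)          # categorical log-likelihood under the predicted logits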