from torchvision import transforms
from PIL import Image


class MinMaxResize:
    """Resize a PIL image so its shorter side is `shorter`, cap the longer
    side at `longer`, and floor both sides to multiples of 32."""

    def __init__(self, shorter=800, longer=1333):
        self.min = shorter
        self.max = longer

    def __call__(self, x):
        w, h = x.size
        # Scale so that the shorter side becomes self.min.
        scale = self.min / min(w, h)
        if h < w:
            newh, neww = self.min, scale * w
        else:
            newh, neww = scale * h, self.min

        # If the longer side now exceeds self.max, rescale so it fits.
        if max(newh, neww) > self.max:
            scale = self.max / max(newh, neww)
            newh = newh * scale
            neww = neww * scale

        # Round to the nearest integer, then floor to a multiple of 32.
        newh, neww = int(newh + 0.5), int(neww + 0.5)
        newh, neww = newh // 32 * 32, neww // 32 * 32

        return x.resize((neww, newh), resample=Image.BICUBIC)
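
# Illustrative trace for a hypothetical 1920x1080 input: scale = 800 / 1080
# gives roughly (h, w) = (800, 1422); since 1422 > 1333, the second rescale
# gives about (750, 1333), and the multiple-of-32 floor yields a final output
# of width 1312 and height 736.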


class UnNormalize(object):
    """Invert a torchvision `transforms.Normalize` with the given mean/std."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalized tensor image of size (C, H, W).
        Returns:
            Tensor: The un-normalized image (modified in place).
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            # Inverse of the normalize step: t.sub_(m).div_(s)
            t.mul_(s).add_(m)
        return tensor


# Simple maximum-entropy normalization as performed in the Inception paper (maps [0, 1] inputs to [-1, 1]).
inception_normalize = transforms.Compose([transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

# ViT uses this simple, non-biased Inception normalization:
# https://github.com/google-research/vision_transformer/blob/master/vit_jax/input_pipeline.py#L132
inception_unnormalize = transforms.Compose([UnNormalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

# Normalization with the CLIP image preprocessing statistics.
cn_clip_normalize = transforms.Compose(
    [transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])]
)
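

# Usage sketch (an illustrative assumption, not part of the original module):
# compose MinMaxResize with ToTensor and inception_normalize, then undo the
# normalization with inception_unnormalize. The synthetic RGB image below is
# only a stand-in for a real input.
if __name__ == "__main__":
    pipeline = transforms.Compose(
        [
            MinMaxResize(shorter=800, longer=1333),
            transforms.ToTensor(),
            inception_normalize,
        ]
    )
    img = Image.new("RGB", (1920, 1080), color=(128, 64, 32))
    tensor = pipeline(img)  # (C, H, W), both sides multiples of 32
    restored = inception_unnormalize(tensor.clone())  # back to the [0, 1] range
    print(tensor.shape, restored.min().item(), restored.max().item())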