import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
from PIL import Image
# from vgg_weights import VGG19_Weights
# Content loss
class ContentLoss(nn.Module):
    def __init__(self, target):
        super(ContentLoss, self).__init__()
        # We 'detach' the target content from the tree used to dynamically
        # compute the gradient: this is a stated value, not a variable.
        # Otherwise the forward method of the criterion will throw an error.
        self.target = target.detach()

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input
# Style loss
def gram_matrix(input):
    a, b, c, d = input.size()  # a = batch size (=1)
    # b = number of feature maps
    # (c, d) = dimensions of a feature map (N = c * d)
    features = input.view(a * b, c * d)  # resize F_XL into \hat F_XL
    G = torch.mm(features, features.t())  # compute the gram product
    # we 'normalize' the values of the gram matrix
    # by dividing by the number of elements in each feature map
    return G.div(a * b * c * d)
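
# Illustrative example of the shapes involved (assumed values, not part of the
# original script): 64 feature maps of size 32x32 give a 64x64 Gram matrix of
# channel-to-channel correlations, normalized by the total number of elements.
#   fmap = torch.randn(1, 64, 32, 32)
#   gram_matrix(fmap).shape  # -> torch.Size([64, 64])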
class StyleLoss(nn.Module):
    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input
#Normalization
cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406])
cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225])
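
# ``Normalization`` is referenced by get_style_model_and_losses below but is not
# defined anywhere in this file; the class below is a minimal sketch of the usual
# module from the PyTorch neural style transfer tutorial, assumed here so the
# script can run.
class Normalization(nn.Module):
    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        # reshape mean and std to [C, 1, 1] so they broadcast over [B, C, H, W] batches
        self.mean = mean.clone().detach().view(-1, 1, 1)
        self.std = std.clone().detach().view(-1, 1, 1)

    def forward(self, img):
        # normalize the image channel-wise
        return (img - self.mean) / self.std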
# Image transformation
# def image_transform(image):
#     if isinstance(image, str):
#         # If image is a path to a file, open it using PIL
#         image = Image.open(image).convert('RGB')
#     else:
#         # If image is a NumPy array, convert it to a PIL image
#         image = Image.fromarray(image.astype('uint8'), 'RGB')
#     # Apply the same transformations as before
#     image = transform(image).unsqueeze(0)
#     return image
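
# ``transform`` is used by image_transform below but is not defined in this file;
# the pipeline below is an assumed sketch (resize + tensor conversion). The
# 512x512 working resolution is a guess, not a value from the original code.
import torchvision.transforms as T

transform = T.Compose([
    T.Resize((512, 512)),  # assumed working resolution
    T.ToTensor(),          # PIL image -> float tensor in [0, 1], shape [C, H, W]
])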
def image_transform(image):
    if image is None:
        return None
    if isinstance(image, str):
        # If image is a path to a file, open it using PIL
        with open(image, "rb") as f:
            image = Image.open(f).convert('RGB')
    else:
        # If image is already a PIL image, just convert it to RGB mode
        image = image.convert('RGB')
    # Apply the preprocessing transform and add a batch dimension
    image = transform(image).unsqueeze(0)
    return image
# Create EffNetB2 pretrained weights, transforms and model
weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
transforms = weights.transforms()
model = torchvision.models.efficientnet_b2(weights=weights)
# Defining the model: VGG19 pretrained on ImageNet; only the convolutional
# feature extractor is needed, kept in eval mode
weights = models.VGG19_Weights.IMAGENET1K_V1
cnn = models.vgg19(weights=weights).features.eval()
# Getting the input optimizer
def get_input_optimizer(input_img):
    # the input image itself is the parameter being optimized, so it must require a gradient
    optimizer = optim.LBFGS([input_img])
    return optimizer
# desired depth layers to compute style/content losses :
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    # normalization module
    normalization = Normalization(normalization_mean, normalization_std)

    # just in order to have iterable access to the content/style losses
    content_losses = []
    style_losses = []

    # assuming that ``cnn`` is a ``nn.Sequential``, we make a new ``nn.Sequential``
    # to put in modules that are supposed to be activated sequentially
    model = nn.Sequential(normalization)

    i = 0  # increment every time we see a conv
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)
            # The in-place version doesn't play very nicely with the ``ContentLoss``
            # and ``StyleLoss`` we insert below, so we replace it with an
            # out-of-place one here.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            # add content loss:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            # add style loss:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)

    # now we trim off the layers after the last content and style losses
    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss):
            break

    model = model[:(i + 1)]

    return model, style_losses, content_losses
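
# Sketch of how the pieces above fit together, following the standard PyTorch
# neural style transfer recipe. The weights, step count, and example file names
# are illustrative assumptions, not values from the original code.
def run_style_transfer(cnn, normalization_mean, normalization_std,
                       content_img, style_img, input_img,
                       num_steps=300, style_weight=1_000_000, content_weight=1):
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, normalization_mean, normalization_std, style_img, content_img)

    # optimize the pixels of the input image, not the network weights
    input_img.requires_grad_(True)
    model.eval()
    model.requires_grad_(False)

    optimizer = get_input_optimizer(input_img)

    run = [0]
    while run[0] <= num_steps:

        def closure():
            # keep the image in the valid [0, 1] range
            with torch.no_grad():
                input_img.clamp_(0, 1)

            optimizer.zero_grad()
            model(input_img)

            style_score = sum(sl.loss for sl in style_losses) * style_weight
            content_score = sum(cl.loss for cl in content_losses) * content_weight
            loss = style_score + content_score
            loss.backward()

            run[0] += 1
            return loss

        optimizer.step(closure)

    with torch.no_grad():
        input_img.clamp_(0, 1)

    return input_img

# Example usage (hypothetical file names):
#   content_img = image_transform("content.jpg")
#   style_img = image_transform("style.jpg")
#   output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
#                               content_img, style_img, content_img.clone())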