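"""Evaluate generated images with reference-free quality metrics.

For each image in the selected subset, this script looks up its caption and
computes the metrics listed in `evaluation_df` (Image Reward, HPS v2.1,
aesthetic score, and/or CLIP similarity), then writes per-image and averaged
results to ./evaluation/. Masked PSNR/LPIPS/MSE helpers are also provided for
reference-based evaluation.
"""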
import math
import os
from urllib.request import urlretrieve

import hpsv2
import ImageReward as RM
import numpy as np
import open_clip
import pandas as pd
import torch
from PIL import Image
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.multimodal import CLIPScore

def rle2mask(mask_rle, shape):
    """Decode a run-length encoding into a binary mask.

    `mask_rle` is a flat sequence of alternating (start, length) pairs with
    1-indexed, row-major pixel starts; `shape` is (height, width).
    """
    starts, lengths = [np.asarray(x, dtype=int) for x in (mask_rle[0:][::2], mask_rle[1:][::2])]
    starts -= 1  # convert 1-indexed starts to 0-indexed
    ends = starts + lengths
    binary_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        binary_mask[lo:hi] = 1
    return binary_mask.reshape(shape)
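# Worked example (hypothetical values): an RLE of [2, 3] on a 2x3 canvas marks
# pixels 2-4 (1-indexed, row-major):
#   rle2mask([2, 3], (2, 3)) -> [[0, 1, 1],
#                                [1, 0, 0]]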


class MetricsCalculator:
    def __init__(self, device, ckpt_path="../../data/ckpt") -> None:
        self.device = device
        # CLIP text-image similarity
        self.clip_metric_calculator = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)
        # LPIPS perceptual distance
        self.lpips_metric_calculator = LearnedPerceptualImagePatchSimilarity(net_type='squeeze').to(device)
        # LAION aesthetic predictor: a linear head on top of CLIP ViT-L/14 image features
        self.aesthetic_model = torch.nn.Linear(768, 1)
        aesthetic_model_url = (
            "https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_vit_l_14_linear.pth?raw=true"
        )
        aesthetic_model_ckpt_path = os.path.join(ckpt_path, "sa_0_4_vit_l_14_linear.pth")
        # download the checkpoint once if it is not already cached
        os.makedirs(ckpt_path, exist_ok=True)
        if not os.path.exists(aesthetic_model_ckpt_path):
            urlretrieve(aesthetic_model_url, aesthetic_model_ckpt_path)
        self.aesthetic_model.load_state_dict(torch.load(aesthetic_model_ckpt_path))
        self.aesthetic_model.eval()
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')
        # image reward model
        self.imagereward_model = RM.load("ImageReward-v1.0")
 

    def calculate_image_reward(self, image, prompt):
        reward = self.imagereward_model.score(prompt, [image])
        return reward

    def calculate_hpsv21_score(self, image, prompt):
        result = hpsv2.score(image, prompt, hps_version="v2.1")[0]
        return result.item()

    def calculate_aesthetic_score(self, img):
        image = self.clip_preprocess(img).unsqueeze(0)
        with torch.no_grad():
            image_features = self.clip_model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            prediction = self.aesthetic_model(image_features)
        return prediction.cpu().item()

    def calculate_clip_similarity(self, img, txt):
        img = np.array(img)
        img_tensor = torch.tensor(img).permute(2, 0, 1).to(self.device)
        score = self.clip_metric_calculator(img_tensor, txt)
        return score.cpu().item()
    
    def calculate_psnr(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask

        difference = img_pred - img_gt
        difference_square = difference ** 2
        difference_square_sum = difference_square.sum()
        # average over the masked region only; use all pixels when no mask is given
        difference_size = mask.sum() if mask is not None else difference.size

        mse = difference_square_sum / difference_size
        if mse < 1.0e-10:
            return 1000
        # PSNR in dB: 20 * log10(MAX / sqrt(MSE)); images are scaled to [0, 1]
        PIXEL_MAX = 1
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))

    
    def calculate_lpips(self, img_gt, img_pred, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255
        img_gt = np.array(img_gt).astype(np.float32) / 255
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask

        img_pred_tensor = torch.tensor(img_pred).permute(2, 0, 1).unsqueeze(0).to(self.device)
        img_gt_tensor = torch.tensor(img_gt).permute(2, 0, 1).unsqueeze(0).to(self.device)

        # LPIPS expects inputs scaled to [-1, 1]
        score = self.lpips_metric_calculator(img_pred_tensor * 2 - 1, img_gt_tensor * 2 - 1)
        return score.cpu().item()
    
    def calculate_mse(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask

        difference = img_pred - img_gt
        difference_square = difference ** 2
        difference_square_sum = difference_square.sum()
        # average over the masked region only; use all pixels when no mask is given
        difference_size = mask.sum() if mask is not None else difference.size

        return float(difference_square_sum / difference_size)
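# A minimal usage sketch for MetricsCalculator (file name and prompt are
# placeholders, not part of this pipeline):
#   calc = MetricsCalculator("cuda")
#   img = Image.open("sample.png").convert("RGB").resize((512, 512))
#   calc.calculate_aesthetic_score(img)
#   calc.calculate_clip_similarity(img, "a person walking on a beach")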
    

device = "cuda" if torch.cuda.is_available() else "cpu"


cc3m_base_dir = "/home/kis/datasets/cc3m_attempt12"
cc3m_full_regen = "/home/kis/Downloads/imgs"

# annotations = pd.read_csv("annotations/annotations_synthetic_100percent.txt", sep='\t', header=None, dtype=str)
cc3m_annotations_full = pd.read_csv("../../../../datasets/cc3m/Train_GCC-training.tsv", sep='\t', header=0)

# evaluation: only the metrics listed in these columns are computed below
# evaluation_df = pd.DataFrame(columns=['Image ID', 'Image Reward', 'HPS V2.1', 'Aesthetic Score'])  # , 'CLIP Similarity'])
evaluation_df = pd.DataFrame(columns=['Image ID', 'Aesthetic Score'])  # , 'CLIP Similarity'])

metrics_calculator = MetricsCalculator(device)
os.makedirs("evaluation", exist_ok=True)  # output directory for the result CSVs
mask_root = "../../../SemanticSegmentation/mask_skin"

# Which subset to evaluate; determines the image folder and the caption source.
gender = "real"

root = "PATH_TO_THE_ROOT"  # set this to the dataset root

# NOTE: the "man"/"woman" and "syn" branches do not set `annotations` here;
# they rely on an annotation file such as the commented-out reads above/below.
if gender == "man":
    img_root = root + "/foreground_syn_men/"
elif gender == "woman":
    img_root = root + "/foreground_syn_women/"
elif gender == "real":
    img_root = root + "/foreground/"
    annotations = cc3m_annotations_full
elif gender == "cvpr":
    img_root = root + "/foreground_cvpr_images/"
    annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "syn":
    img_root = root + "/foreground_fully_synthetic/"
    # annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "coco":
    img_root = root + "/foreground_coco_counterfactuals/"
    annotations = pd.read_json(path_or_buf=root + "/coco_counterfactuals.jsonl", lines=True)
    annotations = annotations.set_index("id")

for fname in os.listdir(img_root):
    image_name = fname.split(".")[0]
    # skip auxiliary files saved next to the generations
    if "mask" in image_name or "original" in image_name:
        continue
    image_name = fname.split("_")[0]

    print(f"evaluating image {image_name} ...")

    image_id = str(image_name).zfill(9)

    # look up the caption for this image in the subset's annotation source
    if gender in ["man", "woman"]:
        caption = annotations[(annotations[0] == image_name) & (annotations[1] == gender)][2].item()
    elif gender == "real":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "cvpr":
        caption = annotations.iloc[int(image_name), 1]
    elif gender == "syn":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "coco":
        image_id = f'{fname.split("_")[0]}{fname.split("_")[1]}'
        n = int(fname.split(".")[0][-1])  # trailing 0 or 1 selects one of the two counterfactual captions
        caption = annotations.loc[int(image_id)][n]

    image_path = os.path.join(img_root, fname)
    prompt = caption
    try:
        src_image = Image.open(image_path).resize((512, 512))
        evaluation_result = [str(image_id) + "_" + str(gender)]
    except FileNotFoundError:
        continue

    success = True
    for metric in evaluation_df.columns.values.tolist()[1:]:
        print(f"evaluating metric: {metric}")
        try:
            if metric == 'Image Reward':
                metric_result = metrics_calculator.calculate_image_reward(src_image, prompt)
            elif metric == 'HPS V2.1':
                metric_result = metrics_calculator.calculate_hpsv21_score(src_image, prompt)
            elif metric == 'Aesthetic Score':
                metric_result = metrics_calculator.calculate_aesthetic_score(src_image)
            elif metric == 'CLIP Similarity':
                metric_result = metrics_calculator.calculate_clip_similarity(src_image, prompt)

            evaluation_result.append(metric_result)
        except RuntimeError:
            success = False
            break

    if success:
        evaluation_df.loc[len(evaluation_df.index)] = evaluation_result
        # checkpoint after every image so partial results survive a crash
        evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The averaged evaluation result:")
averaged_results=evaluation_df.mean(numeric_only=True)
print(averaged_results)
averaged_results.to_csv(f"evaluation/evaluation_result_{gender}.csv")
evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print(f"The generated images and evaluation results is saved in ./evaluation/")