|
import math
import os
from urllib.request import urlretrieve

import numpy as np
import pandas as pd
import torch
from PIL import Image

import hpsv2
import ImageReward as RM
import open_clip
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.multimodal import CLIPScore

|
def rle2mask(mask_rle, shape):
    """Decode a run-length-encoded (RLE) mask into a binary uint8 array of `shape` (H, W).

    `mask_rle` is a flat sequence of alternating 1-based start positions and run lengths.
    """
    starts, lengths = [np.asarray(x, dtype=int) for x in (mask_rle[0:][::2], mask_rle[1:][::2])]
    starts -= 1  # convert from 1-based to 0-based indexing
    ends = starts + lengths
    binary_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        binary_mask[lo:hi] = 1
    return binary_mask.reshape(shape)
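# Example (hypothetical values): an RLE of [2, 2] marks a run of two foreground
# pixels starting at 1-based position 2, so for shape (2, 3):
#   rle2mask([2, 2], (2, 3)) -> [[0, 1, 1],
#                                [0, 0, 0]]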
|
|
|
|
|
class MetricsCalculator:
    def __init__(self, device, ckpt_path="../../data/ckpt") -> None:
        self.device = device

        # CLIP text-image similarity (torchmetrics CLIPScore).
        self.clip_metric_calculator = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)

        # LPIPS perceptual distance.
        self.lpips_metric_calculator = LearnedPerceptualImagePatchSimilarity(net_type='squeeze').to(device)

        # LAION aesthetic predictor: a linear head over CLIP ViT-L/14 image features.
        self.aesthetic_model = torch.nn.Linear(768, 1)
        aesthetic_model_url = (
            "https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_vit_l_14_linear.pth?raw=true"
        )
        aesthetic_model_ckpt_path = os.path.join(ckpt_path, "sa_0_4_vit_l_14_linear.pth")
        os.makedirs(ckpt_path, exist_ok=True)
        if not os.path.exists(aesthetic_model_ckpt_path):
            # Download the linear-head weights once and cache them on disk.
            urlretrieve(aesthetic_model_url, aesthetic_model_ckpt_path)
        self.aesthetic_model.load_state_dict(torch.load(aesthetic_model_ckpt_path))
        self.aesthetic_model.eval()
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')

        # ImageReward human-preference model.
        self.imagereward_model = RM.load("ImageReward-v1.0")
|
    def calculate_image_reward(self, image, prompt):
        # ImageReward: scalar human-preference score for a (prompt, image) pair.
        reward = self.imagereward_model.score(prompt, [image])
        return reward

    def calculate_hpsv21_score(self, image, prompt):
        # HPS v2.1 human-preference score; hpsv2.score returns a list of scores.
        result = hpsv2.score(image, prompt, hps_version="v2.1")[0]
        return result.item()

    def calculate_aesthetic_score(self, img):
        # Linear head applied to L2-normalised CLIP image features.
        image = self.clip_preprocess(img).unsqueeze(0)
        with torch.no_grad():
            image_features = self.clip_model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            prediction = self.aesthetic_model(image_features)
        return prediction.cpu().item()
|
    def calculate_clip_similarity(self, img, txt):
        # torchmetrics CLIPScore takes a (C, H, W) image tensor and returns
        # 100 * cosine similarity between the CLIP image and text embeddings.
        img = np.array(img)
        img_tensor = torch.tensor(img).permute(2, 0, 1).to(self.device)
        score = self.clip_metric_calculator(img_tensor, txt)
        return score.cpu().item()
|
    def calculate_psnr(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            # The mask must broadcast against the images (e.g. H x W x 1 or H x W x C);
            # the MSE is then averaged over the masked region only.
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
            difference_size = mask.sum()
        else:
            difference_size = img_pred.size

        mse = ((img_pred - img_gt) ** 2).sum() / difference_size

        if mse < 1.0e-10:
            return 1000  # cap PSNR for (near-)identical images
        PIXEL_MAX = 1
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
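    # Sanity check (hypothetical numbers): for images in [0, 1], an MSE of 0.01
    # gives 20 * log10(1 / sqrt(0.01)) = 20 dB.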
|
|
|
|
|
    def calculate_lpips(self, img_gt, img_pred, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask

        img_pred_tensor = torch.tensor(img_pred).permute(2, 0, 1).unsqueeze(0).to(self.device)
        img_gt_tensor = torch.tensor(img_gt).permute(2, 0, 1).unsqueeze(0).to(self.device)

        # LPIPS expects inputs in [-1, 1], hence the `* 2 - 1` rescaling.
        score = self.lpips_metric_calculator(img_pred_tensor * 2 - 1, img_gt_tensor * 2 - 1)
        return score.cpu().item()
|
    def calculate_mse(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
            difference_size = mask.sum()
        else:
            difference_size = img_pred.size

        mse = ((img_pred - img_gt) ** 2).sum() / difference_size
        return mse.item()
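# Minimal usage sketch (the path and prompt below are placeholders):
#
#   calc = MetricsCalculator("cuda")
#   img = Image.open("example.png").convert("RGB").resize((512, 512))
#   print(calc.calculate_aesthetic_score(img))
#   print(calc.calculate_clip_similarity(img, "a photo of a dog"))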
|
|
|
|
|
device = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset locations (site-specific; adjust to your environment).
cc3m_base_dir = "/home/kis/datasets/cc3m_attempt12"
cc3m_full_regen = "/home/kis/Downloads/imgs"

# The CC3M training TSV has no header row: column 0 is the caption, column 1 the image URL.
cc3m_annotations_full = pd.read_csv("../../../../datasets/cc3m/Train_GCC-training.tsv", sep='\t', header=None)

# Metrics to compute; add 'Image Reward', 'HPS V2.1', and/or 'CLIP Similarity'
# to this list to compute them as well.
evaluation_df = pd.DataFrame(columns=['Image ID', 'Aesthetic Score'])

metrics_calculator = MetricsCalculator(device)
mask_root = "../../../SemanticSegmentation/mask_skin"

os.makedirs("evaluation", exist_ok=True)

# Which image set to evaluate: "man", "woman", "real", "cvpr", "syn", or "coco".
gender = "real"

root = "PATH_TO_THE_ROOT"
|
# Default annotation source; the "cvpr" and "coco" branches override it below.
annotations = cc3m_annotations_full

if gender == "man":
    img_root = root + "/foreground_syn_men/"
elif gender == "woman":
    img_root = root + "/foreground_syn_women/"
elif gender == "real":
    img_root = root + "/foreground/"
elif gender == "cvpr":
    img_root = root + "/foreground_cvpr_images/"
    annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "syn":
    img_root = root + "/foreground_fully_synthetic/"
elif gender == "coco":
    img_root = root + "/foreground_coco_counterfactuals/"
    annotations = pd.read_json(path_or_buf=root + "/coco_counterfactuals.jsonl", lines=True)
    annotations = annotations.set_index("id")
|
for fname in os.listdir(img_root):
    stem = fname.split(".")[0]
    if "mask" in stem or "original" in stem:
        continue
    image_name = fname.split("_")[0]

    print(f"evaluating image {image_name} ...")

    image_id = str(image_name).zfill(9)

    if gender in ["man", "woman"]:
        # Expects a three-column annotation table: image name, gender, caption.
        caption = annotations[(annotations[0] == image_name) & (annotations[1] == gender)][2].item()
    elif gender == "real":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "cvpr":
        caption = annotations.iloc[int(image_name), 1]
    elif gender == "syn":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "coco":
        # The id is encoded in the first two "_"-separated fields of the file name;
        # the final digit of the stem selects one of the counterfactual captions.
        image_id = f'{fname.split("_")[0]}{fname.split("_")[1]}'
        n = int(fname.split(".")[0][-1])
        caption = annotations.loc[int(image_id)].iloc[n]

    image_path = f"{img_root}/{fname}"
    prompt = caption
    try:
        src_image = Image.open(image_path).resize((512, 512))
        evaluation_result = [str(image_id) + "_" + str(gender)]
    except FileNotFoundError:
        continue
|
    success = True
    for metric in evaluation_df.columns.values.tolist()[1:]:
        print(f"evaluating metric: {metric}")
        try:
            if metric == 'Image Reward':
                metric_result = metrics_calculator.calculate_image_reward(src_image, prompt)
            elif metric == 'HPS V2.1':
                metric_result = metrics_calculator.calculate_hpsv21_score(src_image, prompt)
            elif metric == 'Aesthetic Score':
                metric_result = metrics_calculator.calculate_aesthetic_score(src_image)
            elif metric == 'CLIP Similarity':
                metric_result = metrics_calculator.calculate_clip_similarity(src_image, prompt)

            evaluation_result.append(metric_result)
        except RuntimeError:
            success = False
            break

    if success:
        evaluation_df.loc[len(evaluation_df.index)] = evaluation_result
        # Checkpoint the per-image results after every image.
        evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The averaged evaluation result:")
averaged_results = evaluation_df.mean(numeric_only=True)
print(averaged_results)
# Write the averages to a separate file so they do not overwrite the per-image table.
averaged_results.to_csv(f"evaluation/evaluation_result_{gender}_mean.csv")
evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The generated images and evaluation results are saved in ./evaluation/")